diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 2cc36c6a6..e58004eb4 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -1,6 +1,27 @@
 name: "CodeQL"
 
-on: [ push, pull_request]
+on:
+  # Run only for changes under the listed paths; files matching **/Makefile* are excluded.
+  push:
+    paths:
+      - src/**
+      - cmake/**
+      - "**/CMakeLists.txt"
+      - "CMakePresets.json"
+      - .github/workflows/codeql.yml
+      - vcpkg.json
+      - "!**/Makefile*"
+
+  pull_request:
+    paths:
+      - src/**
+      - cmake/**
+      - "**/CMakeLists.txt"
+      - "CMakePresets.json"
+      # The workflows/** glob already covers codeql.yml, so it is not listed separately.
+      - .github/workflows/**
+      - vcpkg.json
+      - "!**/Makefile*"
 
 jobs:
   analyze:
diff --git a/README.md b/README.md
index a1e3985e1..e251356ed 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ It is also recommended to use a manager application with 86Box for easier handli
 * [86Box Manager](https://github.com/86Box/86BoxManager) by [Overdoze](https://github.com/daviunic) (Windows only)
 * [86Box Manager Lite](https://github.com/insanemal/86box_manager_py) by [Insanemal](https://github.com/insanemal)
 * [WinBox for 86Box](https://github.com/86Box/WinBox-for-86Box) by Laci bá' (Windows only)
+* [Linbox-qt5](https://github.com/Dungeonseeker/linbox-qt5) by Dungeonseeker (Linux-focused; should work on Windows, though untested)
+* [MacBox for 86Box](https://github.com/Moonif/MacBox) by [Moonif](https://github.com/Moonif) (macOS only)
 
 It is also possible to use 86Box on its own with the `--vmpath`/`-P` command line option.
 
@@ -49,16 +51,6 @@ Licensing
 
 The emulator can also optionally make use of [munt](https://github.com/munt/munt), [FluidSynth](https://www.fluidsynth.org/), [Ghostscript](https://www.ghostscript.com/) and [Discord Game SDK](https://discord.com/developers/docs/game-sdk/sdk-starter-guide), which are distributed under their respective licenses.
 
-Contribution requirements
--------------------------
- Formal codification of the project's emulated hardware contribution requirements, which all have to be met to accept an addition:
-* A ROM must be available;
-* Documentation must be available or it must be feasible to reverse engineer with a reasonable amount of time and effort;
-* It must be feasible to implement with a reasonable amount of time and effort;
-* It has to fall inside the project's scope.
-
-Where unsure or for more details about the project's emulated hardware contribution requirements, contact a Contributor or higher.
-
 Donations
 ---------
 We do not charge you for the emulator but donations are still welcome:
diff --git a/src/86box.c b/src/86box.c
index 3c96d88a4..73f8a67ee 100644
--- a/src/86box.c
+++ b/src/86box.c
@@ -180,6 +180,7 @@ uint32_t isa_mem_size                           = 0;              /* (C) memory
 int      cpu_use_dynarec                        = 0;              /* (C) cpu uses/needs Dyna */
 int      cpu                                    = 0;              /* (C) cpu type */
 int      fpu_type                               = 0;              /* (C) fpu type */
+int      fpu_softfloat                          = 0;              /* (C) fpu uses softfloat */
 int      time_sync                              = 0;              /* (C) enable time sync */
 int      confirm_reset                          = 1;              /* (C) enable reset confirmation */
 int      confirm_exit                           = 1;              /* (C) enable exit confirmation */
diff --git a/src/acpi.c b/src/acpi.c
index cafa06229..625fdfef2 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -687,14 +687,13 @@ acpi_reg_write_common_regs(int size, uint16_t addr, uint8_t val, void *p)
                     }
 
                     if (sus_typ & SUS_RESET_PCI)
-                        device_reset_all_pci();
+                        device_reset_all(DEVICE_PCI);
 
                     if (sus_typ & SUS_RESET_CPU)
                         cpu_alt_reset = 0;
 
                     if (sus_typ & SUS_RESET_PCI) {
                         pci_reset();
-                        keyboard_at_reset();
 
                         mem_a20_alt = 0;
                         mem_a20_recalc();
diff --git a/src/chipset/ali1489.c b/src/chipset/ali1489.c
index 706b67f12..70ff509ab 100644
--- a/src/chipset/ali1489.c
+++ b/src/chipset/ali1489.c
@@ -180,9 +180,6 @@ ali1489_defaults(ali1489_t *dev)
     dev->regs[0x3d] = 0x01;
     dev->regs[0x40] = 0x03;
 
-    pic_kbd_latch(0x01);
-    pic_mouse_latch(0x00);
-
     ali1489_shadow_recalc(dev);
     cpu_cache_int_enabled = 0;
     cpu_cache_ext_enabled = 0;
@@ -298,7 +295,6 @@ ali1489_write(uint16_t addr, uint8_t val, void *priv)
 
                     case 0x2a: /* I/O Recovery Register */
                         dev->regs[dev->index] = val;
-                        pic_mouse_latch(val & 0x80);
                         break;
 
                     case 0x2b: /* Turbo Function Register */
diff --git a/src/chipset/ali1543.c b/src/chipset/ali1543.c
index 5aabd6c63..b587f91b5 100644
--- a/src/chipset/ali1543.c
+++ b/src/chipset/ali1543.c
@@ -151,11 +151,7 @@ ali1533_write(int func, int addr, uint8_t val, void *priv)
             break;
 
         case 0x41:
-            /* TODO: Bit 7 selects keyboard controller type:
-                     0 = AT, 1 = PS/2 */
-            pic_kbd_latch(!!(val & 0x80));
-            pic_mouse_latch(!!(val & 0x40));
-            dev->pci_conf[addr] = val & 0xbf;
+            dev->pci_conf[addr] = val;
             break;
 
         case 0x42: /* ISA Bus Speed */
@@ -1520,6 +1516,8 @@ ali1543_reset(void *priv)
     ali1533_write(0, 0x74, 0x00, dev);
     ali1533_write(0, 0x75, 0x00, dev);
     ali1533_write(0, 0x76, 0x00, dev);
+    if (dev->type == 1)
+        ali1533_write(0, 0x78, 0x00, dev);
 
     unmask_a20_in_smm = 1;
 }
diff --git a/src/chipset/ali6117.c b/src/chipset/ali6117.c
index 588aec3c3..98451067a 100644
--- a/src/chipset/ali6117.c
+++ b/src/chipset/ali6117.c
@@ -302,7 +302,6 @@ ali6117_reg_write(uint16_t addr, uint8_t val, void *priv)
                 case 0x36:
                     val &= 0xf0;
                     val |= dev->regs[dev->reg_offset];
-                    pic_mouse_latch(val & 0x40);
                     break;
 
                 case 0x37:
@@ -427,8 +426,6 @@ ali6117_reset(void *priv)
         /* On-board memory 15-16M is enabled by default. */
         mem_set_mem_state_both(0x00f00000, 0x00100000, MEM_READ_INTERNAL | MEM_WRITE_INTERNAL);
         ali6117_bank_recalc(dev);
-
-        pic_mouse_latch(0x00);
     }
 }
 
@@ -478,9 +475,6 @@ ali6117_init(const device_t *info)
         }
     }
 
-    if (!(dev->local & 0x08))
-        pic_kbd_latch(0x01);
-
     ali6117_reset(dev);
 
     if (!(dev->local & 0x08))
diff --git a/src/chipset/ims8848.c b/src/chipset/ims8848.c
index 57580f125..0b67661ef 100644
--- a/src/chipset/ims8848.c
+++ b/src/chipset/ims8848.c
@@ -389,9 +389,6 @@ ims8848_init(const device_t *info)
 
     ims8848_reset(dev);
 
-    pic_kbd_latch(0x01);
-    pic_mouse_latch(0x01);
-
     return dev;
 }
 
diff --git a/src/chipset/intel_420ex.c b/src/chipset/intel_420ex.c
index 2b1a08622..492f7a0d1 100644
--- a/src/chipset/intel_420ex.c
+++ b/src/chipset/intel_420ex.c
@@ -218,7 +218,6 @@ i420ex_write(int func, int addr, uint8_t val, void *priv)
             break;
         case 0x4e:
             dev->regs[addr] = (val & 0xf7);
-            pic_mouse_latch(!!(val & 0x10));
             break;
         case 0x50:
             dev->regs[addr] = (val & 0x0f);
@@ -389,7 +388,6 @@ i420ex_reset_hard(void *priv)
 
     dev->regs[0x4c] = 0x4d;
     dev->regs[0x4e] = 0x03;
-    pic_mouse_latch(0x00);
    /* Bits 2:1 of register 50h are 00 is 25 MHz, and 01 if 33 MHz, 10 and 11 are reserved. */
     if (cpu_busspeed >= 33333333)
         dev->regs[0x50] |= 0x02;
@@ -526,8 +524,6 @@ i420ex_init(const device_t *info)
 
     i420ex_reset_hard(dev);
 
-    pic_kbd_latch(0x01);
-
     return dev;
 }
 
diff --git a/src/chipset/intel_piix.c b/src/chipset/intel_piix.c
index 94468b435..718df8344 100644
--- a/src/chipset/intel_piix.c
+++ b/src/chipset/intel_piix.c
@@ -512,7 +512,6 @@ piix_write(int func, int addr, uint8_t val, void *priv)
                 break;
             case 0x4e:
                 fregs[0x4e] = val;
-                pic_mouse_latch(!!(val & 0x10));
                 if (dev->type >= 4)
                     kbc_alias_update_io_mapping(dev);
                 break;
@@ -1275,7 +1274,6 @@ piix_reset_hard(piix_t *dev)
     fregs[0x0e] = ((dev->type > 1) || (dev->rev != 2)) ? 0x80 : 0x00;
     fregs[0x4c] = 0x4d;
     fregs[0x4e] = 0x03;
-    pic_mouse_latch(0x00);
     fregs[0x60] = fregs[0x61] = fregs[0x62] = fregs[0x63] = 0x80;
     fregs[0x64]                                           = (dev->type > 3) ? 0x10 : 0x00;
     fregs[0x69]                                           = 0x02;
@@ -1681,8 +1679,6 @@ piix_init(const device_t *info)
 
     // device_add(&i8254_sec_device);
 
-    pic_kbd_latch(0x01);
-
     return dev;
 }
 
diff --git a/src/chipset/intel_sio.c b/src/chipset/intel_sio.c
index eb7aad983..26f3cb422 100644
--- a/src/chipset/intel_sio.c
+++ b/src/chipset/intel_sio.c
@@ -200,9 +200,10 @@ sio_write(int func, int addr, uint8_t val, void *priv)
             dev->regs[addr] = val;
             break;
         case 0x4c:
+            dev->regs[addr] = (val & 0x7f);
+            break;
         case 0x4d:
             dev->regs[addr] = (val & 0x7f);
-            pic_mouse_latch(!!(val & 0x10));
             break;
         case 0x4f:
             dev->regs[addr] = val;
@@ -394,7 +395,6 @@ sio_reset_hard(void *priv)
     dev->regs[0x4b] = 0x0f;
     dev->regs[0x4c] = 0x56;
     dev->regs[0x4d] = 0x40;
-    pic_mouse_latch(0x00);
     dev->regs[0x4e] = 0x07;
     dev->regs[0x4f] = 0x4f;
     dev->regs[0x57] = 0x04;
@@ -544,8 +544,6 @@ sio_init(const device_t *info)
 
     // device_add(&i8254_sec_device);
 
-    pic_kbd_latch(0x01);
-
     return dev;
 }
 
diff --git a/src/chipset/sis_5571.c b/src/chipset/sis_5571.c
index 53ef7956e..0f77a1a07 100644
--- a/src/chipset/sis_5571.c
+++ b/src/chipset/sis_5571.c
@@ -726,9 +726,6 @@ sis_5571_init(const device_t *info)
 
     sis_5571_reset(dev);
 
-    pic_kbd_latch(0x01);
-    pic_mouse_latch(0x01);
-
     return dev;
 }
 
diff --git a/src/chipset/sis_85c50x.c b/src/chipset/sis_85c50x.c
index 56d3a0a30..f174576c9 100644
--- a/src/chipset/sis_85c50x.c
+++ b/src/chipset/sis_85c50x.c
@@ -29,6 +29,8 @@
 #include <86box/timer.h>
 
 #include <86box/apm.h>
+#include <86box/machine.h>
+#include <86box/pic.h>
 #include <86box/mem.h>
 #include <86box/smram.h>
 #include <86box/pci.h>
diff --git a/src/chipset/umc_8886.c b/src/chipset/umc_8886.c
index 3def68666..dea5ac99a 100644
--- a/src/chipset/umc_8886.c
+++ b/src/chipset/umc_8886.c
@@ -374,9 +374,6 @@ umc_8886_init(const device_t *info)
 
     umc_8886_reset(dev);
 
-    pic_kbd_latch(0x01);
-    pic_mouse_latch(0x01);
-
     return dev;
 }
 
diff --git a/src/chipset/via_pipc.c b/src/chipset/via_pipc.c
index 31da38b48..c2abc4465 100644
--- a/src/chipset/via_pipc.c
+++ b/src/chipset/via_pipc.c
@@ -222,9 +222,6 @@ pipc_reset_hard(void *priv)
     dev->pci_isa_regs[0x0b] = 0x06;
     dev->pci_isa_regs[0x0e] = 0x80;
 
-    pic_kbd_latch(0x01);
-    pic_mouse_latch(dev->local >= VIA_PIPC_586B);
-
     dev->pci_isa_regs[0x48] = 0x01;
     dev->pci_isa_regs[0x4a] = 0x04;
     dev->pci_isa_regs[0x4f] = 0x03;
@@ -1067,8 +1064,7 @@ pipc_write(int func, int addr, uint8_t val, void *priv)
                 break;
 
             case 0x44:
-                if (dev->local < VIA_PIPC_586B)
-                    pic_mouse_latch(val & 0x01);
+                dev->pci_isa_regs[0x44] = val;
                 break;
 
             case 0x47:
diff --git a/src/config.c b/src/config.c
index 6399c91ab..75a3973e4 100644
--- a/src/config.c
+++ b/src/config.c
@@ -504,6 +504,14 @@ load_machine(void)
         mem_size = machine_get_max_ram(machine);
 
     cpu_use_dynarec = !!ini_section_get_int(cat, "cpu_use_dynarec", 0);
+    fpu_softfloat = !!ini_section_get_int(cat, "fpu_softfloat", 0);
+    /* The IBM PS/2 Model 70 Type 4 BIOS heavily tests the FPU in 80-bit precision mode, which requires
+       softfloat; without it, POST always fails with error 12903. Therefore, always disable the dynarec
+       and enable softfloat for this machine only. */
+    if (!strcmp(machines[machine].internal_name, "ibmps2_m70_type4")) {
+        cpu_use_dynarec = 0;
+        fpu_softfloat = 1;
+    }
 
     p = ini_section_get_string(cat, "time_sync", NULL);
     if (p != NULL) {
@@ -718,6 +726,24 @@ load_sound(void)
 
     mpu401_standalone_enable = !!ini_section_get_int(cat, "mpu401_standalone", 0);
 
+    /* Backwards compatibility for standalone SSI-2001, CMS and GUS from v3.11 and older. */
+    const char *legacy_cards[][2] = {
+        {"ssi2001",      "ssi2001"},
+        { "gameblaster", "cms"    },
+        { "gus",         "gus"    }
+    };
+    for (int i = 0, j = 0; i < (sizeof(legacy_cards) / sizeof(legacy_cards[0])); i++) {
+        if (ini_section_get_int(cat, legacy_cards[i][0], 0) == 1) {
+            /* Migrate to the first available sound card slot. */
+            for (; j < (sizeof(sound_card_current) / sizeof(sound_card_current[0])); j++) {
+                if (!sound_card_current[j]) {
+                    sound_card_current[j] = sound_card_get_from_internal_name(legacy_cards[i][1]);
+                    break;
+                }
+            }
+        }
+    }
+
     memset(temp, '\0', sizeof(temp));
     p = ini_section_get_string(cat, "sound_type", "float");
     if (strlen(p) > 511)
@@ -2159,7 +2185,7 @@ save_machine(void)
     else
         ini_section_delete_var(cat, "cpu_override");
 
-    /* Forwards compatibility with the previous CPU model system. */
+    /* Downgrade compatibility with the previous CPU model system. */
     ini_section_delete_var(cat, "cpu_manufacturer");
     ini_section_delete_var(cat, "cpu");
 
@@ -2226,6 +2252,7 @@ save_machine(void)
     ini_section_set_int(cat, "mem_size", mem_size);
 
     ini_section_set_int(cat, "cpu_use_dynarec", cpu_use_dynarec);
+    ini_section_set_int(cat, "fpu_softfloat", fpu_softfloat);
 
     if (time_sync & TIME_SYNC_ENABLED)
         if (time_sync & TIME_SYNC_UTC)
@@ -2390,6 +2417,27 @@ save_sound(void)
     else
         ini_section_set_int(cat, "mpu401_standalone", mpu401_standalone_enable);
 
+    /* Downgrade compatibility for standalone SSI-2001, CMS and GUS from v3.11 and older. */
+    const char *legacy_cards[][2] = {
+        {"ssi2001",      "ssi2001"},
+        { "gameblaster", "cms"    },
+        { "gus",         "gus"    }
+    };
+    for (int i = 0; i < (sizeof(legacy_cards) / sizeof(legacy_cards[0])); i++) {
+        int card_id = sound_card_get_from_internal_name(legacy_cards[i][1]);
+        for (int j = 0; j < (sizeof(sound_card_current) / sizeof(sound_card_current[0])); j++) {
+            if (sound_card_current[j] == card_id) {
+                /* A special value of 2 still enables the cards on older versions,
+                   but lets newer versions know that they've already been migrated. */
+                ini_section_set_int(cat, legacy_cards[i][0], 2);
+                card_id = 0; /* mark as found */
+                break;
+            }
+        }
+        if (card_id > 0) /* not found */
+            ini_section_delete_var(cat, legacy_cards[i][0]);
+    }
+
     if (sound_is_float == 1)
         ini_section_delete_var(cat, "sound_type");
     else
diff --git a/src/cpu/386_common.c b/src/cpu/386_common.c
index f8e7c11ce..25afccc95 100644
--- a/src/cpu/386_common.c
+++ b/src/cpu/386_common.c
@@ -780,8 +780,8 @@ smram_restore_state_p6(uint32_t *saved_state)
     cpu_state.seg_gs.ar_high = (saved_state[SMRAM_FIELD_P6_GS_SELECTOR_AR] >> 24) & 0xff;
     smm_seg_load(&cpu_state.seg_gs);
 
-    mem_a20_alt = 0;
-    keyboard_at_set_a20_key(!saved_state[SMRAM_FIELD_P6_A20M]);
+    mem_a20_alt = 0x00;
+    mem_a20_key = saved_state[SMRAM_FIELD_P6_A20M] ? 0x00 : 0x02;
     mem_a20_recalc();
 
     if (SMM_REVISION_ID & SMM_SMBASE_RELOCATION)
@@ -1053,13 +1053,13 @@ enter_smm(int in_hlt)
 
     memset(saved_state, 0x00, SMM_SAVE_STATE_MAP_SIZE * sizeof(uint32_t));
 
-    if (is_cxsmm) /* Cx6x86 */
+    if (is_cxsmm)                    /* Cx6x86 */
         smram_save_state_cyrix(saved_state, in_hlt);
     else if (is_pentium || is_am486) /* Am486 / 5x86 / Intel P5 (Pentium) */
         smram_save_state_p5(saved_state, in_hlt);
-    else if (is_k5 || is_k6) /* AMD K5 and K6 */
+    else if (is_k5 || is_k6)         /* AMD K5 and K6 */
         smram_save_state_amd_k(saved_state, in_hlt);
-    else if (is_p6) /* Intel P6 (Pentium Pro, Pentium II, Celeron) */
+    else if (is_p6)                  /* Intel P6 (Pentium Pro, Pentium II, Celeron) */
         smram_save_state_p6(saved_state, in_hlt);
 
     cr0 &= ~0x8000000d;
@@ -1224,13 +1224,13 @@ leave_smm(void)
     }
 
     x386_common_log("New SMBASE: %08X (%08X)\n", saved_state[SMRAM_FIELD_P5_SMBASE_OFFSET], saved_state[66]);
-    if (is_cxsmm) /* Cx6x86 */
+    if (is_cxsmm)                    /* Cx6x86 */
         smram_restore_state_cyrix(saved_state);
     else if (is_pentium || is_am486) /* Am486 / 5x86 / Intel P5 (Pentium) */
         smram_restore_state_p5(saved_state);
-    else if (is_k5 || is_k6) /* AMD K5 and K6 */
+    else if (is_k5 || is_k6)         /* AMD K5 and K6 */
         smram_restore_state_amd_k(saved_state);
-    else if (is_p6) /* Intel P6 (Pentium Pro, Pentium II, Celeron) */
+    else if (is_p6)                  /* Intel P6 (Pentium Pro, Pentium II, Celeron) */
         smram_restore_state_p6(saved_state);
 
     in_smm = 0;
@@ -1427,25 +1427,29 @@ x86illegal(void)
 }
 
 int
-checkio(uint32_t port)
+checkio(uint32_t port, int mask) /* mask: low bits to test, counted from port's own bit (check_io_perm passes (1 << size) - 1); returns the tested bitmap bits that are set, 0 if the first read aborted */
 {
-    uint16_t t;
-    uint8_t  d;
+    uint32_t t; /* word read at offset 0x66 from tr.base (the I/O map base field of a 32-bit TSS), later advanced to the bitmap byte for this port */
 
     cpl_override = 1;
     t            = readmemw(tr.base, 0x66);
-    cpl_override = 0;
 
-    if (cpu_state.abrt)
+    if (UNLIKELY(cpu_state.abrt)) { /* the read above aborted: clear the override and return 0; the caller checks cpu_state.abrt itself */
+        cpl_override = 0;
         return 0;
+    }
 
-    if ((t + (port >> 3UL)) > tr.limit)
-        return 1;
-
-    cpl_override = 1;
-    d            = readmembl(tr.base + t + (port >> 3));
+    t += (port >> 3UL); /* eight ports per bitmap byte */
+    mask <<= (port & 7); /* line the mask up with this port's bit inside that byte */
+    if (UNLIKELY(mask & 0xff00)) { /* the shifted mask spills into the next byte, so a 16-bit read is needed */
+        if (LIKELY(t < tr.limit)) /* both bytes must be within tr.limit; otherwise mask is returned unfiltered (non-zero) */
+            mask &= readmemwl(tr.base + t);
+    } else {
+        if (LIKELY(t <= tr.limit)) /* beyond tr.limit the mask is likewise returned unfiltered (non-zero) */
+            mask &= readmembl(tr.base + t);
+    }
     cpl_override = 0;
-    return d & (1 << (port & 7));
+    return mask;
 }
 
 #ifdef OLD_DIVEXCP
diff --git a/src/cpu/386_common.h b/src/cpu/386_common.h
index f26eb7f98..ae71347b6 100644
--- a/src/cpu/386_common.h
+++ b/src/cpu/386_common.h
@@ -97,11 +97,11 @@
     if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \
     do_mmutranslate((s) + (a), b, 4, 1)
 
-int checkio(uint32_t port);
+int checkio(uint32_t port, int mask);
 
-#define check_io_perm(port)                                          \
+#define check_io_perm(port, size)                                    \
     if (msw & 1 && ((CPL > IOPL) || (cpu_state.eflags & VM_FLAG))) { \
-        int tempi = checkio(port);                                   \
+        int tempi = checkio(port, (1 << size) - 1);                  \
         if (cpu_state.abrt)                                          \
             return 1;                                                \
         if (tempi) {                                                 \
diff --git a/src/cpu/808x.c b/src/cpu/808x.c
index b2859153a..925594a11 100644
--- a/src/cpu/808x.c
+++ b/src/cpu/808x.c
@@ -426,13 +426,13 @@ pfq_write(void)
            free in the queue. */
         tempw                         = readmemwf(pfq_ip);
         *(uint16_t *) &(pfq[pfq_pos]) = tempw;
-        pfq_ip += 2;
+        pfq_ip = (pfq_ip + 2) & 0xffff;
         pfq_pos += 2;
     } else if (!is8086 && (pfq_pos < pfq_size)) {
         /* The 8088 fetches 1 byte at a time, and only if there's at least 1 byte
            free in the queue. */
         pfq[pfq_pos] = readmembf(pfq_ip);
-        pfq_ip++;
+        pfq_ip = (pfq_ip + 1) & 0xffff;
         pfq_pos++;
     }
 }
@@ -440,10 +440,10 @@ pfq_write(void)
 static uint8_t
 pfq_read(void)
 {
-    uint8_t temp, i;
+    uint8_t temp;
 
     temp = pfq[0];
-    for (i = 0; i < (pfq_size - 1); i++)
+    for (int i = 0; i < (pfq_size - 1); i++)
         pfq[i] = pfq[i + 1];
     pfq_pos--;
     cpu_state.pc = (cpu_state.pc + 1) & 0xffff;
@@ -2248,7 +2248,7 @@ execx86(int cycs)
 
                             default:
                                 opcode = orig_opcode;
-                                cpu_state.pc--;
+                                cpu_state.pc = (cpu_state.pc - 1) & 0xffff;
                                 break;
                         }
                     } else
@@ -3178,33 +3178,63 @@ execx86(int cycs)
                     tempw = cpu_state.pc;
                     if (!hasfpu)
                         geteaw();
-                    else
-                        switch (opcode) {
-                            case 0xD8:
-                                ops_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
-                                break;
-                            case 0xD9:
-                                ops_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat);
-                                break;
-                            case 0xDA:
-                                ops_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat);
-                                break;
-                            case 0xDB:
-                                ops_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat);
-                                break;
-                            case 0xDC:
-                                ops_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
-                                break;
-                            case 0xDD:
-                                ops_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat);
-                                break;
-                            case 0xDE:
-                                ops_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat);
-                                break;
-                            case 0xDF:
-                                ops_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat);
-                                break;
+                    else {
+                        if (fpu_softfloat) {
+                            switch (opcode) {
+                                case 0xD8:
+                                    ops_sf_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                                    break;
+                                case 0xD9:
+                                    ops_sf_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDA:
+                                    ops_sf_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDB:
+                                    ops_sf_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDC:
+                                    ops_sf_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                                    break;
+                                case 0xDD:
+                                    ops_sf_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDE:
+                                    ops_sf_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDF:
+                                    ops_sf_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                            }
+                        } else {
+                            switch (opcode) {
+                                case 0xD8:
+                                    ops_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                                    break;
+                                case 0xD9:
+                                    ops_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDA:
+                                    ops_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDB:
+                                    ops_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDC:
+                                    ops_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                                    break;
+                                case 0xDD:
+                                    ops_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDE:
+                                    ops_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                                case 0xDF:
+                                    ops_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat);
+                                    break;
+                            }
                         }
+                    }
                     cpu_state.pc = tempw; /* Do this as the x87 code advances it, which is needed on
                                              the 286+ core, but not here. */
                     wait(1, 0);
diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt
index 68baaf293..18aa06023 100644
--- a/src/cpu/CMakeLists.txt
+++ b/src/cpu/CMakeLists.txt
@@ -32,3 +32,6 @@ if(DYNAREC)
         codegen_timing_pentium.c codegen_timing_p6.c
         codegen_timing_winchip.c codegen_timing_winchip2.c)
 endif()
+
+add_subdirectory(softfloat)
+target_link_libraries(86Box softfloat)
diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c
index 33c474ba3..9e6f2ec83 100644
--- a/src/cpu/cpu.c
+++ b/src/cpu/cpu.c
@@ -74,6 +74,7 @@ enum {
 
 /* Make sure this is as low as possible. */
 cpu_state_t cpu_state;
+fpu_state_t fpu_state;
 
 /* Place this immediately after. */
 uint32_t abrt_error;
@@ -207,6 +208,8 @@ void
 cpu_set_edx(void)
 {
     EDX = cpu_s->edx_reset;
+    if (!cpu_use_dynarec && fpu_softfloat)
+        SF_FPU_reset();
 }
 
 cpu_family_t *
@@ -350,6 +353,23 @@ cpu_family_is_eligible(const cpu_family_t *cpu_family, int machine)
     return 0;
 }
 
+void
+SF_FPU_reset(void) /* Reinitialise the global softfloat FPU state (fpu_state). */
+{
+    if (fpu_type == FPU_NONE) /* nothing to do when fpu_type is FPU_NONE */
+        return;
+    memset(fpu_state.st_space, 0, sizeof(floatx80) * 8); /* zero eight floatx80 entries of st_space */
+    fpu_state.tos = 0;
+    fpu_state.cwd = 0x0040;
+    fpu_state.swd = 0;
+    fpu_state.tag = 0x5555;
+    fpu_state.foo = 0;
+    fpu_state.fip = 0;
+    fpu_state.fcs = 0;
+    fpu_state.fdp = 0;
+    fpu_state.fds = 0;
+}
+
 void
 cpu_set(void)
 {
@@ -460,22 +480,41 @@ cpu_set(void)
         x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_df_a16;
         x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_df_a32;
 #endif
-        x86_opcodes_d8_a16 = ops_fpu_d8_a16;
-        x86_opcodes_d8_a32 = ops_fpu_d8_a32;
-        x86_opcodes_d9_a16 = ops_fpu_d9_a16;
-        x86_opcodes_d9_a32 = ops_fpu_d9_a32;
-        x86_opcodes_da_a16 = ops_fpu_da_a16;
-        x86_opcodes_da_a32 = ops_fpu_da_a32;
-        x86_opcodes_db_a16 = ops_fpu_db_a16;
-        x86_opcodes_db_a32 = ops_fpu_db_a32;
-        x86_opcodes_dc_a16 = ops_fpu_dc_a16;
-        x86_opcodes_dc_a32 = ops_fpu_dc_a32;
-        x86_opcodes_dd_a16 = ops_fpu_dd_a16;
-        x86_opcodes_dd_a32 = ops_fpu_dd_a32;
-        x86_opcodes_de_a16 = ops_fpu_de_a16;
-        x86_opcodes_de_a32 = ops_fpu_de_a32;
-        x86_opcodes_df_a16 = ops_fpu_df_a16;
-        x86_opcodes_df_a32 = ops_fpu_df_a32;
+        if (fpu_softfloat) {
+            x86_opcodes_d8_a16 = ops_sf_fpu_d8_a16;
+            x86_opcodes_d8_a32 = ops_sf_fpu_d8_a32;
+            x86_opcodes_d9_a16 = ops_sf_fpu_d9_a16;
+            x86_opcodes_d9_a32 = ops_sf_fpu_d9_a32;
+            x86_opcodes_da_a16 = ops_sf_fpu_da_a16;
+            x86_opcodes_da_a32 = ops_sf_fpu_da_a32;
+            x86_opcodes_db_a16 = ops_sf_fpu_db_a16;
+            x86_opcodes_db_a32 = ops_sf_fpu_db_a32;
+            x86_opcodes_dc_a16 = ops_sf_fpu_dc_a16;
+            x86_opcodes_dc_a32 = ops_sf_fpu_dc_a32;
+            x86_opcodes_dd_a16 = ops_sf_fpu_dd_a16;
+            x86_opcodes_dd_a32 = ops_sf_fpu_dd_a32;
+            x86_opcodes_de_a16 = ops_sf_fpu_de_a16;
+            x86_opcodes_de_a32 = ops_sf_fpu_de_a32;
+            x86_opcodes_df_a16 = ops_sf_fpu_df_a16;
+            x86_opcodes_df_a32 = ops_sf_fpu_df_a32;
+        } else {
+            x86_opcodes_d8_a16 = ops_fpu_d8_a16;
+            x86_opcodes_d8_a32 = ops_fpu_d8_a32;
+            x86_opcodes_d9_a16 = ops_fpu_d9_a16;
+            x86_opcodes_d9_a32 = ops_fpu_d9_a32;
+            x86_opcodes_da_a16 = ops_fpu_da_a16;
+            x86_opcodes_da_a32 = ops_fpu_da_a32;
+            x86_opcodes_db_a16 = ops_fpu_db_a16;
+            x86_opcodes_db_a32 = ops_fpu_db_a32;
+            x86_opcodes_dc_a16 = ops_fpu_dc_a16;
+            x86_opcodes_dc_a32 = ops_fpu_dc_a32;
+            x86_opcodes_dd_a16 = ops_fpu_dd_a16;
+            x86_opcodes_dd_a32 = ops_fpu_dd_a32;
+            x86_opcodes_de_a16 = ops_fpu_de_a16;
+            x86_opcodes_de_a32 = ops_fpu_de_a32;
+            x86_opcodes_df_a16 = ops_fpu_df_a16;
+            x86_opcodes_df_a32 = ops_fpu_df_a32;
+        }
     } else {
 #ifdef USE_DYNAREC
         x86_dynarec_opcodes_d8_a16 = dynarec_ops_nofpu_a16;
@@ -563,20 +602,37 @@ cpu_set(void)
                 x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_287_df_a16;
                 x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_287_df_a32;
 #endif
-                x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
-                x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
-                x86_opcodes_da_a16 = ops_fpu_287_da_a16;
-                x86_opcodes_da_a32 = ops_fpu_287_da_a32;
-                x86_opcodes_db_a16 = ops_fpu_287_db_a16;
-                x86_opcodes_db_a32 = ops_fpu_287_db_a32;
-                x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
-                x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
-                x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
-                x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
-                x86_opcodes_de_a16 = ops_fpu_287_de_a16;
-                x86_opcodes_de_a32 = ops_fpu_287_de_a32;
-                x86_opcodes_df_a16 = ops_fpu_287_df_a16;
-                x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+                if (fpu_softfloat) {
+                    x86_opcodes_d9_a16 = ops_sf_fpu_287_d9_a16;
+                    x86_opcodes_d9_a32 = ops_sf_fpu_287_d9_a32;
+                    x86_opcodes_da_a16 = ops_sf_fpu_287_da_a16;
+                    x86_opcodes_da_a32 = ops_sf_fpu_287_da_a32;
+                    x86_opcodes_db_a16 = ops_sf_fpu_287_db_a16;
+                    x86_opcodes_db_a32 = ops_sf_fpu_287_db_a32;
+                    x86_opcodes_dc_a16 = ops_sf_fpu_287_dc_a16;
+                    x86_opcodes_dc_a32 = ops_sf_fpu_287_dc_a32;
+                    x86_opcodes_dd_a16 = ops_sf_fpu_287_dd_a16;
+                    x86_opcodes_dd_a32 = ops_sf_fpu_287_dd_a32;
+                    x86_opcodes_de_a16 = ops_sf_fpu_287_de_a16;
+                    x86_opcodes_de_a32 = ops_sf_fpu_287_de_a32;
+                    x86_opcodes_df_a16 = ops_sf_fpu_287_df_a16;
+                    x86_opcodes_df_a32 = ops_sf_fpu_287_df_a32;
+                } else {
+                    x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
+                    x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
+                    x86_opcodes_da_a16 = ops_fpu_287_da_a16;
+                    x86_opcodes_da_a32 = ops_fpu_287_da_a32;
+                    x86_opcodes_db_a16 = ops_fpu_287_db_a16;
+                    x86_opcodes_db_a32 = ops_fpu_287_db_a32;
+                    x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
+                    x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
+                    x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
+                    x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
+                    x86_opcodes_de_a16 = ops_fpu_287_de_a16;
+                    x86_opcodes_de_a32 = ops_fpu_287_de_a32;
+                    x86_opcodes_df_a16 = ops_fpu_287_df_a16;
+                    x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+                }
             }
 
             timing_rr  = 2;  /* register dest - register src */
@@ -639,20 +695,37 @@ cpu_set(void)
                 x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_287_df_a16;
                 x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_287_df_a32;
 #endif
-                x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
-                x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
-                x86_opcodes_da_a16 = ops_fpu_287_da_a16;
-                x86_opcodes_da_a32 = ops_fpu_287_da_a32;
-                x86_opcodes_db_a16 = ops_fpu_287_db_a16;
-                x86_opcodes_db_a32 = ops_fpu_287_db_a32;
-                x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
-                x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
-                x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
-                x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
-                x86_opcodes_de_a16 = ops_fpu_287_de_a16;
-                x86_opcodes_de_a32 = ops_fpu_287_de_a32;
-                x86_opcodes_df_a16 = ops_fpu_287_df_a16;
-                x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+                if (fpu_softfloat) {
+                    x86_opcodes_d9_a16 = ops_sf_fpu_287_d9_a16;
+                    x86_opcodes_d9_a32 = ops_sf_fpu_287_d9_a32;
+                    x86_opcodes_da_a16 = ops_sf_fpu_287_da_a16;
+                    x86_opcodes_da_a32 = ops_sf_fpu_287_da_a32;
+                    x86_opcodes_db_a16 = ops_sf_fpu_287_db_a16;
+                    x86_opcodes_db_a32 = ops_sf_fpu_287_db_a32;
+                    x86_opcodes_dc_a16 = ops_sf_fpu_287_dc_a16;
+                    x86_opcodes_dc_a32 = ops_sf_fpu_287_dc_a32;
+                    x86_opcodes_dd_a16 = ops_sf_fpu_287_dd_a16;
+                    x86_opcodes_dd_a32 = ops_sf_fpu_287_dd_a32;
+                    x86_opcodes_de_a16 = ops_sf_fpu_287_de_a16;
+                    x86_opcodes_de_a32 = ops_sf_fpu_287_de_a32;
+                    x86_opcodes_df_a16 = ops_sf_fpu_287_df_a16;
+                    x86_opcodes_df_a32 = ops_sf_fpu_287_df_a32;
+                } else {
+                    x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
+                    x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
+                    x86_opcodes_da_a16 = ops_fpu_287_da_a16;
+                    x86_opcodes_da_a32 = ops_fpu_287_da_a32;
+                    x86_opcodes_db_a16 = ops_fpu_287_db_a16;
+                    x86_opcodes_db_a32 = ops_fpu_287_db_a32;
+                    x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
+                    x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
+                    x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
+                    x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
+                    x86_opcodes_de_a16 = ops_fpu_287_de_a16;
+                    x86_opcodes_de_a32 = ops_fpu_287_de_a32;
+                    x86_opcodes_df_a16 = ops_fpu_287_df_a16;
+                    x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+                }
             }
 
             timing_rr = 2; /* register dest - register src */
@@ -1054,12 +1127,21 @@ cpu_set(void)
                 x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_686_df_a16;
                 x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_686_df_a32;
 #    endif
-                x86_opcodes_da_a16 = ops_fpu_686_da_a16;
-                x86_opcodes_da_a32 = ops_fpu_686_da_a32;
-                x86_opcodes_db_a16 = ops_fpu_686_db_a16;
-                x86_opcodes_db_a32 = ops_fpu_686_db_a32;
-                x86_opcodes_df_a16 = ops_fpu_686_df_a16;
-                x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+                if (fpu_softfloat) {
+                    x86_opcodes_da_a16 = ops_sf_fpu_686_da_a16;
+                    x86_opcodes_da_a32 = ops_sf_fpu_686_da_a32;
+                    x86_opcodes_db_a16 = ops_sf_fpu_686_db_a16;
+                    x86_opcodes_db_a32 = ops_sf_fpu_686_db_a32;
+                    x86_opcodes_df_a16 = ops_sf_fpu_686_df_a16;
+                    x86_opcodes_df_a32 = ops_sf_fpu_686_df_a32;
+                } else {
+                    x86_opcodes_da_a16 = ops_fpu_686_da_a16;
+                    x86_opcodes_da_a32 = ops_fpu_686_da_a32;
+                    x86_opcodes_db_a16 = ops_fpu_686_db_a16;
+                    x86_opcodes_db_a32 = ops_fpu_686_db_a32;
+                    x86_opcodes_df_a16 = ops_fpu_686_df_a16;
+                    x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+                }
             }
 
 #    ifdef USE_DYNAREC
@@ -1262,12 +1344,21 @@ cpu_set(void)
             else
                 x86_setopcodes(ops_386, ops_pentium2_0f);
 #endif
-            x86_opcodes_da_a16 = ops_fpu_686_da_a16;
-            x86_opcodes_da_a32 = ops_fpu_686_da_a32;
-            x86_opcodes_db_a16 = ops_fpu_686_db_a16;
-            x86_opcodes_db_a32 = ops_fpu_686_db_a32;
-            x86_opcodes_df_a16 = ops_fpu_686_df_a16;
-            x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+            if (fpu_softfloat) {
+                x86_opcodes_da_a16 = ops_sf_fpu_686_da_a16;
+                x86_opcodes_da_a32 = ops_sf_fpu_686_da_a32;
+                x86_opcodes_db_a16 = ops_sf_fpu_686_db_a16;
+                x86_opcodes_db_a32 = ops_sf_fpu_686_db_a32;
+                x86_opcodes_df_a16 = ops_sf_fpu_686_df_a16;
+                x86_opcodes_df_a32 = ops_sf_fpu_686_df_a32;
+            } else {
+                x86_opcodes_da_a16 = ops_fpu_686_da_a16;
+                x86_opcodes_da_a32 = ops_fpu_686_da_a32;
+                x86_opcodes_db_a16 = ops_fpu_686_db_a16;
+                x86_opcodes_db_a32 = ops_fpu_686_db_a32;
+                x86_opcodes_df_a16 = ops_fpu_686_df_a16;
+                x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+            }
 
             timing_rr  = 1; /* register dest - register src */
             timing_rm  = 2; /* register dest - memory src */
diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h
index 2eb64e758..b513e771e 100644
--- a/src/cpu/cpu.h
+++ b/src/cpu/cpu.h
@@ -21,6 +21,8 @@
 #ifndef EMU_CPU_H
 #define EMU_CPU_H
 
+#include "softfloat/softfloat.h"
+
 enum {
     FPU_NONE,
     FPU_8087,
@@ -404,6 +406,20 @@ typedef struct {
     uint8_t inside_emulation_mode;
 } cpu_state_t;
 
+typedef struct { /* State block for the SoftFloat FPU path; field meanings are inferred from their names -- TODO confirm */
+    uint16_t cwd; /* presumably the x87 control word */
+    uint16_t swd; /* presumably the x87 status word */
+    uint16_t tag; /* presumably the x87 tag word */
+    uint16_t foo; /* NOTE(review): opaque name; possibly the last-opcode field -- confirm against the users of this struct */
+    uint32_t fip; /* presumably the last FPU instruction pointer (offset part) */
+    uint32_t fdp; /* presumably the last FPU data/operand pointer (offset part) */
+    uint16_t fcs; /* presumably the selector that pairs with fip */
+    uint16_t fds; /* presumably the selector that pairs with fdp */
+    floatx80 st_space[8]; /* eight floatx80 register slots */
+    unsigned char tos; /* presumably the top-of-stack index into st_space */
+    unsigned char align1, align2, align3; /* three explicit padding bytes after tos */
+} fpu_state_t;
+
 #define in_smm   cpu_state._in_smm
 #define smi_line cpu_state._smi_line
 
@@ -416,7 +432,11 @@ typedef struct {
 #define CPU_STATUS_PMODE   (1 << 2)
 #define CPU_STATUS_V86     (1 << 3)
 #define CPU_STATUS_SMM     (1 << 4)
+#ifdef USE_NEW_DYNAREC
+#define CPU_STATUS_FLAGS   0xff
+#else
 #define CPU_STATUS_FLAGS   0xffff
+#endif
 
 /*If the cpu_state.flags below are set in cpu_cur_status, they must be set in block->status.
   Otherwise they are ignored*/
@@ -480,6 +500,7 @@ COMPILE_TIME_ASSERT(sizeof(cpu_state_t) <= 128)
 
 /* Global variables. */
 extern cpu_state_t cpu_state;
+extern fpu_state_t fpu_state;
 
 extern const cpu_family_t         cpu_families[];
 extern const cpu_legacy_machine_t cpu_legacy_table[];
@@ -639,7 +660,7 @@ extern void cpu_RDMSR(void);
 extern void cpu_WRMSR(void);
 extern void cpu_INVD(uint8_t wb);
 
-extern int  checkio(uint32_t port);
+extern int  checkio(uint32_t port, int mask);
 extern void codegen_block_end(void);
 extern void codegen_reset(void);
 extern void cpu_set_edx(void);
@@ -735,6 +756,8 @@ extern uint32_t custom_nmi_vector;
 extern void (*cpu_exec)(int cycs);
 extern uint8_t do_translate, do_translate2;
 
+extern void SF_FPU_reset(void);
+
 extern void reset_808x(int hard);
 extern void interrupt_808x(uint16_t addr);
 
diff --git a/src/cpu/softfloat/CMakeLists.txt b/src/cpu/softfloat/CMakeLists.txt
new file mode 100644
index 000000000..936157185
--- /dev/null
+++ b/src/cpu/softfloat/CMakeLists.txt
@@ -0,0 +1,17 @@
+#
+# 86Box    A hypervisor and IBM PC system emulator that specializes in
+#          running old operating systems and software designed for IBM
+#          PC systems and compatibles from 1981 through fairly recent
+#          system designs based on the PCI bus.
+#
+#          This file is part of the 86Box distribution.
+#
+#          CMake build script.
+#
+# Authors: David Hrdlička, <hrdlickadavid@outlook.com>
+#
+#          Copyright 2020-2021 David Hrdlička.
+#
+# Object library "softfloat": the SoftFloat sources (with the Bochs transcendental extensions) in this directory.
+add_library(softfloat OBJECT f2xm1.cc fpatan.cc fprem.cc fsincos.cc fyl2x.cc softfloat_poly.cc softfloat.cc softfloat16.cc
+		softfloat-muladd.cc softfloat-round-pack.cc softfloat-specialize.cc softfloatx80.cc)
diff --git a/src/cpu/softfloat/config.h b/src/cpu/softfloat/config.h
new file mode 100644
index 000000000..3889b5c02
--- /dev/null
+++ b/src/cpu/softfloat/config.h
@@ -0,0 +1,46 @@
+#include <stdint.h>
+/* Short aliases for the <stdint.h> fixed-width integer types; `flag' is an 8-bit signed integer. */
+typedef int8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8_t bits8;
+typedef int8_t sbits8;
+typedef uint16_t bits16;
+typedef int16_t sbits16;
+typedef uint32_t bits32;
+typedef int32_t sbits32;
+typedef uint64_t bits64;
+typedef int64_t sbits64;
+/* Bit<N>u / Bit<N>s spellings of the same fixed-width types (used by the .cc sources in this directory). */
+typedef uint8_t Bit8u;
+typedef int8_t Bit8s;
+typedef uint16_t Bit16u;
+typedef int16_t Bit16s;
+typedef uint32_t Bit32u;
+typedef int32_t Bit32s;
+typedef uint64_t Bit64u;
+typedef int64_t Bit64s;
+/* NOTE(review): this header has no include guard -- confirm repeated inclusion is acceptable for every compiler in use. */
+/*----------------------------------------------------------------------------
+| The `BX_CONST64' macro (this text originally referred to it as `LIT64')
+| takes as its argument a textual integer literal and marks the literal as
+| having a 64-bit integer type.  For example, the GNU C Compiler (`gcc')
+| requires that 64-bit literals be appended with the letters `LL' standing
+| for `long long'.  The definition below always pastes `LL' onto its
+| argument, so the argument must be a bare, unsuffixed integer literal.
+*----------------------------------------------------------------------------*/
+#define BX_CONST64(a) a##LL
+#define BX_CPP_INLINE static __inline
diff --git a/src/cpu/softfloat/f2xm1.cc b/src/cpu/softfloat/f2xm1.cc
new file mode 100644
index 000000000..ed4af1d12
--- /dev/null
+++ b/src/cpu/softfloat/f2xm1.cc
@@ -0,0 +1,182 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+
+static const floatx80 floatx80_negone  = packFloatx80(1, 0x3fff, BX_CONST64(0x8000000000000000)); /* -1.0 */
+static const floatx80 floatx80_neghalf = packFloatx80(1, 0x3ffe, BX_CONST64(0x8000000000000000)); /* -0.5 */
+static const float128 float128_ln2     = /* ln(2) as a float128 bit pattern */
+    packFloat128(BX_CONST64(0x3ffe62e42fefa39e), BX_CONST64(0xf35793c7673007e6));
+/* Significand of ln(2) as two 64-bit halves; f2xm1's tiny-argument path multiplies the operand's significand by it. */
+#ifdef BETTER_THAN_PENTIUM
+
+#define LN2_SIG_HI BX_CONST64(0xb17217f7d1cf79ab)
+#define LN2_SIG_LO BX_CONST64(0xc9e3b39800000000)  /* 96 bit precision */
+
+#else
+
+#define LN2_SIG_HI BX_CONST64(0xb17217f7d1cf79ab)
+#define LN2_SIG_LO BX_CONST64(0xc000000000000000)  /* 67-bit precision */
+
+#endif
+
+#define EXP_ARR_SIZE 15
+/* exp_arr[k] holds 1/(k+1)! as a float128 bit pattern; the trailing comment on each row is k+1. */
+static float128 exp_arr[EXP_ARR_SIZE] =
+{
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+    PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /*  2 */
+    PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /*  3 */
+    PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+    PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+    PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+    PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+    PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+    PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+    PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+    PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+    PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+    PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+    PACK_FLOAT_128(0x3fda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+    PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0)  /* 15 */
+};
+/* Polynomial evaluator defined in another translation unit; presumably returns sum(arr[k] * x^k) -- TODO confirm. */
+extern float128 EvalPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
+/* poly_exp: approximates e^x - 1 as x * EvalPoly(x, exp_arr, EXP_ARR_SIZE); flags are accumulated in *status. */
+/* required -1 < x < 1 */
+static float128 poly_exp(float128 x, struct float_status_t *status)
+{
+/*
+    //               2     3     4     5     6     7     8     9
+    //  x           x     x     x     x     x     x     x     x
+    // e - 1 ~ x + --- + --- + --- + --- + --- + --- + --- + --- + ...
+    //              2!    3!    4!    5!    6!    7!    8!    9!
+    //
+    //                     2     3     4     5     6     7     8
+    //              x     x     x     x     x     x     x     x
+    //   = x [ 1 + --- + --- + --- + --- + --- + --- + --- + --- + ... ]
+    //              2!    3!    4!    5!    6!    7!    8!    9!
+    //
+    //           8                          8
+    //          --       2k                --        2k+1
+    //   p(x) = >  C  * x           q(x) = >  C   * x
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //    x
+    //   e  - 1 ~ x * [ p(x) + x * q(x) ]
+    //
+*/
+    float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE, status); /* the bracketed series above (given the EvalPoly assumption) */
+    return float128_mul(t, x, status);                       /* multiply by the factored-out x */
+}
+
+// =================================================
+//                                  x
+// F2XM1                   Compute 2  - 1
+// =================================================
+// Returns 2^a - 1 as a floatx80; exception flags are raised through float_raise() on *status.
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//      x    x*ln(2)
+//     2  = e
+//
+// 2. ----------------------------------------------------------
+//                      2     3     4     5           n
+//      x        x     x     x     x     x           x
+//     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
+//               1!    2!    3!    4!    5!          n!
+//
+// Finite inputs with |a| >= 1 are returned unchanged, except a == -1.0, which yields -0.5.
+floatx80 f2xm1(floatx80 a, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+    Bit64u zSig0, zSig1, zSig2;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    if (aExp == 0x7FFF) { // NaN or infinity
+        if ((Bit64u) (aSig<<1)) // fraction bits set: NaN
+            return propagateFloatx80NaNOne(a, status);
+
+        return (aSign) ? floatx80_negone : a; // -inf -> -1, +inf -> +inf
+    }
+
+    if (aExp == 0) { // zero or denormal
+        if (aSig == 0) return a; // a zero is returned unchanged (sign included)
+        float_raise(status, float_flag_denormal | float_flag_inexact);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+        // denormals fall through into the tiny-argument path; it is also entered by goto from below
+    tiny_argument:
+        mul128By64To192(LN2_SIG_HI, LN2_SIG_LO, aSig, &zSig0, &zSig1, &zSig2); // result ~ a * ln(2): multiply the significands
+        if (0 < (Bit64s) zSig0) { // top bit of the product is clear: shift left once and lower the exponent
+            shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+            --aExp;
+        }
+        return
+            roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1, status);
+    }
+
+    float_raise(status, float_flag_inexact); // raised for every remaining input, including those returned unchanged
+
+    if (aExp < 0x3FFF) // |a| < 1
+    {
+        if (aExp < FLOATX80_EXP_BIAS-68) // very small |a|: use the a * ln(2) path above
+            goto tiny_argument;
+
+        /* ******************************** */
+        /* using float128 for approximation */
+        /* ******************************** */
+
+        float128 x = floatx80_to_float128(a, status);
+        x = float128_mul(x, float128_ln2, status); // x = a * ln(2)
+        x = poly_exp(x, status);                   // e^x - 1
+        return float128_to_floatx80(x, status);
+    }
+    else // |a| >= 1
+    {
+        if (a.exp == 0xBFFF && ! (aSig<<1)) // a == -1.0 exactly: 2^-1 - 1
+           return floatx80_neghalf;
+
+        return a;
+    }
+}
diff --git a/src/cpu/softfloat/fpatan.cc b/src/cpu/softfloat/fpatan.cc
new file mode 100644
index 000000000..f33a3ff66
--- /dev/null
+++ b/src/cpu/softfloat/fpatan.cc
@@ -0,0 +1,288 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+#define FPATAN_ARR_SIZE 11
+/* Bit-pattern constants: 1 and sqrt(3) as float128, pi as floatx80, then pi/2, pi/4 and pi/6 as float128. */
+static const float128 float128_one =
+        packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
+static const float128 float128_sqrt3 =
+        packFloat128(BX_CONST64(0x3fffbb67ae8584ca), BX_CONST64(0xa73b25742d7078b8));
+static const floatx80 floatx80_pi  =
+        packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
+
+static const float128 float128_pi2 =
+        packFloat128(BX_CONST64(0x3fff921fb54442d1), BX_CONST64(0x8469898CC5170416));
+static const float128 float128_pi4 =
+        packFloat128(BX_CONST64(0x3ffe921fb54442d1), BX_CONST64(0x8469898CC5170416));
+static const float128 float128_pi6 =
+        packFloat128(BX_CONST64(0x3ffe0c152382d736), BX_CONST64(0x58465BB32E0F580F));
+/* atan_arr[k] holds (-1)^k / (2k+1) as a float128 bit pattern; the trailing comment on each row is 2k+1. */
+static float128 atan_arr[FPATAN_ARR_SIZE] =
+{
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+    PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /*  3 */
+    PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
+    PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /*  7 */
+    PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
+    PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
+    PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
+    PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */
+    PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */
+    PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */
+    PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618)  /* 21 */
+};
+
+extern float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
+/* poly_atan: atan series for an already-reduced argument; delegates to OddPoly (defined elsewhere, presumably x * sum(arr[k] * x^(2k)) -- TODO confirm). */
+/* |x| < 1/4 */
+static float128 poly_atan(float128 x1, struct float_status_t *status)
+{
+/*
+    //                 3     5     7     9     11     13     15     17
+    //                x     x     x     x     x      x      x      x
+    // atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ----
+    //                3     5     7     9     11     13     15     17
+    //
+    //                 2     4     6     8     10     12     14     16
+    //                x     x     x     x     x      x      x      x
+    //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- + ---- ]
+    //                3     5     7     9     11     13     15     17
+    //
+    //           5                          5
+    //          --       4k                --        4k+2
+    //   p(x) = >  C  * x           q(x) = >  C   * x
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //                            2
+    //    atan(x) ~ x * [ p(x) + x * q(x) ]
+    //
+*/
+    return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE, status);
+}
+
+// =================================================
+// FPATAN                  Compute arctan(b / a)
+//                         (quadrant-aware, like atan2(b, a))
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//   atan(-x) = -atan(x)
+//
+// 2. ----------------------------------------------------------
+//
+//                             x + y
+//   atan(x) + atan(y) = atan -------, xy < 1
+//                             1-xy
+//
+//                             x + y
+//   atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1
+//                             1-xy
+//
+//                             x + y
+//   atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1
+//                             1-xy
+//
+// 3. ----------------------------------------------------------
+//
+//   atan(x) = atan(INF) + atan(- 1/x)
+//
+//                           x-1
+//   atan(x) = PI/4 + atan( ----- )
+//                           x+1
+//
+//                           x * sqrt(3) - 1
+//   atan(x) = PI/6 + atan( ----------------- )
+//                             x + sqrt(3)
+//
+// 4. ----------------------------------------------------------
+//                   3     5     7     9                 2n+1
+//                  x     x     x     x              n  x
+//   atan(x) = x - --- + --- - --- + --- - ... + (-1)  ------ + ...
+//                  3     5     7     9                 2n+1
+//
+// a acts as the x operand and b as the y operand; the result carries the sign of b. Flags are raised on *status.
+floatx80 fpatan(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    Bit64u bSig = extractFloatx80Frac(b);
+    Bit32s bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    int zSign = aSign ^ bSign; // set when a and b have opposite signs
+
+    if (bExp == 0x7FFF) // b is NaN or infinity
+    {
+        if ((Bit64u) (bSig<<1))
+            return propagateFloatx80NaN(a, b, status);
+
+        if (aExp == 0x7FFF) {
+            if ((Bit64u) (aSig<<1))
+                return propagateFloatx80NaN(a, b, status);
+
+            if (aSign) {   /* return 3PI/4 */
+                return roundAndPackFloatx80(80, bSign,
+                        FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO, status);
+            }
+            else {         /* return  PI/4 */
+                return roundAndPackFloatx80(80, bSign,
+                        FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+            }
+        }
+
+        if (aSig && (aExp == 0)) // a is a denormal
+            float_raise(status, float_flag_denormal);
+
+        /* return PI/2 */
+        return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+    }
+    if (aExp == 0x7FFF) // a is NaN or infinity, b is finite
+    {
+        if ((Bit64u) (aSig<<1))
+            return propagateFloatx80NaN(a, b, status);
+
+        if (bSig && (bExp == 0)) // b is a denormal
+            float_raise(status, float_flag_denormal);
+
+return_PI_or_ZERO: // also reached by goto when b is zero
+
+        if (aSign) {   /* return PI */
+            return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+        } else {       /* return  0 */
+            return packFloatx80(bSign, 0, 0);
+        }
+    }
+    if (bExp == 0) // b is zero or a denormal
+    {
+        if (bSig == 0) {
+             if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+             goto return_PI_or_ZERO;
+        }
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) // a is zero or a denormal (b is non-zero here)
+    {
+        if (aSig == 0)   /* return PI/2 */
+            return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+
+    float_raise(status, float_flag_inexact);
+
+    /* |a| = |b| ==> return PI/4 (a > 0) or 3PI/4 (a < 0), with the sign of b */
+    if (aSig == bSig && aExp == bExp) {
+        if (aSign) // negative a: the angle lies in the 2nd/3rd quadrant, as in the both-infinite case above
+            return roundAndPackFloatx80(80, bSign, FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO, status);
+        return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+    }
+
+    /* using float128 for approximation */
+    float128 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0, status); // built with sign 0: magnitudes only
+    float128 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0, status);
+    float128 x;
+    int swap = 0, add_pi6 = 0, add_pi4 = 0;
+
+    if (aExp > bExp || (aExp == bExp && aSig > bSig)) // |a| > |b|: x = |b| / |a|
+    {
+        x = float128_div(b128, a128, status);
+    }
+    else { // |a| < |b|: x = |a| / |b|; the result is turned into PI/2 - atan(x) below
+        x = float128_div(a128, b128, status);
+        swap = 1;
+    }
+
+    Bit32s xExp = extractFloat128Exp(x);
+
+    if (xExp <= FLOATX80_EXP_BIAS-40) // x is tiny: x itself is used as atan(x)
+        goto approximation_completed;
+
+    if (x.hi >= BX_CONST64(0x3ffe800000000000))        // 3/4 < x < 1
+    {
+        /*
+        arctan(x) = arctan((x-1)/(x+1)) + pi/4
+        */
+        float128 t1 = float128_sub(x, float128_one, status);
+        float128 t2 = float128_add(x, float128_one, status);
+        x = float128_div(t1, t2, status);
+        add_pi4 = 1;
+    }
+    else
+    {
+        /* argument correction */
+        if (xExp >= 0x3FFD)                     // 1/4 < x < 3/4
+        {
+            /*
+            arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6
+            */
+            float128 t1 = float128_mul(x, float128_sqrt3, status);
+            float128 t2 = float128_add(x, float128_sqrt3, status);
+            x = float128_sub(t1, float128_one, status);
+            x = float128_div(x, t2, status);
+            add_pi6 = 1;
+        }
+    }
+
+    x = poly_atan(x, status);
+    if (add_pi6) x = float128_add(x, float128_pi6, status);
+    if (add_pi4) x = float128_add(x, float128_pi4, status);
+
+approximation_completed:
+    if (swap) x = float128_sub(float128_pi2, x, status);
+    floatx80 result = float128_to_floatx80(x, status);
+    if (zSign) floatx80_chs(result);
+    int rSign = extractFloatx80Sign(result);
+    if (!bSign && rSign) // b >= 0 but the angle came out negative: move it up by PI
+        return floatx80_add(result, floatx80_pi, status);
+    if (bSign && !rSign) // b < 0 but the angle came out positive: move it down by PI
+        return floatx80_sub(result, floatx80_pi, status);
+    return result;
+}
diff --git a/src/cpu/softfloat/fprem.cc b/src/cpu/softfloat/fprem.cc
new file mode 100644
index 000000000..26637c5c5
--- /dev/null
+++ b/src/cpu/softfloat/fprem.cc
@@ -0,0 +1,196 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#define USE_estimateDiv128To64
+#include "softfloat-macros.h"
+
+/* Executes a single exponent reduction cycle: divides (aSig0 << expDiff) by bSig. */
+static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
+{   /* Returns the quotient; the remainder is stored through zSig1 (the word whose sign is tested) and zSig0. */
+    Bit64u term0, term1;
+    Bit64u aSig1 = 0;   /* takes the bits shifted out of aSig0 */
+
+    shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);   /* dividend as the pair aSig1:aSig0 */
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, bSig);          /* quotient estimate, corrected below */
+    mul64To128(bSig, q, &term0, &term1);                        /* term0:term1 = bSig * q */
+    sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);           /* remainder = dividend - bSig * q */
+    while ((Bit64s)(*zSig1) < 0) {                              /* negative remainder: q was too large */
+        --q;
+        add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);          /* give one bSig back */
+    }
+    return q;
+}
+
+static int do_fprem(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, int rounding_mode, struct float_status_t *status)
+{   /* Stores the remainder of a by b in *r and the quotient in *q. Returns -1 when *r is a NaN, 1 after a partial reduction, else 0. */
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp, zExp, expDiff;
+    Bit64u aSig0, aSig1, bSig;
+    int aSign;
+    *q = 0;   // stays 0 on every early exit and on the partial-reduction path
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        *r = floatx80_default_nan;
+        return -1;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) {   // a is infinity or NaN
+        if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {   // either operand is a NaN
+            *r = propagateFloatx80NaN(a, b, status);
+            return -1;
+        }
+        float_raise(status, float_flag_invalid);   // infinite dividend
+        *r = floatx80_default_nan;
+        return -1;
+    }
+    if (bExp == 0x7FFF) {   // b is infinity or NaN, a is finite
+        if ((Bit64u) (bSig<<1)) {
+            *r = propagateFloatx80NaN(a, b, status);
+            return -1;
+        }
+        if (aExp == 0 && aSig0) {   // a has a zero exponent field but a non-zero significand
+            float_raise(status, float_flag_denormal);
+            normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+            *r = (a.fraction & BX_CONST64(0x8000000000000000)) ?   // integer bit set: repack normalized, else return a unchanged
+                    packFloatx80(aSign, aExp, aSig0) : a;
+            return 0;
+        }
+        *r = a;   // finite a with infinite b: a is the remainder
+        return 0;
+
+    }
+    if (bExp == 0) {
+        if (bSig == 0) {   // zero divisor
+            float_raise(status, float_flag_invalid);
+            *r = floatx80_default_nan;
+            return -1;
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) {
+        if (aSig0 == 0) {   // zero dividend is returned as-is
+            *r = a;
+            return 0;
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+
+    Bit32u overflow = 0;   // becomes the return value: 1 means the reduction is incomplete
+
+    if (expDiff >= 64) {   // exponents too far apart for one kernel call
+        int n = (expDiff & 0x1f) | 0x20;   // reduce by n bits this call, 32 <= n <= 63
+        remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);   // quotient is discarded, *q stays 0
+        zExp = aExp - n;
+        overflow = 1;
+    }
+    else {
+        zExp = bExp;
+
+        if (expDiff < 0) {
+            if (expDiff < -1) {   // a is below half of b, so a is already the remainder
+               *r = (a.fraction & BX_CONST64(0x8000000000000000)) ?
+                    packFloatx80(aSign, aExp, aSig0) : a;
+               return 0;
+            }
+            shift128Right(aSig0, 0, 1, &aSig0, &aSig1);   // expDiff == -1: bring a to b's exponent
+            expDiff = 0;
+        }
+
+        if (expDiff > 0) {
+            *q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
+        }
+        else {
+            if (bSig <= aSig0) {   // same exponent: the quotient is 0 or 1
+               aSig0 -= bSig;
+               *q = 1;
+            }
+        }
+
+        if (rounding_mode == float_round_nearest_even)   // IEEE remainder only: move to the nearest quotient
+        {
+            Bit64u term0, term1;
+            shift128Right(bSig, 0, 1, &term0, &term1);   // term0:term1 = bSig / 2
+
+            if (! lt128(aSig0, aSig1, term0, term1))   // remainder >= bSig / 2
+            {
+               int lt = lt128(term0, term1, aSig0, aSig1);   // remainder > bSig / 2
+               int eq = eq128(aSig0, aSig1, term0, term1);
+
+               if ((eq && ((*q) & 1)) || lt) {   // above half, or a tie with an odd quotient
+                  aSign = !aSign;
+                  ++(*q);
+               }
+               if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);   // remainder = bSig - remainder
+            }
+        }
+    }
+
+    *r = normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
+    return overflow;
+}
+
+/*----------------------------------------------------------------------------
+| Stores in `*r' the IEC/IEEE remainder of the extended double-precision value
+| `a' with respect to `b' and in `*q' the quotient reported by do_fprem(),
+| called with float_round_nearest_even.  Returns do_fprem()'s status code.
+*----------------------------------------------------------------------------*/
+
+int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status)
+{
+    return do_fprem(a, b, r, q, float_round_nearest_even, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  Unlike
+| floatx80_ieee754_remainder(), this is not the remainder specified by the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic: do_fprem() is called
+| with float_round_to_zero, so its round-to-nearest-even quotient adjustment
+| is skipped.  `*r', `*q' and the return value come straight from do_fprem().
+*----------------------------------------------------------------------------*/
+
+int floatx80_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status)
+{
+    return do_fprem(a, b, r, q, float_round_to_zero, status);
+}
diff --git a/src/cpu/softfloat/fpu_constant.h b/src/cpu/softfloat/fpu_constant.h
new file mode 100644
index 000000000..7a7fc6f1a
--- /dev/null
+++ b/src/cpu/softfloat/fpu_constant.h
@@ -0,0 +1,82 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _FPU_CONSTANTS_H_
+#define _FPU_CONSTANTS_H_
+
+#include "config.h"
+
+// Pentium CPU uses only 68-bit precision M_PI approximation; the default *_LO words below keep only their top 4 bits
+//#define BETTER_THAN_PENTIUM   // uncomment to select the full 128-bit fractions instead
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+//////////////////////////////
+// PI, PI/2, PI/4 constants (one shared fraction, three exponents)
+//////////////////////////////
+
+#define FLOATX80_PI_EXP  (0x4000)   // exponent field for PI
+
+// 128-bit PI fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_PI_HI (BX_CONST64(0xc90fdaa22168c234))
+#define FLOAT_PI_LO (BX_CONST64(0xc4c6628b80dc1cd1))
+#else
+#define FLOAT_PI_HI (BX_CONST64(0xc90fdaa22168c234))
+#define FLOAT_PI_LO (BX_CONST64(0xC000000000000000))   // 4 extra bits only
+#endif
+
+#define FLOATX80_PI2_EXP  (0x3FFF)   // PI/2: FLOATX80_PI_EXP - 1
+#define FLOATX80_PI4_EXP  (0x3FFE)   // PI/4: FLOATX80_PI_EXP - 2
+
+//////////////////////////////
+// 3PI/4 constant
+//////////////////////////////
+
+#define FLOATX80_3PI4_EXP (0x4000)
+
+// 128-bit 3PI/4 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
+#define FLOAT_3PI4_LO (BX_CONST64(0x9394c9e8a0a5159c))
+#else
+#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
+#define FLOAT_3PI4_LO (BX_CONST64(0x9000000000000000))   // 4 extra bits only
+#endif
+
+//////////////////////////////
+// 1/LN2 constant
+//////////////////////////////
+
+#define FLOAT_LN2INV_EXP  (0x3FFF)
+
+// 128-bit 1/LN2 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_LN2INV_HI (BX_CONST64(0xb8aa3b295c17f0bb))
+#define FLOAT_LN2INV_LO (BX_CONST64(0xbe87fed0691d3e89))
+#else
+#define FLOAT_LN2INV_HI (BX_CONST64(0xb8aa3b295c17f0bb))
+#define FLOAT_LN2INV_LO (BX_CONST64(0xC000000000000000))   // 4 extra bits only
+#endif
+
+#endif // _FPU_CONSTANTS_H_
diff --git a/src/cpu/softfloat/fsincos.cc b/src/cpu/softfloat/fsincos.cc
new file mode 100644
index 000000000..f5b33a823
--- /dev/null
+++ b/src/cpu/softfloat/fsincos.cc
@@ -0,0 +1,441 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));   // used as cos() of zero and tiny arguments
+
+/* Reduces a trigonometric argument: divides (aSig0 << Exp) by the PI fraction FLOAT_PI_HI:FLOAT_PI_LO,
+   returns the quotient and hands the remainder back through zSig0 (upper word) and zSig1 (lower word). */
+static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
+{
+    Bit64u term0, term1, term2;
+    Bit64u aSig1 = 0;   /* takes the bits shifted out of aSig0 */
+
+    shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);   /* estimate uses the high PI word only */
+    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);   /* NOTE(review): term2 is never subtracted from the dividend, only carried below - confirm intended */
+    sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+    while ((Bit64s)(*zSig1) < 0) {   /* negative remainder: q was too large */
+        --q;
+        add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);   /* add one PI back */
+    }
+    *zSig1 = term2;   /* the sign-checked word is dropped; *zSig1 now carries the lowest word */
+    return q;
+}
+
+static int reduce_trig_arg(int expDiff, int *zSign, Bit64u *aSig0, Bit64u *aSig1)
+{   /* Reduces *aSig0:*aSig1 modulo PI to at most PI/2, flipping *zSign when the result is reflected. Returns the quotient modulo 4. */
+    Bit64u term0, term1, q = 0;
+
+    if (expDiff < 0) {   /* the callers in this file only pass expDiff >= -1 */
+        shift128Right(*aSig0, 0, 1, aSig0, aSig1);
+        expDiff = 0;
+    }
+    if (expDiff > 0) {
+        q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1);
+    }
+    else {
+        if (FLOAT_PI_HI <= *aSig0) {   /* same exponent as PI: the quotient is 0 or 1 */
+            *aSig0 -= FLOAT_PI_HI;
+            q = 1;
+        }
+    }
+
+    shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);   /* term0:term1 = PI / 2 */
+    if (! lt128(*aSig0, *aSig1, term0, term1))   /* remainder >= PI / 2 */
+    {
+        int lt = lt128(term0, term1, *aSig0, *aSig1);   /* remainder > PI / 2 */
+        int eq = eq128(*aSig0, *aSig1, term0, term1);
+
+        if ((eq && (q & 1)) || lt) {   /* above half, or a tie with an odd quotient */
+            *zSign = !(*zSign);
+            ++q;
+        }
+        if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1);   /* remainder = PI - remainder */
+    }
+
+    return (int)(q & 3);
+}
+
+#define SIN_ARR_SIZE 11   // odd powers x^1 .. x^21
+#define COS_ARR_SIZE 11   // even powers x^0 .. x^20
+
+static float128 sin_arr[SIN_ARR_SIZE] =
+{   /* Taylor coefficients of sin(x): the row marked n holds +-1/n! with alternating sign */
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+    PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
+    PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+    PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+    PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+    PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+    PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+    PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
+    PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
+    PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
+    PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
+};
+
+static float128 cos_arr[COS_ARR_SIZE] =
+{   /* Taylor coefficients of cos(x): the row marked n holds +-1/n! with alternating sign */
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
+    PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
+    PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+    PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+    PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+    PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+    PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+    PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+    PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
+    PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
+    PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
+};
+
+extern float128 OddPoly (float128 x, float128 *arr, int n, struct float_status_t *status);   // declared here, defined outside this file
+
+/* Approximates sin(x) by passing x and the sin_arr coefficient table to OddPoly(); intended range 0 <= x <= pi/4 */
+BX_CPP_INLINE float128 poly_sin(float128 x, struct float_status_t *status)
+{
+    //                 3     5     7     9     11     13     15
+    //                x     x     x     x     x      x      x
+    // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
+    //                3!    5!    7!    9!    11!    13!    15!
+    //
+    //                 2     4     6     8     10     12     14
+    //                x     x     x     x     x      x      x
+    //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
+    //                3!    5!    7!    9!    11!    13!    15!
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //                          2
+    //   sin(x) ~ x * [ p(x) + x * q(x) ]
+    // NOTE: the expansion drawn above is abbreviated; sin_arr supplies SIN_ARR_SIZE (11) terms, up to x^21/21!.
+
+    return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
+}
+
+extern float128 EvenPoly(float128 x, float128 *arr, int n, struct float_status_t *status);   // declared here, defined outside this file
+
+/* Approximates cos(x) by passing x and the cos_arr coefficient table to EvenPoly(); intended range 0 <= x <= pi/4 */
+BX_CPP_INLINE float128 poly_cos(float128 x, struct float_status_t *status)
+{
+    //                 2     4     6     8     10     12     14
+    //                x     x     x     x     x      x      x
+    // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
+    //                2!    4!    6!    8!    10!    12!    14!
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //                      2
+    //   cos(x) ~ [ p(x) + x * q(x) ]
+    // NOTE: the expansion drawn above is abbreviated; cos_arr supplies COS_ARR_SIZE (11) terms, up to x^20/20!.
+
+    return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
+}
+
+BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{   // Stores the same value a (a NaN at the call sites in fsincos) in every output that was requested.
+    if (sin_a) *sin_a = a;   // a null pointer means this output is not wanted
+    if (cos_a) *cos_a = a;
+}
+
+BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{   // Result for zero and very small arguments: sin(a) is a itself, cos(a) is 1.
+    if (sin_a) *sin_a = a;   // a null pointer means this output is not wanted
+    if (cos_a) *cos_a = floatx80_one;
+}
+
+static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, struct float_status_t *status)
+{   // Evaluates the sine of the original angle from the reduced argument r, the quadrant bits of quotient and the sign flag neg.
+    if (quotient & 0x1) {   // odd quadrant: sin(r + pi/2) = cos(r)
+        r = poly_cos(r, status);
+        neg = 0;            // the incoming sign is discarded on this path
+    } else  {
+        r = poly_sin(r, status);
+    }
+
+    floatx80 result = float128_to_floatx80(r, status);
+    if (quotient & 0x2)     // bit 1 of the quadrant toggles the sign
+        neg = ! neg;
+
+    if (neg)
+        floatx80_chs(result);
+
+    return result;
+}
+
+// =================================================
+// FSINCOS               Compute sin(x) and cos(x)
+// =================================================
+
+//
+// Uses the following identities:
+// ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = cos(x)*cos(y)-sin(x)*sin(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+
+int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, struct float_status_t *status)
+{   /* Writes sin(a) to *sin_a and cos(a) to *cos_a (null pointers are skipped). Returns -1, outputs untouched, if a is out of range; else 0. */
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit64u aSig0, aSig1 = 0;
+    Bit32s aExp, zExp, expDiff;
+    int aSign, zSign;
+    int q = 0;   // quadrant count from the argument reduction; stays 0 when no reduction is needed
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) {
+        goto invalid;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+
+    /* invalid argument */
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig0<<1)) {   // NaN: propagate it to both outputs
+            sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOne(a, status));
+            return 0;
+        }
+
+    invalid:   // infinity, or an unsupported encoding via the goto above
+        float_raise(status, float_flag_invalid);
+        sincos_invalid(sin_a, cos_a, floatx80_default_nan);
+        return 0;
+    }
+
+    if (aExp == 0) {
+        if (aSig0 == 0) {   // zero: sin = a (sign kept), cos = 1
+            sincos_tiny_argument(sin_a, cos_a, a);
+            return 0;
+        }
+
+        float_raise(status, float_flag_denormal);
+
+        /* true denormals (integer bit clear) are answered as tiny arguments; pseudo-denormals fall through */
+        if (! (aSig0 & BX_CONST64(0x8000000000000000)))
+        {
+            float_raise(status, float_flag_inexact);
+            if (sin_a)   // underflow is raised only when the sine is requested
+                float_raise(status, float_flag_underflow);
+            sincos_tiny_argument(sin_a, cos_a, a);
+            return 0;
+        }
+
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+
+    zSign = aSign;
+    zExp = FLOATX80_EXP_BIAS;
+    expDiff = aExp - zExp;
+
+    /* argument is out-of-range */
+    if (expDiff >= 63)
+        return -1;
+
+    float_raise(status, float_flag_inexact);
+
+    if (expDiff < -1) {    // doesn't require reduction
+        if (expDiff <= -68) {   // tiny argument: sin = a, cos = 1
+            a = packFloatx80(aSign, aExp, aSig0);
+            sincos_tiny_argument(sin_a, cos_a, a);
+            return 0;
+        }
+        zExp = aExp;
+    }
+    else {
+        q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);   // may flip zSign; q is the quotient modulo 4
+    }
+
+    /* **************************** */
+    /* argument reduction completed */
+    /* **************************** */
+
+    /* using float128 for approximation */
+    float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
+
+    if (aSign) q = -q;   // negative input: mirror the quadrant
+    if (sin_a) *sin_a = sincos_approximation(zSign, r,   q, status);
+    if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status);   // cos(x) = sin(x + pi/2): one quadrant ahead
+
+    return 0;
+}
+
+int fsin(floatx80 *a, struct float_status_t *status)
+{   // Replaces *a with its sine; returns fsincos()'s result code.
+    return fsincos(*a, a, 0, status);   // null cos_a: only the sine is computed
+}
+
+int fcos(floatx80 *a, struct float_status_t *status)
+{   // Replaces *a with its cosine; returns fsincos()'s result code.
+    return fsincos(*a, 0, a, status);   // null sin_a: only the cosine is computed
+}
+
+// =================================================
+// FPTAN                 Compute tan(x)
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = cos(x)*cos(y)-sin(x)*sin(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+// 2. ----------------------------------------------------------
+//
+//           sin(x)
+//  tan(x) = ------
+//           cos(x)
+//
+
+int ftan(floatx80 *a, struct float_status_t *status)
+{   /* Replaces *a with tan(*a). Returns -1, leaving *a untouched, if the argument is out of range; else 0. */
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit64u aSig0, aSig1 = 0;
+    Bit32s aExp, zExp, expDiff;
+    int aSign, zSign;
+    int q = 0;   // quadrant count from the argument reduction; stays 0 when no reduction is needed
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(*a)) {
+        goto invalid;
+    }
+
+    aSig0 = extractFloatx80Frac(*a);
+    aExp = extractFloatx80Exp(*a);
+    aSign = extractFloatx80Sign(*a);
+
+    /* invalid argument */
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig0<<1))   // NaN: propagate it
+        {
+            *a = propagateFloatx80NaNOne(*a, status);
+            return 0;
+        }
+
+    invalid:   // infinity, or an unsupported encoding via the goto above
+        float_raise(status, float_flag_invalid);
+        *a = floatx80_default_nan;
+        return 0;
+    }
+
+    if (aExp == 0) {
+        if (aSig0 == 0) return 0;   // zero: *a is already the result
+        float_raise(status, float_flag_denormal);
+        /* true denormals (integer bit clear) are returned unchanged; pseudo-denormals fall through */
+        if (! (aSig0 & BX_CONST64(0x8000000000000000)))
+        {
+            float_raise(status, float_flag_inexact | float_flag_underflow);
+            return 0;
+        }
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+
+    zSign = aSign;
+    zExp = FLOATX80_EXP_BIAS;
+    expDiff = aExp - zExp;
+
+    /* argument is out-of-range */
+    if (expDiff >= 63)
+        return -1;
+
+    float_raise(status, float_flag_inexact);
+
+    if (expDiff < -1) {    // doesn't require reduction
+        if (expDiff <= -68) {   // tiny argument: the (repacked) input is returned as the result
+            *a = packFloatx80(aSign, aExp, aSig0);
+            return 0;
+        }
+        zExp = aExp;
+    }
+    else {
+        q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);   // may flip zSign; q is the quotient modulo 4
+    }
+
+    /* **************************** */
+    /* argument reduction completed */
+    /* **************************** */
+
+    /* using float128 for approximation */
+    float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
+
+    float128 sin_r = poly_sin(r, status);
+    float128 cos_r = poly_cos(r, status);
+
+    if (q & 0x1) {   // odd quadrant: tan(r + pi/2) = -cos(r)/sin(r)
+        r = float128_div(cos_r, sin_r, status);
+        zSign = ! zSign;
+    } else {
+        r = float128_div(sin_r, cos_r, status);
+    }
+
+    *a = float128_to_floatx80(r, status);
+    if (zSign)
+        floatx80_chs(*a);
+
+    return 0;
+}
diff --git a/src/cpu/softfloat/fyl2x.cc b/src/cpu/softfloat/fyl2x.cc
new file mode 100644
index 000000000..875f866a9
--- /dev/null
+++ b/src/cpu/softfloat/fyl2x.cc
@@ -0,0 +1,363 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+static const floatx80 floatx80_one =
+    packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));   /* 1.0; added to the argument by fyl2xp1() on its large-argument path */
+
+static const float128 float128_one =
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));   /* 1.0 */
+static const float128 float128_two =
+    packFloat128(BX_CONST64(0x4000000000000000), BX_CONST64(0x0000000000000000));   /* 2.0 */
+
+static const float128 float128_ln2inv2 =
+    packFloat128(BX_CONST64(0x400071547652b82f), BX_CONST64(0xe1777d0ffda0d23a));   /* 2/ln(2) = 2.885390...; turns poly_ln()'s 1/2*ln(...) into log2(...) */
+
+#define SQRT2_HALF_SIG 	BX_CONST64(0xb504f333f9de6484)   /* sqrt(2)/2 scaled by 2^64; fyl2x() compares the 64-bit significand against it */
+
+extern float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status);   /* defined outside this file */
+
+#define L2_ARR_SIZE 9   /* number of entries in ln_arr */
+
+static float128 ln_arr[L2_ARR_SIZE] =   /* entry k holds 1/(2k+1); left non-const because OddPoly() takes a float128 * */
+{
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1/1  */
+    PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 1/3  */
+    PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 1/5  */
+    PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 1/7  */
+    PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 1/9  */
+    PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 1/11 */
+    PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 1/13 */
+    PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 1/15 */
+    PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 1/17 */
+};
+
+static float128 poly_ln(float128 x1, struct float_status_t *status)
+{
+/*
+    // Odd power series for half the logarithm of (1+u)/(1-u), with u = x1:
+    //
+    //                     3     5     7     9     11     13     15
+    //        1+u         u     u     u     u     u      u      u
+    // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- + ...
+    //        1-u         3     5     7     9     11     13     15
+    //
+    //                     2     4     6     8     10     12     14
+    //                    u     u     u     u     u      u      u
+    //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- + ... ]
+    //                    3     5     7     9     11     13     15
+    //
+    // ln_arr supplies the coefficients 1/(2k+1) for k = 0 .. L2_ARR_SIZE-1,
+    // i.e. one more term (u^17 / 17) than is drawn above.
+    //
+    // The bracket can be split as p(u) + u^2 * q(u), where p and q collect
+    // alternate coefficients.  OddPoly() is defined in another file, so
+    // whether it evaluates the series that way is not visible from here
+    // (NOTE(review): confirm against OddPoly()).
+*/
+    const float128 half_ln = OddPoly(x1, ln_arr, L2_ARR_SIZE, status);
+
+    return half_ln;
+}
+
+/* log2(x), required sqrt(2)/2 < x < sqrt(2): u = (x-1)/(x+1), result = poly_ln(u) * 2/ln(2) */
+static float128 poly_l2(float128 x, struct float_status_t *status)
+{
+    /* every step is a float128 operation; the add is issued before the sub, as status is shared */
+    const float128 denom   = float128_add(x, float128_one, status);
+    const float128 numer   = float128_sub(x, float128_one, status);
+    const float128 u       = float128_div(numer, denom, status);
+    const float128 half_ln = poly_ln(u, status);
+
+    return float128_mul(half_ln, float128_ln2inv2, status);
+}
+
+static float128 poly_l2p1(float128 x, struct float_status_t *status)
+{
+    /* log2(1 + x) in float128: u = x / (x + 2), then poly_ln(u) scaled by float128_ln2inv2 */
+    const float128 x_plus_two = float128_add(x, float128_two, status);
+    const float128 u          = float128_div(x, x_plus_two, status);
+    const float128 half_ln    = poly_ln(u, status);
+
+    return float128_mul(half_ln, float128_ln2inv2, status);
+}
+
+// =================================================
+// FYL2X                   Compute y * log (x)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                1+u             x-1
+//   ln (x) = ln -----, when u = -----
+//                1-u             x+1
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2x(floatx80 a, floatx80 b, struct float_status_t *status)
+{   // Returns b * log2(a); special operands and their flags are resolved first, the logarithm itself is evaluated in float128.
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
+invalid:    // shared exit for every invalid-operation case below (reached via goto)
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    Bit64u bSig = extractFloatx80Frac(b);
+    Bit32s bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    int zSign = bSign ^ 1;   // opposite of b's sign: used where log2(a) is negative (a < 1, including a == 0)
+
+    if (aExp == 0x7FFF) {   // a is a NaN or an infinity
+        if ((Bit64u) (aSig<<1)
+             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
+        {
+            return propagateFloatx80NaN(a, b, status);   // at least one operand is a NaN
+        }
+        if (aSign) goto invalid;   // a == -infinity
+        else {
+            if (bExp == 0) {
+                if (bSig == 0) goto invalid;   // b == 0 while a == +infinity
+                float_raise(status, float_flag_denormal);
+            }
+            return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));   // infinity carrying b's sign
+        }
+    }
+    if (bExp == 0x7FFF)   // b is a NaN or an infinity; a is finite here (every path above has left the function)
+    {
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSign && (Bit64u)(aExp | aSig)) goto invalid;   // a is negative and non-zero
+        if (aSig && (aExp == 0))
+            float_raise(status, float_flag_denormal);
+        if (aExp < 0x3FFF) {   // exponent below 0x3FFF, i.e. |a| < 1: infinity with the flipped sign
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid;   // a == 1, so log2(a) == 0 would multiply an infinity
+        return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) {   // a is zero
+            if ((bExp | bSig) == 0) goto invalid;   // b is zero as well
+            float_raise(status, float_flag_divbyzero);
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        if (aSign) goto invalid;   // negative denormal a
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (aSign) goto invalid;   // any remaining negative a
+    if (bExp == 0) {
+        if (bSig == 0) {   // b is zero: the result is a zero whose sign depends on whether a < 1
+            if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
+            return packFloatx80(bSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);   // bSig/bExp are not read again below; b itself is passed on
+    }
+    if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0))   // a == 1: exponent 0x3FFF with an empty fraction
+        return packFloatx80(bSign, 0, 0);
+
+    float_raise(status, float_flag_inexact);
+
+    int ExpDiff = aExp - 0x3FFF;   // exponent of a relative to 0x3FFF; added back to the logarithm below
+    aExp = 0;
+    if (aSig >= SQRT2_HALF_SIG) {   // move one power of two from the reduced argument into ExpDiff
+        ExpDiff++;
+        aExp--;
+    }
+
+    /* ******************************** */
+    /* using float128 for approximation */
+    /* ******************************** */
+
+    Bit64u zSig0, zSig1;
+    shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);   // aSig<<1 discards the top significand bit before the 16-bit shift
+    float128 x = packFloat128Four(0, aExp+0x3FFF, zSig0, zSig1);   // exponent field is 0x3FFF or 0x3FFE at this point
+    x = poly_l2(x, status);
+    x = float128_add(x, int64_to_float128((Bit64s) ExpDiff), status);   // log2 of the reduced argument plus the extracted exponent
+    return floatx80_128_mul(b, x, status);
+}
+
+// =================================================
+// FYL2XP1                 Compute y * log (x + 1)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                  1+u              x
+//   ln (x+1) = ln -----, when u = -----
+//                  1-u             x+2
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2xp1(floatx80 a, floatx80 b, struct float_status_t *status)
+{   // Returns b * log2(a + 1); after the special cases, one of three evaluation paths is chosen from a's exponent.
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp;
+    Bit64u aSig, bSig, zSig0, zSig1, zSig2;   // zSig2 is written by mul128By64To192() but never read
+    int aSign, bSign;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
+invalid:    // shared exit for every invalid-operation case below (reached via goto)
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    bSign = extractFloatx80Sign(b);
+    int zSign = aSign ^ bSign;   // sign of the product a * b
+
+    if (aExp == 0x7FFF) {   // a is a NaN or an infinity
+        if ((Bit64u) (aSig<<1)
+             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
+        {
+            return propagateFloatx80NaN(a, b, status);   // at least one operand is a NaN
+        }
+        if (aSign) goto invalid;   // a == -infinity
+        else {
+            if (bExp == 0) {
+                if (bSig == 0) goto invalid;   // b == 0 while a == +infinity
+                float_raise(status, float_flag_denormal);
+            }
+            return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));   // infinity carrying b's sign
+        }
+    }
+    if (bExp == 0x7FFF)   // b is a NaN or an infinity; a is finite here (every path above has left the function)
+    {
+        if ((Bit64u) (bSig<<1))
+            return propagateFloatx80NaN(a, b, status);
+
+        if (aExp == 0) {
+            if (aSig == 0) goto invalid;   // a == 0 while b is an infinity
+            float_raise(status, float_flag_denormal);
+        }
+
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) {   // a is zero: result is a zero signed like a * b
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloatx80(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return packFloatx80(zSign, 0, 0);   // b is zero
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+
+    float_raise(status, float_flag_inexact);
+
+    if (aSign && aExp >= 0x3FFF)   // negative a with exponent >= 0x3FFF, i.e. a <= -1
+        return a;   // the operand is handed back unchanged
+
+    if (aExp >= 0x3FFC) // big argument
+    {
+        return fyl2x(floatx80_add(a, floatx80_one, status), b, status);   // form a + 1 explicitly and reuse fyl2x()
+    }
+
+    // handle tiny argument
+    if (aExp < FLOATX80_EXP_BIAS-70)
+    {
+        // first order approximation, return (a*b)/ln(2)
+        Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
+
+	mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
+        if (0 < (Bit64s) zSig0) {   // top bit of the product is clear: shift left by one and lower the exponent to match
+            shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+            --zExp;
+        }
+
+        zExp = zExp + bExp - 0x3FFE;
+	mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
+        if (0 < (Bit64s) zSig0) {   // same one-bit renormalisation after multiplying by b's significand
+            shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+            --zExp;
+        }
+
+        return
+            roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1, status);
+    }
+
+    /* ******************************** */
+    /* using float128 for approximation */
+    /* ******************************** */
+
+    shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);   // aSig<<1 discards the top significand bit before the 16-bit shift
+    float128 x = packFloat128Four(aSign, aExp, zSig0, zSig1);   // a's sign and exponent are reused unchanged for the float128
+    x = poly_l2p1(x, status);
+    return floatx80_128_mul(b, x, status);
+}
diff --git a/src/cpu/softfloat/softfloat-compare.h b/src/cpu/softfloat/softfloat-compare.h
new file mode 100644
index 000000000..8b9821460
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-compare.h
@@ -0,0 +1,496 @@
+/*============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_COMPARE_H_
+#define _SOFTFLOAT_COMPARE_H_
+
+#include "softfloat.h"
+
+// ======= float32 ======= //
+
+typedef int (*float32_compare_method)(float32, float32, struct float_status_t *status);
+
+// 0x00: a == b; 0 when unordered
+BX_CPP_INLINE int float32_eq_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x01: a < b; 0 when unordered
+BX_CPP_INLINE int float32_lt_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x02: a <= b; 0 when unordered
+BX_CPP_INLINE int float32_le_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_less || rel == float_relation_equal;
+}
+
+// 0x03: 1 only when the operands are unordered
+BX_CPP_INLINE int float32_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x04: anything but equal; 1 when unordered
+BX_CPP_INLINE int float32_neq_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel != float_relation_equal;
+}
+
+// 0x05: not (a < b); 1 when unordered
+BX_CPP_INLINE int float32_nlt_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel != float_relation_less;
+}
+
+// 0x06: not (a <= b); 1 when unordered
+BX_CPP_INLINE int float32_nle_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel != float_relation_less && rel != float_relation_equal;
+}
+
+// 0x07: 1 unless the operands are unordered
+BX_CPP_INLINE int float32_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel != float_relation_unordered;
+}
+
+// 0x08: a == b, or unordered
+BX_CPP_INLINE int float32_eq_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_unordered;
+}
+
+// 0x09: a < b, or unordered
+BX_CPP_INLINE int float32_nge_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_less || rel == float_relation_unordered;
+}
+
+// 0x0a: not (a > b); 1 when unordered
+BX_CPP_INLINE int float32_ngt_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel != float_relation_greater;
+}
+
+// 0x0b: always 0; the comparison is still performed
+BX_CPP_INLINE int float32_false_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   (void) float32_compare_quiet(a, b, status);
+   return 0;
+}
+
+// 0x0c: a < b or a > b; 0 when equal or unordered
+BX_CPP_INLINE int float32_neq_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel != float_relation_equal && rel != float_relation_unordered;
+}
+
+// 0x0d: a >= b; 0 when unordered
+BX_CPP_INLINE int float32_ge_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_greater || rel == float_relation_equal;
+}
+
+// 0x0e: a > b; 0 when unordered
+BX_CPP_INLINE int float32_gt_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x0f: always 1; the comparison is still performed
+BX_CPP_INLINE int float32_true_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   (void) float32_compare_quiet(a, b, status);
+   return 1;
+}
+
+// 0x10: a == b; 0 when unordered
+BX_CPP_INLINE int float32_eq_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x11: a < b; 0 when unordered
+BX_CPP_INLINE int float32_lt_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x12: a <= b; 0 when unordered
+BX_CPP_INLINE int float32_le_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_less || rel == float_relation_equal;
+}
+
+// 0x13: 1 only when the operands are unordered
+BX_CPP_INLINE int float32_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x14: anything but equal; 1 when unordered
+BX_CPP_INLINE int float32_neq_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel != float_relation_equal;
+}
+
+// 0x15: not (a < b); 1 when unordered
+BX_CPP_INLINE int float32_nlt_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel != float_relation_less;
+}
+
+// 0x16: not (a <= b); 1 when unordered
+BX_CPP_INLINE int float32_nle_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel != float_relation_less && rel != float_relation_equal;
+}
+
+// 0x17: 1 unless the operands are unordered
+BX_CPP_INLINE int float32_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel != float_relation_unordered;
+}
+
+// 0x18: a == b, or unordered
+BX_CPP_INLINE int float32_eq_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_unordered;
+}
+
+// 0x19: a < b, or unordered
+BX_CPP_INLINE int float32_nge_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_less || rel == float_relation_unordered;
+}
+
+// 0x1a: not (a > b); 1 when unordered
+BX_CPP_INLINE int float32_ngt_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel != float_relation_greater;
+}
+
+// 0x1b: always 0; the comparison is still performed
+BX_CPP_INLINE int float32_false_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   (void) float32_compare_two(a, b, status);
+   return 0;
+}
+
+// 0x1c: a < b or a > b; 0 when equal or unordered
+BX_CPP_INLINE int float32_neq_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_two(a, b, status);
+   return rel != float_relation_equal && rel != float_relation_unordered;
+}
+
+// 0x1d: a >= b; 0 when unordered
+BX_CPP_INLINE int float32_ge_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_greater || rel == float_relation_equal;
+}
+
+// 0x1e: a > b; 0 when unordered
+BX_CPP_INLINE int float32_gt_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x1f: always 1; the comparison is still performed
+BX_CPP_INLINE int float32_true_signalling(float32 a, float32 b, struct float_status_t *status)
+{
+   (void) float32_compare_two(a, b, status);
+   return 1;
+}
+
+// ======= float64 ======= //
+
+typedef int (*float64_compare_method)(float64, float64, struct float_status_t *status);
+
+// 0x00: a == b; 0 when unordered
+BX_CPP_INLINE int float64_eq_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x01: a < b; 0 when unordered
+BX_CPP_INLINE int float64_lt_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x02: a <= b; 0 when unordered
+BX_CPP_INLINE int float64_le_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_less || rel == float_relation_equal;
+}
+
+// 0x03: 1 only when the operands are unordered
+BX_CPP_INLINE int float64_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x04: anything but equal; 1 when unordered
+BX_CPP_INLINE int float64_neq_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel != float_relation_equal;
+}
+
+// 0x05: not (a < b); 1 when unordered
+BX_CPP_INLINE int float64_nlt_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel != float_relation_less;
+}
+
+// 0x06: not (a <= b); 1 when unordered
+BX_CPP_INLINE int float64_nle_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel != float_relation_less && rel != float_relation_equal;
+}
+
+// 0x07: 1 unless the operands are unordered
+BX_CPP_INLINE int float64_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel != float_relation_unordered;
+}
+
+// 0x08: a == b, or unordered
+BX_CPP_INLINE int float64_eq_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_unordered;
+}
+
+// 0x09: a < b, or unordered
+BX_CPP_INLINE int float64_nge_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_less || rel == float_relation_unordered;
+}
+
+// 0x0a: not (a > b); 1 when unordered
+BX_CPP_INLINE int float64_ngt_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel != float_relation_greater;
+}
+
+// 0x0b: always 0; the comparison is still performed
+BX_CPP_INLINE int float64_false_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   (void) float64_compare_quiet(a, b, status);
+   return 0;
+}
+
+// 0x0c: a < b or a > b; 0 when equal or unordered
+BX_CPP_INLINE int float64_neq_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel != float_relation_equal && rel != float_relation_unordered;
+}
+
+// 0x0d: a >= b; 0 when unordered
+BX_CPP_INLINE int float64_ge_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_greater || rel == float_relation_equal;
+}
+
+// 0x0e: a > b; 0 when unordered
+BX_CPP_INLINE int float64_gt_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x0f: always 1; the comparison is still performed
+BX_CPP_INLINE int float64_true_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   (void) float64_compare_quiet(a, b, status);
+   return 1;
+}
+
+// 0x10: a == b; 0 when unordered
+BX_CPP_INLINE int float64_eq_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x11: a < b; 0 when unordered
+BX_CPP_INLINE int float64_lt_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x12: a <= b; 0 when unordered
+BX_CPP_INLINE int float64_le_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_less || rel == float_relation_equal;
+}
+
+// 0x13: 1 only when the operands are unordered
+BX_CPP_INLINE int float64_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x14: anything but equal; 1 when unordered
+BX_CPP_INLINE int float64_neq_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel != float_relation_equal;
+}
+
+// 0x15: not (a < b); 1 when unordered
+BX_CPP_INLINE int float64_nlt_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel != float_relation_less;
+}
+
+// 0x16: not (a <= b); 1 when unordered
+BX_CPP_INLINE int float64_nle_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel != float_relation_less && rel != float_relation_equal;
+}
+
+// 0x17: 1 unless the operands are unordered
+BX_CPP_INLINE int float64_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel != float_relation_unordered;
+}
+
+// 0x18: a == b, or unordered
+BX_CPP_INLINE int float64_eq_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_unordered;
+}
+
+// 0x19: a < b, or unordered
+BX_CPP_INLINE int float64_nge_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_less || rel == float_relation_unordered;
+}
+
+// 0x1a: not (a > b); 1 when unordered
+BX_CPP_INLINE int float64_ngt_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel != float_relation_greater;
+}
+
+// 0x1b: always 0; the comparison is still performed
+BX_CPP_INLINE int float64_false_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   (void) float64_compare_two(a, b, status);
+   return 0;
+}
+
+// 0x1c: a < b or a > b; 0 when equal or unordered
+BX_CPP_INLINE int float64_neq_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_two(a, b, status);
+   return rel != float_relation_equal && rel != float_relation_unordered;
+}
+
+// 0x1d: a >= b; 0 when unordered
+BX_CPP_INLINE int float64_ge_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_greater || rel == float_relation_equal;
+}
+
+// 0x1e: a > b; 0 when unordered
+BX_CPP_INLINE int float64_gt_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x1f: always 1; the comparison is still performed
+BX_CPP_INLINE int float64_true_signalling(float64 a, float64 b, struct float_status_t *status)
+{
+   (void) float64_compare_two(a, b, status);
+   return 1;
+}
+
+#endif
diff --git a/src/cpu/softfloat/softfloat-macros.h b/src/cpu/softfloat/softfloat-macros.h
new file mode 100644
index 000000000..cb867bf5d
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-macros.h
@@ -0,0 +1,686 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_MACROS_H_
+#define _SOFTFLOAT_MACROS_H_
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 16, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16u shift16RightJamming(Bit16u a, int count)
+{
+    Bit16u z;               /* `a' shifted right by `count', sticky bit folded into bit 0 */
+    if (count == 0) {
+        z = a;              /* nothing is shifted off */
+    }
+    else if (count < 16) {
+        /* `a' is promoted to int before shifting: narrow the left shift back to
+           16 bits so that only the bits actually shifted off feed the sticky bit. */
+        z = (a>>count) | ((Bit16u)(a<<((-count) & 15)) != 0);
+    }
+    else {
+        z = (a != 0);       /* every bit is shifted off */
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
+{
+    Bit32u sticky;
+    /* No shift requested: the value passes through untouched. */
+    if (count == 0) {
+        return a;
+    }
+    /* Whole word shifted off: only the sticky bit can remain. */
+    if (count >= 32) {
+        return (a != 0);
+    }
+    /* The low `count' bits are shifted off; moving them to the top of the
+       word and testing for nonzero tells whether any of them was set. */
+    sticky = ((a << ((-count) & 31)) != 0);
+    return (a >> count) | sticky;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
+{
+    Bit64u kept, lost;
+
+    if (count >= 64) {
+        /* All 64 bits fall off; the result is just the sticky bit. */
+        return (a != 0);
+    }
+    if (count == 0) {
+        /* Shift by zero leaves `a' as it is. */
+        return a;
+    }
+    kept = a >> count;              /* bits that survive the shift */
+    lost = a << ((-count) & 63);    /* bits shifted off, moved to the top */
+    return kept | (lost != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift64ExtraRightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    Bit64u whole = 0;      /* integer part, delivered through z0Ptr */
+    Bit64u extra;          /* "extra" word, delivered through z1Ptr */
+    const Bit64u a1Sticky = (a1 != 0);
+    if (count > 64) {
+        /* Everything, a0 included, collapses into one sticky bit. */
+        extra = ((a0 | a1) != 0);
+    }
+    else if (count == 64) {
+        /* a0 moves wholesale into the extra word. */
+        extra = a0 | a1Sticky;
+    }
+    else if (count == 0) {
+        extra = a1;
+        whole = a0;
+    }
+    else {
+        /* 0 < count < 64: the bits a0 loses become the top of the extra word. */
+        extra = (a0 << ((-count) & 63)) | a1Sticky;
+        whole = a0 >> count;
+    }
+    *z1Ptr = extra;
+    *z0Ptr = whole;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    const Bit64u lowSum = a1 + b1;      /* low halves; may wrap */
+    *z1Ptr = lowSum;
+    *z0Ptr = (lowSum < a1) + b0 + a0;   /* a wrapped low sum carries 1 into the high half */
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    const Bit64u borrow = (a1 < b1);    /* the low half borrows when a1 is the smaller one */
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - borrow;
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    /* Split both operands into 32-bit halves, kept in 64-bit variables so
+       that each partial product below is computed at full width. */
+    const Bit64u aLo = (Bit32u) a, aHi = (Bit32u)(a>>32);
+    const Bit64u bLo = (Bit32u) b, bHi = (Bit32u)(b>>32);
+    Bit64u low = aLo * bLo;             /* contributes to bits 0..63   */
+    Bit64u high = aHi * bHi;            /* contributes to bits 64..127 */
+    Bit64u cross = aLo * bHi;           /* straddles the two words     */
+    const Bit64u crossOther = aHi * bLo;
+
+    /* Sum the cross terms; a wrap of that sum is worth 2^32 in the high word. */
+    cross += crossOther;
+    high += (((Bit64u) (cross < crossOther))<<32) + (cross>>32);
+    /* The low 32 bits of the cross sum land in the top half of the low word. */
+    cross <<= 32;
+    low += cross;
+    high += (low < cross);              /* carry out of the low word */
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+#ifdef USE_estimateDiv128To64
+static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b)
+{
+    Bit64u b0, b1; /* b0: upper 32 bits of b; b1: lower 32 bits of b moved into the upper half */
+    Bit64u rem0, rem1, term0, term1; /* 128-bit running remainder and 128-bit product b*z */
+    Bit64u z; /* quotient estimate, built 32 bits at a time */
+    /* Saturate when a0 >= b, i.e. when the quotient would need more than 64 bits. */
+    if (b <= a0) return BX_CONST64(0xFFFFFFFFFFFFFFFF);
+    b0 = b>>32;
+    z = (b0<<32 <= a0) ? BX_CONST64(0xFFFFFFFF00000000) : (a0 / b0)<<32; /* upper 32 quotient bits, clamped */
+    mul64To128(b, z, &term0, &term1);
+    sub128(a0, a1, term0, term1, &rem0, &rem1); /* remainder = a - b*z (modulo 2^128) */
+    while (((Bit64s) rem0) < 0) { /* estimate was too high: step it down until the remainder is non-negative */
+        z -= BX_CONST64(0x100000000);
+        b1 = b<<32;
+        add128(rem0, rem1, b0, b1, &rem0, &rem1); /* b0:b1 is b * 2^32; add it back to the remainder */
+    }
+    rem0 = (rem0<<32) | (rem1>>32); /* keep the middle 64 bits of the remainder for the second step */
+    z |= (b0<<32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; /* lower 32 quotient bits, clamped */
+    return z;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+#ifdef USE_estimateSqrt32
+static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
+{
+    static const Bit16u sqrtOddAdjustments[] = { /* subtracted from the first guess when bit 0 of aExp is 1 */
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const Bit16u sqrtEvenAdjustments[] = { /* subtracted from the first guess when bit 0 of aExp is 0 */
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    Bit32u z; /* running estimate */
+
+    int index = (a>>27) & 15; /* bits 27..30 of a select the table entry */
+    if (aExp & 1) {
+        z = 0x4000 + (a>>17) - sqrtOddAdjustments[index]; /* first guess: linear in a, minus the table correction */
+        z = ((a / z)<<14) + (z<<15); /* refine the guess using a / z */
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + (a>>17) - sqrtEvenAdjustments[index]; /* first guess: linear in a, minus the table correction */
+        z = a / z + z; /* refine the guess using a / z */
+        z = (0x20000 <= z) ? 0xFFFF8000 : (z<<15); /* clamp so that z<<15 cannot exceed 32 bits */
+        if (z <= a) return (Bit32u) (((Bit32s) a)>>1); /* NOTE(review): right-shifts a signed value that is negative when a >= 2^31; result is implementation-defined */
+    }
+    return ((Bit32u) ((((Bit64u) a)<<31) / z)) + (z>>1); /* final step: (a * 2^31) / z + z / 2 */
+}
+#endif
+
+static const int countLeadingZeros8[] = { /* entry i = number of leading zero bits in the 8-bit value i (8 for i == 0) */
+  8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 16 is returned.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int countLeadingZeros16(Bit16u a)
+{
+    /* High byte empty: it accounts for 8 zeros and the table is indexed
+       by the low byte instead (values below 0x100 fit the table as is). */
+    if (a < 0x100) {
+        return 8 + countLeadingZeros8[a];
+    }
+    /* Otherwise the high byte alone determines the count. */
+    return countLeadingZeros8[a>>8];
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int countLeadingZeros32(Bit32u a)
+{
+    int zeros = 0;
+    if ((a >> 16) == 0) {   /* upper half-word empty: 16 zeros, bring the lower half up */
+        zeros = 16;
+        a <<= 16;
+    }
+    if ((a >> 24) == 0) {   /* top byte empty: 8 more zeros, bring the next byte up */
+        zeros += 8;
+        a <<= 8;
+    }
+    /* the top byte now holds the leading one bit, if there is one */
+    return zeros + countLeadingZeros8[a >> 24];
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
+{
+    const Bit32u upper = (Bit32u)(a >> 32);
+
+    /* A set bit in the upper word decides the answer on its own. */
+    if (upper != 0) {
+        return countLeadingZeros32(upper);
+    }
+    /* Upper word is all zeros: 32 leading zeros plus whatever the lower
+       word contributes (32 more when `a' is zero). */
+    return 32 + countLeadingZeros32((Bit32u) a);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    Bit64u hi = 0, lo = 0;   /* result words; both stay zero for count >= 128 */
+
+    if (count == 0) {
+        hi = a0;
+        lo = a1;
+    }
+    else if (count < 64) {
+        /* Bits leaving the bottom of a0 enter the top of a1. */
+        hi = a0 >> count;
+        lo = (a1 >> count) | (a0 << ((-count) & 63));
+    }
+    else if (count < 128) {
+        /* Only a0 contributes, shifted by the amount in excess of 64. */
+        lo = a0 >> (count & 63);
+    }
+    *z1Ptr = lo;
+    *z0Ptr = hi;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift128RightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    const int wrap = (-count) & 63;   /* left-shift amount that exposes the bits shifted off */
+    Bit64u hi = 0, lo;                /* hi stays zero whenever count >= 64 */
+    if (count >= 128) {
+        /* Nothing survives except the sticky bit. */
+        lo = ((a0 | a1) != 0);
+    }
+    else if (count > 64) {
+        /* a0 supplies the result; its lost bits and all of a1 are sticky. */
+        lo = (a0 >> (count & 63)) | (((a0 << wrap) | a1) != 0);
+    }
+    else if (count == 64) {
+        /* a0 drops into the low word unchanged; a1 is purely sticky. */
+        lo = a0 | (a1 != 0);
+    }
+    else if (count == 0) {
+        hi = a0;
+        lo = a1;
+    }
+    else {
+        /* 0 < count < 64: ordinary two-word shift plus the sticky bit from a1. */
+        hi = a0 >> count;
+        lo = (a0 << wrap) | (a1 >> count) | ((a1 << wrap) != 0);
+    }
+    *z1Ptr = lo;
+    *z0Ptr = hi;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shortShift128Left(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    const Bit64u carried = (count != 0) ? (a1 >> ((-count) & 63)) : 0; /* bits of a1 that cross into the high word */
+    *z1Ptr = a1 << count;
+    *z0Ptr = (a0 << count) | carried;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void add192(
+     Bit64u a0, Bit64u a1, Bit64u a2,
+     Bit64u b0, Bit64u b1, Bit64u b2,
+     Bit64u *z0Ptr, Bit64u *z1Ptr, Bit64u *z2Ptr
+)
+{
+    /* Add word by word from the least significant end, noting the carry
+       out of each of the two lower words. */
+    const Bit64u low = a2 + b2;
+    const unsigned carryLow = (low < a2);
+    Bit64u mid = a1 + b1;
+    const unsigned carryMid = (mid < a1);
+    Bit64u high = a0 + b0;
+
+    /* Feed the low carry into the middle word.  That increment can itself
+       wrap the middle word to zero, which is then a second carry into the
+       high word. */
+    mid += carryLow;
+    high += (mid < carryLow);
+
+    /* Carry produced by a1 + b1 on its own. */
+    high += carryMid;
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void sub192(
+     Bit64u a0, Bit64u a1, Bit64u a2,
+     Bit64u b0, Bit64u b1, Bit64u b2,
+     Bit64u *z0Ptr, Bit64u *z1Ptr, Bit64u *z2Ptr
+)
+{
+    /* Subtract word by word from the least significant end, noting the
+       borrow requested by each of the two lower words. */
+    const Bit64u low = a2 - b2;
+    const unsigned borrowLow = (a2 < b2);
+    Bit64u mid = a1 - b1;
+    const unsigned borrowMid = (a1 < b1);
+    Bit64u high = a0 - b0;
+
+    /* Taking the low borrow out of the middle word underflows it only when
+       the middle word is zero and a borrow is due; that case borrows from
+       the high word in turn.  The test must precede the decrement. */
+    high -= (mid < borrowLow);
+    mid -= borrowLow;
+
+    /* Borrow requested by a1 - b1 on its own. */
+    high -= borrowMid;
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
+{
+    return ((a0 ^ b0) | (a1 ^ b1)) == 0; /* no differing bit in either half */
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
+{
+    return (a0 != b0) ? (a0 < b0) : (a1 <= b1); /* high halves decide unless they tie */
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
+{
+    return (a0 != b0) ? (a0 < b0) : (a1 < b1); /* high halves decide unless they tie */
+}
+
+#endif	/* FLOATX80 */
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void mul128By64To192(
+     Bit64u a0, Bit64u a1, Bit64u b,
+     Bit64u *z0Ptr, Bit64u *z1Ptr, Bit64u *z2Ptr
+)
+{
+    Bit64u lowHi, lowLo;     /* a1 * b as a (high, low) pair */
+    Bit64u highHi, highLo;   /* a0 * b as a (high, low) pair */
+
+    mul64To128(a1, b, &lowHi, &lowLo);
+    mul64To128(a0, b, &highHi, &highLo);
+
+    /* a0 * b sits one word above a1 * b, so the upper word of a1 * b is
+       added into it as a 128-bit sum; the lowest word needs no addition. */
+    add128(highHi, highLo, 0, lowHi, &highHi, &highLo);
+    *z2Ptr = lowLo;
+    *z1Ptr = highLo;
+    *z0Ptr = highHi;
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void mul128To256(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u b0,
+     Bit64u b1,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr,
+     Bit64u *z3Ptr
+)
+{
+    Bit64u z0, z1, z2, z3; /* result words, z0 most significant */
+    Bit64u more1, more2; /* scratch words for partial products */
+    /* Four 64x64 partial products are accumulated into z0..z3. */
+    mul64To128(a1, b1, &z2, &z3); /* a1*b1 -> z2:z3 */
+    mul64To128(a1, b0, &z1, &more2); /* a1*b0 -> z1:more2 */
+    add128(z1, more2, 0, z2, &z1, &z2); /* fold a1*b0 into z1:z2 */
+    mul64To128(a0, b0, &z0, &more1); /* a0*b0 -> z0:more1 */
+    add128(z0, more1, 0, z1, &z0, &z1); /* fold a0*b0 into z0:z1 */
+    mul64To128(a0, b1, &more1, &more2); /* a0*b1 -> more1:more2 */
+    add128(more1, more2, 0, z2, &more1, &z2); /* add its low word into z2, carry goes into more1 */
+    add128(z0, z1, 0, more1, &z0, &z1); /* add its high word (plus that carry) into z0:z1 */
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift128ExtraRightJamming(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u a2,
+     int count,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr
+)
+{
+    Bit64u z0, z1, z2; /* z0:z1 = shifted 128-bit value, z2 = extra word */
+    int negCount = (-count) & 63; /* left-shift amount that recovers the bits a word loses */
+    /* Unless count is 0, a2 contributes only a sticky bit (bit 0 of z2). */
+    if (count == 0) { /* no shift: all three words pass through */
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if (count < 64) {
+            z2 = a1<<negCount; /* bits shifted off the bottom of a1 */
+            z1 = (a0<<negCount) | (a1>>count);
+            z0 = a0>>count;
+        }
+        else {
+            if (count == 64) { /* whole-word move: a0 -> z1, a1 -> z2 */
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1; /* a1 now lies entirely below the extra word, so it only feeds the sticky bit */
+                if (count < 128) {
+                    z2 = a0<<negCount; /* bits shifted off the bottom of a0 */
+                    z1 = a0>>(count & 63);
+                }
+                else {
+                    z2 = (count == 128) ? a0 : (a0 != 0); /* beyond 128 even a0 collapses to a single sticky bit */
+                    z1 = 0;
+                }
+            }
+            z0 = 0; /* for count >= 64 the top word is always empty */
+        }
+        z2 |= (a2 != 0); /* jam: record any nonzero bits that fell below the extra word */
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+#endif  /* FLOAT128 */
+
+#endif
diff --git a/src/cpu/softfloat/softfloat-muladd.cc b/src/cpu/softfloat/softfloat-muladd.cc
new file mode 100644
index 000000000..7c9fec70e
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-muladd.cc
@@ -0,0 +1,558 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * This code is based on QEMU patch by Peter Maydell
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes three single-precision floating-point values `a', `b' and `c', at
+| least one of which is a NaN, and returns the NaN result for a fused
+| multiply-add.  If any of `a', `b' or `c' is a signaling NaN, the invalid
+| exception is raised.  The first NaN operand in the order `a', `b', `c' is
+| returned, with its quiet bit (0x00400000) set.  The 0*inf and inf*0 cases
+| are not handled here; the caller deals with them after NaN propagation.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32MulAddNaN(float32 a, float32 b, float32 c, struct float_status_t *status)
+{
+    /* Classify the raw operands before any quiet bit is forced on. */
+    const int nanA = float32_is_nan(a);
+    const int nanB = float32_is_nan(b);
+    const int snanA = float32_is_signaling_nan(a);
+    const int snanB = float32_is_signaling_nan(b);
+    const int snanC = float32_is_signaling_nan(c);
+
+    /* A signaling NaN anywhere among the operands raises invalid. */
+    if (snanA | snanB | snanC) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    /* First-operand priority: `a', then `b', otherwise `c'.  Whichever is
+     * chosen is returned with the quiet bit set. */
+    if (snanA | nanA) {
+        return a | 0x00400000;
+    }
+    if (snanB | nanB) {
+        return b | 0x00400000;
+    }
+    return c | 0x00400000;
+}
+
+/*----------------------------------------------------------------------------
+| Takes three double-precision floating-point values `a', `b' and `c', at
+| least one of which is a NaN, and returns the NaN result for a fused
+| multiply-add.  If any of `a', `b' or `c' is a signaling NaN, the invalid
+| exception is raised.  The first NaN operand in the order `a', `b', `c' is
+| returned, with its quiet bit (0x0008000000000000) set.  The 0*inf and inf*0
+| cases are not handled here; the caller deals with them after NaN propagation.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64MulAddNaN(float64 a, float64 b, float64 c, struct float_status_t *status)
+{
+    /* Classify the raw operands before any quiet bit is forced on. */
+    const int nanA = float64_is_nan(a);
+    const int nanB = float64_is_nan(b);
+    const int snanA = float64_is_signaling_nan(a);
+    const int snanB = float64_is_signaling_nan(b);
+    const int snanC = float64_is_signaling_nan(c);
+
+    /* A signaling NaN anywhere among the operands raises invalid. */
+    if (snanA | snanB | snanC) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    /* First-operand priority: `a', then `b', otherwise `c'.  Whichever is
+     * chosen is returned with the quiet bit set. */
+    if (snanA | nanA) {
+        return a | BX_CONST64(0x0008000000000000);
+    }
+    if (snanB | nanB) {
+        return b | BX_CONST64(0x0008000000000000);
+    }
+    return c | BX_CONST64(0x0008000000000000);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b' then adding 'c', with no intermediate rounding step after the
+| multiplication.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic 754-2008.
+| The flags argument allows the caller to select negation of the
+| addend, the intermediate product, or the final result. (The difference
+| between this and having the caller do a separate negation is that negating
+| externally will flip the sign bit on NaNs.)
+*----------------------------------------------------------------------------*/
+
+float32 float32_muladd(float32 a, float32 b, float32 c, int flags, struct float_status_t *status)
+{
+    int aSign, bSign, cSign, zSign;
+    Bit16s aExp, bExp, cExp, pExp, zExp;
+    Bit32u aSig, bSig, cSig;
+    int pInf, pZero, pSign;  /* p is the product a*b before rounding */
+    Bit64u pSig64, cSig64, zSig64;
+    Bit32u pSig;
+    int shiftcount;
+
+    aSig = extractFloat32Frac(a);  /* split a, b and c into fraction, exponent, sign */
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+    bSign = extractFloat32Sign(b);
+    cSig = extractFloat32Frac(c);
+    cExp = extractFloat32Exp(c);
+    cSign = extractFloat32Sign(c);
+
+    /* Any NaN operand takes priority over everything else: the helper
+     * returns the propagated NaN and raises invalid for a signaling NaN.
+     * Because this check comes before the 0*inf test below, (0,inf,NaN)
+     * and (inf,0,NaN) yield the propagated NaN, not the default NaN.
+     */
+    if (((aExp == 0xff) && aSig) ||
+        ((bExp == 0xff) && bSig) ||
+        ((cExp == 0xff) && cSig)) {
+        return propagateFloat32MulAddNaN(a, b, c, status);
+    }
+
+    if (get_denormals_are_zeros(status)) {  /* treat zero-exponent inputs as zero */
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+        if (cExp == 0) cSig = 0;
+    }
+
+    int infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
+                   (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
+
+    if (infzero) {  /* zero times infinity, in either order */
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+
+    if (flags & float_muladd_negate_c) {
+        cSign ^= 1;
+    }
+
+    /* Work out the sign and type of the product */
+    pSign = aSign ^ bSign;
+    if (flags & float_muladd_negate_product) {
+        pSign ^= 1;
+    }
+    pInf = (aExp == 0xff) || (bExp == 0xff);
+    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
+
+    if (cExp == 0xff) {  /* c is an infinity; NaNs were handled above */
+        if (pInf && (pSign ^ cSign)) {
+            /* addition of opposite-signed infinities => InvalidOperation */
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        /* Otherwise generate an infinity of the same sign */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0)) {
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat32(cSign, 0xff, 0);
+    }
+
+    if (pInf) {  /* infinite product, finite c */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) {
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat32(pSign, 0xff, 0);
+    }
+
+    if (pZero) {  /* zero product: the result comes from c alone */
+        if (cExp == 0) {
+            if (cSig == 0) {
+                /* Adding two exact zeroes */
+                if (pSign == cSign) {
+                    zSign = pSign;
+                } else if (get_float_rounding_mode(status) == float_round_down) {
+                    zSign = 1;
+                } else {
+                    zSign = 0;
+                }
+                return packFloat32(zSign, 0, 0);
+            }
+            /* Exact zero plus a denormal */
+            float_raise(status, float_flag_denormal);
+            if (get_flush_underflow_to_zero(status)) {
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat32(cSign, 0, 0);
+            }
+        }
+        /* Zero plus something non-zero */
+        return packFloat32(cSign, cExp, cSig);
+    }
+
+    if (aExp == 0) {  /* a is nonzero here, so it is a denormal */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {  /* b is nonzero here, so it is a denormal */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+    }
+
+    /* Calculate the actual result a * b + c */
+
+    /* Multiply first; this is easy. */
+    /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
+     * because we want the true exponent, not the "one-less-than"
+     * flavour that roundAndPackFloat32() takes.
+     */
+    pExp = aExp + bExp - 0x7e;
+    aSig = (aSig | 0x00800000) << 7;  /* explicit bit -> bit 30 */
+    bSig = (bSig | 0x00800000) << 8;  /* explicit bit -> bit 31 */
+    pSig64 = (Bit64u)aSig * bSig;
+    if ((Bit64s)(pSig64 << 1) >= 0) {  /* bit 62 clear: renormalize */
+        pSig64 <<= 1;
+        pExp--;
+    }
+
+    zSign = pSign;
+
+    /* Now pSig64 is the significand of the multiply, with the explicit bit in
+     * position 62.
+     */
+    if (cExp == 0) {
+        if (!cSig) {
+            /* Throw out the special case of c being an exact zero now */
+            pSig = (Bit32u) shift64RightJamming(pSig64, 32);
+            return roundAndPackFloat32(zSign, pExp - 1, pSig, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(cSig, &cExp, &cSig);
+    }
+
+    cSig64 = (Bit64u)cSig << 39;  /* fraction in bits 39..61 */
+    cSig64 |= BX_CONST64(0x4000000000000000);  /* explicit bit in position 62 */
+    int expDiff = pExp - cExp;
+
+    if (pSign == cSign) {
+        /* Addition */
+        if (expDiff > 0) {
+            /* scale c to match p */
+            cSig64 = shift64RightJamming(cSig64, expDiff);
+            zExp = pExp;
+        } else if (expDiff < 0) {
+            /* scale p to match c */
+            pSig64 = shift64RightJamming(pSig64, -expDiff);
+            zExp = cExp;
+        } else {
+            /* no scaling needed */
+            zExp = cExp;
+        }
+        /* Add significands and make sure explicit bit ends up in posn 62 */
+        zSig64 = pSig64 + cSig64;
+        if ((Bit64s)zSig64 < 0) {  /* the sum carried into bit 63 */
+            zSig64 = shift64RightJamming(zSig64, 1);
+        } else {
+            zExp--;
+        }
+        zSig64 = shift64RightJamming(zSig64, 32);
+        return roundAndPackFloat32(zSign, zExp, zSig64, status);
+    } else {
+        /* Subtraction */
+        if (expDiff > 0) {  /* the product has the larger exponent */
+            cSig64 = shift64RightJamming(cSig64, expDiff);
+            zSig64 = pSig64 - cSig64;
+            zExp = pExp;
+        } else if (expDiff < 0) {  /* c has the larger exponent */
+            pSig64 = shift64RightJamming(pSig64, -expDiff);
+            zSig64 = cSig64 - pSig64;
+            zExp = cExp;
+            zSign ^= 1;  /* the result takes c's sign */
+        } else {
+            zExp = pExp;
+            if (cSig64 < pSig64) {
+                zSig64 = pSig64 - cSig64;
+            } else if (pSig64 < cSig64) {
+                zSig64 = cSig64 - pSig64;
+                zSign ^= 1;  /* the result takes c's sign */
+            } else {
+                /* Exact zero: negative only when rounding down */
+                return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0);
+            }
+        }
+        --zExp;
+        /* Do the equivalent of normalizeRoundAndPackFloat32() but
+         * starting with the significand in a Bit64u.
+         */
+        shiftcount = countLeadingZeros64(zSig64) - 1;  /* bring the leading 1 up to bit 62 */
+        zSig64 <<= shiftcount;
+        zExp -= shiftcount;
+        zSig64 = shift64RightJamming(zSig64, 32);
+        return roundAndPackFloat32(zSign, zExp, zSig64, status);
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b' then adding 'c', with no intermediate rounding step after the
+| multiplication.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic 754-2008.
+| The flags argument allows the caller to select negation of the
+| addend, the intermediate product, or the final result. (The difference
+| between this and having the caller do a separate negation is that negating
+| externally will flip the sign bit on NaNs.)
+*----------------------------------------------------------------------------*/
+
+float64 float64_muladd(float64 a, float64 b, float64 c, int flags, struct float_status_t *status)
+{
+    int aSign, bSign, cSign, zSign;
+    Bit16s aExp, bExp, cExp, pExp, zExp;
+    Bit64u aSig, bSig, cSig;
+    int pInf, pZero, pSign;  /* p is the product a*b before rounding */
+    Bit64u pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;  /* 128-bit values, word 0 high */
+    int shiftcount;
+
+    aSig = extractFloat64Frac(a);  /* split a, b and c into fraction, exponent, sign */
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+    bSign = extractFloat64Sign(b);
+    cSig = extractFloat64Frac(c);
+    cExp = extractFloat64Exp(c);
+    cSign = extractFloat64Sign(c);
+
+    /* Any NaN operand takes priority over everything else: the helper
+     * returns the propagated NaN and raises invalid for a signaling NaN.
+     * Because this check comes before the 0*inf test below, (0,inf,NaN)
+     * and (inf,0,NaN) yield the propagated NaN, not the default NaN.
+     */
+    if (((aExp == 0x7ff) && aSig) ||
+        ((bExp == 0x7ff) && bSig) ||
+        ((cExp == 0x7ff) && cSig)) {
+        return propagateFloat64MulAddNaN(a, b, c, status);
+    }
+
+    if (get_denormals_are_zeros(status)) {  /* treat zero-exponent inputs as zero */
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+        if (cExp == 0) cSig = 0;
+    }
+
+    int infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
+                   (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
+
+    if (infzero) {  /* zero times infinity, in either order */
+        float_raise(status, float_flag_invalid);
+        return float64_default_nan;
+    }
+
+    if (flags & float_muladd_negate_c) {
+        cSign ^= 1;
+    }
+
+    /* Work out the sign and type of the product */
+    pSign = aSign ^ bSign;
+    if (flags & float_muladd_negate_product) {
+        pSign ^= 1;
+    }
+    pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
+    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
+
+    if (cExp == 0x7ff) {  /* c is an infinity; NaNs were handled above */
+        if (pInf && (pSign ^ cSign)) {
+            /* addition of opposite-signed infinities => InvalidOperation */
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        /* Otherwise generate an infinity of the same sign */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0)) {
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat64(cSign, 0x7ff, 0);
+    }
+
+    if (pInf) {  /* infinite product, finite c */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) {
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat64(pSign, 0x7ff, 0);
+    }
+
+    if (pZero) {  /* zero product: the result comes from c alone */
+        if (cExp == 0) {
+            if (cSig == 0) {
+                /* Adding two exact zeroes */
+                if (pSign == cSign) {
+                    zSign = pSign;
+                } else if (get_float_rounding_mode(status) == float_round_down) {
+                    zSign = 1;
+                } else {
+                    zSign = 0;
+                }
+                return packFloat64(zSign, 0, 0);
+            }
+            /* Exact zero plus a denormal */
+            float_raise(status, float_flag_denormal);
+            if (get_flush_underflow_to_zero(status)) {
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat64(cSign, 0, 0);
+            }
+        }
+        /* Zero plus something non-zero */
+        return packFloat64(cSign, cExp, cSig);
+    }
+
+    if (aExp == 0) {  /* a is nonzero here, so it is a denormal */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {  /* b is nonzero here, so it is a denormal */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+    }
+
+    /* Calculate the actual result a * b + c */
+
+    /* Multiply first; this is easy. */
+    /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
+     * because we want the true exponent, not the "one-less-than"
+     * flavour that roundAndPackFloat64() takes.
+     */
+    pExp = aExp + bExp - 0x3fe;
+    aSig = (aSig | BX_CONST64(0x0010000000000000))<<10;  /* explicit bit -> bit 62 */
+    bSig = (bSig | BX_CONST64(0x0010000000000000))<<11;  /* explicit bit -> bit 63 */
+    mul64To128(aSig, bSig, &pSig0, &pSig1);
+    if ((Bit64s)(pSig0 << 1) >= 0) {  /* bit 126 clear: renormalize */
+        shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
+        pExp--;
+    }
+
+    zSign = pSign;
+
+    /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
+     * bit in position 126.
+     */
+    if (cExp == 0) {
+        if (!cSig) {
+            /* Throw out the special case of c being an exact zero now */
+            shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+            return roundAndPackFloat64(zSign, pExp - 1, pSig1, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(cSig, &cExp, &cSig);
+    }
+
+    cSig0 = cSig << 10;  /* fraction in bits 10..61 of the high word */
+    cSig1 = 0;
+    cSig0 |= BX_CONST64(0x4000000000000000);  /* explicit bit in position 126 */
+    int expDiff = pExp - cExp;
+
+    if (pSign == cSign) {
+        /* Addition */
+        if (expDiff > 0) {
+            /* scale c to match p */
+            shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
+            zExp = pExp;
+        } else if (expDiff < 0) {
+            /* scale p to match c */
+            shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
+            zExp = cExp;
+        } else {
+            /* no scaling needed */
+            zExp = cExp;
+        }
+        /* Add significands and make sure explicit bit ends up in posn 126 */
+        add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+        if ((Bit64s)zSig0 < 0) {  /* the sum carried into bit 127 */
+            shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
+        } else {
+            zExp--;
+        }
+        shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
+        return roundAndPackFloat64(zSign, zExp, zSig1, status);
+    } else {
+        /* Subtraction */
+        if (expDiff > 0) {  /* the product has the larger exponent */
+            shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
+            sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+            zExp = pExp;
+        } else if (expDiff < 0) {  /* c has the larger exponent */
+            shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
+            sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
+            zExp = cExp;
+            zSign ^= 1;  /* the result takes c's sign */
+        } else {
+            zExp = pExp;
+            if (lt128(cSig0, cSig1, pSig0, pSig1)) {
+                sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+            } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
+                sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
+                zSign ^= 1;  /* the result takes c's sign */
+            } else {
+                /* Exact zero: negative only when rounding down */
+                return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0);
+            }
+        }
+        --zExp;
+        /* Do the equivalent of normalizeRoundAndPackFloat64() but starting
+         * with the significand in a pair of Bit64u. */
+        if (zSig0) {
+            shiftcount = countLeadingZeros64(zSig0) - 1;
+            shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
+            if (zSig1) {
+                zSig0 |= 1;  /* jam the bits left in the low word */
+            }
+            zExp -= shiftcount;
+        } else {
+            /* shiftcount is -1 if bit 63 of zSig1 is set: shift right, jamming */
+            shiftcount = countLeadingZeros64(zSig1) - 1;
+            zSig0 = (shiftcount < 0) ? ((zSig1 >> 1) | (zSig1 & 1)) : (zSig1 << shiftcount);
+            zExp -= (shiftcount + 64);
+        }
+        return roundAndPackFloat64(zSign, zExp, zSig0, status);
+    }
+}
diff --git a/src/cpu/softfloat/softfloat-round-pack.cc b/src/cpu/softfloat/softfloat-round-pack.cc
new file mode 100644
index 000000000..2b3965840
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-round-pack.cc
@@ -0,0 +1,896 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `exactAbsZ' with binary point between bits
+| 6 and 7, and returns the properly rounded 32-bit integer corresponding to
+| the input.  If `zSign' is 1, the input is negated before being converted to
+| an integer.  Bit 63 of `exactAbsZ' must be zero.  Ordinarily, the input is
+| simply rounded to an integer, with the inexact exception raised if it cannot
+| be represented exactly as an integer.  However, if the input is too large,
+| the invalid exception is raised and `int32_indefinite' is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s roundAndPackInt32(int zSign, Bit64u exactAbsZ, struct float_status_t *status)
+{
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    int roundIncrement = 0x40;  /* half of the 7 fraction bits: round to nearest */
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = 0x7F;  /* round away from zero unless cancelled below */
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    int roundBits = (int)(exactAbsZ & 0x7F);
+    Bit64u absZ = (exactAbsZ + roundIncrement)>>7;
+    absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);  /* exact tie: force even */
+    Bit32s z = (Bit32s) absZ;
+    /* negate as unsigned: `-z' overflows when z is the most negative Bit32s */
+    if (zSign) z = (Bit32s)(0u - (Bit32u) z);
+    if ((absZ>>32) || (z && ((z < 0) ^ zSign))) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((absZ << 7) > exactAbsZ)  /* the magnitude was rounded up */
+            set_float_rounding_up(status);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the input is simply rounded to an integer, with the inexact
+| exception raised if it cannot be represented exactly as an integer.  If the
+| input is too large, invalid is raised and `int64_indefinite' is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status)
+{
+    Bit64s z;
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    int increment = ((Bit64s) absZ1 < 0);  /* fraction is one half or more */
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0;
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && absZ1;
+            }
+            else {
+                increment = (roundingMode == float_round_up) && absZ1;
+            }
+        }
+    }
+    Bit64u exactAbsZ0 = absZ0;
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) goto overflow;  /* carry out of the integer word */
+        absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);  /* exact tie: force even */
+    }
+    z = absZ0;
+    /* negate as unsigned: `-z' overflows when z is the most negative Bit64s */
+    if (zSign) z = (Bit64s)(0 - absZ0);
+    if (z && ((z < 0) ^ zSign)) {
+ overflow:
+        float_raise(status, float_flag_invalid);
+        return (Bit64s)(int64_indefinite);
+    }
+    if (absZ1) {
+        float_raise(status, float_flag_inexact);
+        if (absZ0 > exactAbsZ0)  /* the magnitude was rounded up */
+            set_float_rounding_up(status);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the 128-bit fixed-point magnitude formed by concatenating `absZ0'
+| and `absZ1' (binary point between the two words, i.e. between bits 63 and
+| 64) to an unsigned 64-bit integer according to the current rounding mode,
+| with `zSign' giving the sign of the value.  The inexact exception is raised
+| when the fraction word `absZ1' is nonzero.  If rounding carries out of
+| `absZ0', or if `zSign' is set and the rounded magnitude is nonzero, the
+| invalid exception is raised and `uint64_indefinite' is returned instead.
+*----------------------------------------------------------------------------*/
+
+Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status)
+{
+    const int mode = get_float_rounding_mode(status);
+    const int nearestEven = (mode == float_round_nearest_even);
+    int bump;
+
+    /* Decide whether the integer word has to be incremented. */
+    if (nearestEven) {
+        /* Fraction of one half or more: bit 63 of absZ1 is set. */
+        bump = ((Bit64s) absZ1 < 0);
+    } else if (mode == float_round_to_zero || absZ1 == 0) {
+        bump = 0;
+    } else if (zSign) {
+        bump = (mode == float_round_down);
+    } else {
+        bump = (mode == float_round_up);
+    }
+
+    if (bump) {
+        if (++absZ0 == 0) {
+            /* Carry out of the 64-bit integer word. */
+            float_raise(status, float_flag_invalid);
+            return uint64_indefinite;
+        }
+        /* An exact tie under nearest-even rounds to the even integer. */
+        if (nearestEven && (Bit64u) (absZ1 << 1) == 0) absZ0 &= ~(Bit64u) 1;
+    }
+    if (zSign && absZ0) {
+        /* A negative nonzero result is not representable as unsigned. */
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+    if (absZ1) float_raise(status, float_flag_inexact);
+    return absZ0;
+}
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal half-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
+{
+    const int lshift = countLeadingZeros16(aSig) - 5;  /* leading 1 -> bit 10 */
+    *zSigPtr = aSig << lshift;
+    *zExpPtr = 1 - lshift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal half-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 14
+| and 13, which is 4 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, struct float_status_t *status)
+{
+    Bit16s roundIncrement, roundBits, roundMask;
+
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 8; // half of the dropped range: round to nearest
+    roundMask = 0xF; // the low 4 bits of zSig are dropped by the final >> 4
+
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask; // directed rounding: any dropped bit carries into the kept part...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0; // ...unless the direction truncates the magnitude
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & roundMask;
+    if (0x1D <= (Bit16u) zExp) { // the unsigned cast makes a negative zExp take this branch as well
+        if ((0x1D < zExp)
+             || ((zExp == 0x1D) && ((Bit16s) (zSig + roundIncrement) < 0)))
+        {
+            float_raise(status, float_flag_overflow);
+            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
+                float_raise(status, float_flag_inexact);
+            }
+            return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0); // one encoding lower when rounding truncates
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
+            zSig = shift16RightJamming(zSig, -zExp);
+            zExp = 0;
+            roundBits = zSig & roundMask;
+            if (isTiny) {
+                if(get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat16(zSign, 0, 0);
+                }
+                // signal the #P according to roundBits calculated AFTER denormalization
+                if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
+                    float_raise(status, float_flag_underflow);
+                }
+            }
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    Bit16u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
+    zSigRound &= ~(((roundBits ^ 0x8) == 0) & roundNearestEven); // exact tie (roundBits == 8): clear the LSB to round to even
+    if (zSigRound == 0) zExp = 0;
+    return packFloat16(zSign, zExp, zSigRound);
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes a subnormal single-precision significand `aSig'.  The significand
+| is shifted left by countLeadingZeros32(aSig) - 8 bits, which for a nonzero
+| input brings its leading 1 to bit 23.  The shifted significand is stored
+| through `zSigPtr' and the exponent, 1 minus the shift, through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
+{
+    const int lshift = countLeadingZeros32(aSig) - 8;
+    *zExpPtr = 1 - lshift;
+    *zSigPtr = aSig << lshift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status)
+{
+    Bit32s roundIncrement, roundBits;
+    const Bit32s roundMask = 0x7F; // the low 7 bits of zSig are dropped by the final >> 7
+
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 0x40; // half of the dropped range: round to nearest
+
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask; // directed rounding: any dropped bit carries into the kept part...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0; // ...unless the direction truncates the magnitude
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & roundMask;
+    if (0xFD <= (Bit16u) zExp) { // the unsigned cast makes a negative zExp take this branch as well
+        if ((0xFD < zExp)
+             || ((zExp == 0xFD) && ((Bit32s) (zSig + roundIncrement) < 0)))
+        {
+            float_raise(status, float_flag_overflow);
+            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
+                float_raise(status, float_flag_inexact);
+                if (roundIncrement != 0) set_float_rounding_up(status);
+            }
+            return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0); // one encoding lower when rounding truncates
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
+            if (isTiny) {
+                if (!float_exception_masked(status, float_flag_underflow)) {
+                    float_raise(status, float_flag_underflow);
+                    zExp += 192; // bias unmasked underflow
+                }
+            }
+            if (zExp < 0) { // still negative: the exponent was not biased above, so denormalize
+                zSig = shift32RightJamming(zSig, -zExp);
+                zExp = 0;
+                roundBits = zSig & roundMask; // recomputed from the denormalized significand
+                if (isTiny) {
+                    // masked underflow
+                    if(get_flush_underflow_to_zero(status)) {
+                        float_raise(status, float_flag_underflow | float_flag_inexact);
+                        return packFloat32(zSign, 0, 0);
+                    }
+                    if (roundBits) float_raise(status, float_flag_underflow);
+                }
+            }
+        }
+    }
+    Bit32u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 7;
+    zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); // exact tie (roundBits == 0x40): clear the LSB to round to even
+    if (zSigRound == 0) zExp = 0;
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((zSigRound << 7) > zSig) set_float_rounding_up(status); // the rounded magnitude is above the input
+    }
+    return packFloat32(zSign, zExp, zSigRound);
+}
+
+/*----------------------------------------------------------------------------
+| Variant of `roundAndPackFloat32' for a significand `zSig' that has not been
+| normalized yet.  `zSig' is shifted left by countLeadingZeros32(zSig) - 1
+| bits, `zExp' is lowered by the same amount, and the pair is handed to
+| `roundAndPackFloat32' together with `zSign' and `status'.  Bit 31 of `zSig'
+| must be zero, and `zExp' must be 1 less than the ``true'' floating-point
+| exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status)
+{
+    const int lshift = countLeadingZeros32(zSig) - 1;
+    return roundAndPackFloat32(zSign, zExp - lshift, zSig << lshift, status);
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes a subnormal double-precision significand `aSig'.  The significand
+| is shifted left by countLeadingZeros64(aSig) - 11 bits, which for a nonzero
+| input brings its leading 1 to bit 52.  The shifted significand is stored
+| through `zSigPtr' and the exponent, 1 minus the shift, through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
+{
+    const int lshift = countLeadingZeros64(aSig) - 11;
+    *zExpPtr = 1 - lshift;
+    *zSigPtr = aSig << lshift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status)
+{
+    Bit16s roundIncrement, roundBits;
+    const Bit16s roundMask = 0x3FF; // the low 10 bits of zSig are dropped by the final >> 10
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 0x200; // half of the dropped range: round to nearest
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask; // directed rounding: any dropped bit carries into the kept part...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0; // ...unless the direction truncates the magnitude
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = (Bit16s)(zSig & roundMask);
+    if (0x7FD <= (Bit16u) zExp) { // the unsigned cast makes a negative zExp take this branch as well
+        if ((0x7FD < zExp)
+             || ((zExp == 0x7FD)
+                  && ((Bit64s) (zSig + roundIncrement) < 0)))
+        {
+            float_raise(status, float_flag_overflow);
+            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
+                float_raise(status, float_flag_inexact);
+                if (roundIncrement != 0) set_float_rounding_up(status);
+            }
+            return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0); // one encoding lower when rounding truncates
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
+            if (isTiny) {
+                if (!float_exception_masked(status, float_flag_underflow)) {
+                    float_raise(status, float_flag_underflow);
+                    zExp += 1536; // bias unmasked underflow
+                }
+            }
+            if (zExp < 0) { // still negative: the exponent was not biased above, so denormalize
+                zSig = shift64RightJamming(zSig, -zExp);
+                zExp = 0;
+                roundBits = (Bit16s)(zSig & roundMask); // recomputed from the denormalized significand
+                if (isTiny) {
+                    // masked underflow
+                    if(get_flush_underflow_to_zero(status)) {
+                        float_raise(status, float_flag_underflow | float_flag_inexact);
+                        return packFloat64(zSign, 0, 0);
+                    }
+                    if (roundBits) float_raise(status, float_flag_underflow);
+                }
+            }
+        }
+    }
+    Bit64u zSigRound = (zSig + roundIncrement)>>10;
+    zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven); // exact tie (roundBits == 0x200): clear the LSB to round to even
+    if (zSigRound == 0) zExp = 0;
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((zSigRound << 10) > zSig) set_float_rounding_up(status); // the rounded magnitude is above the input
+    }
+    return packFloat64(zSign, zExp, zSigRound);
+}
+
+/*----------------------------------------------------------------------------
+| Variant of `roundAndPackFloat64' for a significand `zSig' that has not been
+| normalized yet.  `zSig' is shifted left by countLeadingZeros64(zSig) - 1
+| bits, `zExp' is lowered by the same amount, and the pair is handed to
+| `roundAndPackFloat64' together with `zSign' and `status'.  Bit 63 of `zSig'
+| must be zero, and `zExp' must be 1 less than the ``true'' floating-point
+| exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status)
+{
+    const int lshift = countLeadingZeros64(zSig) - 1;
+    return roundAndPackFloat64(zSign, zExp - lshift, zSig << lshift, status);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes a subnormal extended double-precision significand `aSig'.  The
+| significand is shifted left by countLeadingZeros64(aSig) bits, which for a
+| nonzero input brings its leading 1 to bit 63.  The shifted significand is
+| stored through `zSigPtr' and the exponent, 1 minus the shift, through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
+{
+    const int lshift = countLeadingZeros64(aSig);
+    *zExpPtr = 1 - lshift;
+    *zSigPtr = aSig << lshift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
+{
+    Bit64u roundIncrement, roundMask, roundBits;
+    int increment;
+    Bit64u zSigExact; /* support rounding-up response */
+
+    Bit8u roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    if (roundingPrecision == 64) {
+        roundIncrement = BX_CONST64(0x0000000000000400);
+        roundMask = BX_CONST64(0x00000000000007FF); // low 11 bits are rounded away, 53 significand bits stay
+    }
+    else if (roundingPrecision == 32) {
+        roundIncrement = BX_CONST64(0x0000008000000000);
+        roundMask = BX_CONST64(0x000000FFFFFFFFFF); // low 40 bits are rounded away, 24 significand bits stay
+    }
+    else goto precision80; // any other value: round at the full 64-bit significand
+
+    zSig0 |= (zSig1 != 0); // fold a nonzero low word into the lowest bit of zSig0 (sticky)
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask; // directed rounding: any dropped bit carries into the kept part...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0; // ...unless the direction truncates the magnitude
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if (0x7FFD <= (Bit32u) (zExp - 1)) { // unsigned compare: taken for zExp >= 0x7FFE and for zExp <= 0
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))
+        {
+            goto overflow;
+        }
+        if (zExp <= 0) {
+            int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement); // not tiny only if zExp == 0 and the increment wraps zSig0
+            zSig0 = shift64RightJamming(zSig0, 1 - zExp);
+            zSigExact = zSig0;
+            zExp = 0;
+            roundBits = zSig0 & roundMask; // recomputed from the denormalized significand
+            if (isTiny) {
+                if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
+                    float_raise(status, float_flag_underflow);
+            }
+            zSig0 += roundIncrement;
+            if ((Bit64s) zSig0 < 0) zExp = 1; // rounding carried into bit 63
+            roundIncrement = roundMask + 1;
+            if (roundNearestEven && (roundBits<<1 == roundIncrement))
+                roundMask |= roundIncrement; // exact tie: also clear the lowest kept bit (round to even)
+            zSig0 &= ~roundMask;
+            if (roundBits) {
+                float_raise(status, float_flag_inexact);
+                if (zSig0 > zSigExact) set_float_rounding_up(status);
+            }
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    zSigExact = zSig0;
+    zSig0 += roundIncrement;
+    if (zSig0 < roundIncrement) { // the addition wrapped past bit 63
+        // Basically scale by shifting right and keep overflow
+        ++zExp;
+        zSig0 = BX_CONST64(0x8000000000000000);
+        zSigExact >>= 1; // must scale also, or else later tests will fail
+    }
+    roundIncrement = roundMask + 1;
+    if (roundNearestEven && (roundBits<<1 == roundIncrement))
+        roundMask |= roundIncrement; // exact tie: also clear the lowest kept bit (round to even)
+    zSig0 &= ~roundMask;
+    if (zSig0 > zSigExact) set_float_rounding_up(status);
+    if (zSig0 == 0) zExp = 0;
+    return packFloatx80(zSign, zExp, zSig0);
+ precision80:
+    increment = ((Bit64s) zSig1 < 0); // top bit of zSig1 set: the discarded part is at least half an ulp
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0;
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && zSig1;
+            }
+            else {
+                increment = (roundingMode == float_round_up) && zSig1;
+            }
+        }
+    }
+    if (0x7FFD <= (Bit32u) (zExp - 1)) { // unsigned compare: taken for zExp >= 0x7FFE and for zExp <= 0
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE)
+                  && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
+                  && increment))
+        {
+            roundMask = 0; // so that ~roundMask below is an all-ones significand
+ overflow: // also reached by goto from the reduced-precision path above, with its roundMask still set
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            if ((roundingMode == float_round_to_zero)
+                 || (zSign && (roundingMode == float_round_up))
+                 || (! zSign && (roundingMode == float_round_down)))
+            {
+                return packFloatx80(zSign, 0x7FFE, ~roundMask); // exponent 0x7FFE with every kept significand bit set
+            }
+            set_float_rounding_up(status);
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        if (zExp <= 0) {
+            int isTiny = (zExp < 0) || (! increment)
+                || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
+            shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
+            zExp = 0;
+            if (isTiny) {
+                if (zSig1 || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
+                    float_raise(status, float_flag_underflow);
+            }
+            if (zSig1) float_raise(status, float_flag_inexact);
+            if (roundNearestEven) increment = ((Bit64s) zSig1 < 0); // re-decided from the denormalized zSig1
+            else {
+                if (zSign) {
+                    increment = (roundingMode == float_round_down) && zSig1;
+                } else {
+                    increment = (roundingMode == float_round_up) && zSig1;
+                }
+            }
+            if (increment) {
+                zSigExact = zSig0++;
+                zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven); // exact tie: clear the LSB to round to even
+                if (zSig0 > zSigExact) set_float_rounding_up(status);
+                if ((Bit64s) zSig0 < 0) zExp = 1; // rounding carried into bit 63
+            }
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (zSig1) float_raise(status, float_flag_inexact);
+    if (increment) {
+        zSigExact = zSig0++;
+        if (zSig0 == 0) { // the increment wrapped past bit 63
+            zExp++;
+            zSig0 = BX_CONST64(0x8000000000000000);
+            zSigExact >>= 1;  // must scale also, or else later tests will fail
+        }
+        else {
+            zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven); // exact tie: clear the LSB to round to even
+        }
+        if (zSig0 > zSigExact) set_float_rounding_up(status);
+    }
+    else {
+        if (zSig0 == 0) zExp = 0;
+    }
+    return packFloatx80(zSign, zExp, zSig0);
+}
+
+// Rounds and packs via SoftFloatRoundAndPackFloatx80; an unmasked underflow/overflow redoes it with a biased exponent.
+floatx80 roundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
+{
+    struct float_status_t round_status = *status; // by-value snapshot taken before the first attempt
+    floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
+    // bias unmasked underflow: restart from the snapshot so flags of the discarded attempt do not leak
+    if (status->float_exception_flags & ~status->float_exception_masks & float_flag_underflow) {
+       float_raise(&round_status, float_flag_underflow);
+       *status = round_status;
+       return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status);
+    }
+    // bias unmasked overflow: same restart, with the exponent lowered instead
+    if (status->float_exception_flags & ~status->float_exception_masks & float_flag_overflow) {
+       float_raise(&round_status, float_flag_overflow);
+       *status = round_status;
+       return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status);
+    }
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Variant of `roundAndPackFloatx80' for a significand (`zSig0' above `zSig1')
+| that has not been normalized yet.  When `zSig0' is zero, `zSig1' is first
+| promoted into it and `zExp' drops by 64.  The 128-bit significand is then
+| shifted left by countLeadingZeros64(zSig0) bits, `zExp' is lowered by the
+| same amount, and everything is passed on to `roundAndPackFloatx80' with
+| `roundingPrecision', `zSign' and `status' unchanged.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
+{
+    int lshift;
+
+    if (!zSig0) { // high word empty: promote the low word by a whole 64-bit step
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    lshift = countLeadingZeros64(zSig0);
+    shortShift128Left(zSig0, zSig1, lshift, &zSig0, &zSig1);
+    return roundAndPackFloatx80(roundingPrecision, zSign, zExp - lshift, zSig0, zSig1, status);
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Normalizes a subnormal quadruple-precision significand given as the pair
+| `aSig0' (high) and `aSig1' (low).  The value is shifted so that its leading
+| 1 lands 15 bits below the top of the high word, reading the leading-zero
+| count from `aSig0' or, when that word is zero, from `aSig1'.  The high and
+| low words of the result are stored through `zSig0Ptr' and `zSig1Ptr', and
+| the exponent that compensates for the shift is stored through `zExpPtr'.
+| The outputs are always written; nothing is returned.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat128Subnormal(
+     Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr)
+{
+    int lshift;
+
+    if (aSig0 != 0) {
+        // the leading 1 is in the high word: one short 128-bit left shift does it
+        lshift = countLeadingZeros64(aSig0) - 15;
+        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
+        *zExpPtr = 1 - lshift;
+        return;
+    }
+    lshift = countLeadingZeros64(aSig1) - 15;
+    *zExpPtr = -63 - lshift;
+    if (lshift >= 0) {
+        *zSig0Ptr = aSig1 << lshift;
+        *zSig1Ptr = 0;
+    }
+    else {
+        *zSig0Ptr = aSig1 >> (-lshift);
+        *zSig1Ptr = aSig1 << (lshift & 63);
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 roundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, struct float_status_t *status)
+{
+    int increment = ((Bit64s) zSig2 < 0); // top bit of zSig2 set: round up; the rounding mode in status is never read here
+    if (0x7FFD <= (Bit32u) zExp) { // the unsigned cast makes a negative zExp take this branch as well
+        if ((0x7FFD < zExp)
+             || ((zExp == 0x7FFD)
+                  && eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
+                         BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
+                  && increment))
+        {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            return packFloat128Four(zSign, 0x7FFF, 0, 0); // exponent 0x7FFF with a zero significand
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1)
+                || ! increment
+                || lt128(zSig0, zSig1,
+                       BX_CONST64(0x0001FFFFFFFFFFFF),
+                       BX_CONST64(0xFFFFFFFFFFFFFFFF));
+            shift128ExtraRightJamming(
+                zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
+            zExp = 0;
+            if (isTiny && zSig2) float_raise(status, float_flag_underflow);
+            increment = ((Bit64s) zSig2 < 0); // re-decided from the denormalized zSig2
+        }
+    }
+    if (zSig2) float_raise(status, float_flag_inexact);
+    if (increment) {
+        add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
+        zSig1 &= ~((zSig2 + zSig2 == 0) & 1); // zSig2 is exactly its top bit (a tie): clear the LSB to round to even
+    }
+    else {
+        if ((zSig0 | zSig1) == 0) zExp = 0;
+    }
+    return packFloat128Four(zSign, zExp, zSig0, zSig1);
+}
+
+/*----------------------------------------------------------------------------
+| Variant of `roundAndPackFloat128' for a two-word significand (`zSig0' above
+| `zSig1') that does not have to be normalized.  When `zSig0' is zero,
+| `zSig1' is promoted into it and `zExp' drops by 64.  The significand is
+| then shifted by countLeadingZeros64(zSig0) - 15 bits -- left when that
+| count is non-negative, right with jamming into a third word otherwise --
+| and `zExp' is adjusted to match before `roundAndPackFloat128' is called.
+| In all cases, `zExp' must be 1 less than the ``true'' floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float128 normalizeRoundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
+{
+    Bit64u zSig2;
+    int lshift;
+
+    if (!zSig0) { // high word empty: promote the low word by a whole 64-bit step
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    lshift = countLeadingZeros64(zSig0) - 15;
+    zExp -= lshift;
+    if (lshift < 0) {
+        shift128ExtraRightJamming(zSig0, zSig1, 0, -lshift, &zSig0, &zSig1, &zSig2);
+    }
+    else {
+        zSig2 = 0;
+        shortShift128Left(zSig0, zSig1, lshift, &zSig0, &zSig1);
+    }
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+#endif
diff --git a/src/cpu/softfloat/softfloat-round-pack.h b/src/cpu/softfloat/softfloat-round-pack.h
new file mode 100644
index 000000000..1422aaea6
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-round-pack.h
@@ -0,0 +1,309 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_ROUND_PACK_H_
+#define _SOFTFLOAT_ROUND_PACK_H_
+
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s roundAndPackInt32(int zSign, Bit64u absZ, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
+| with the inexact exception raised if the input cannot be represented exactly
+| as an integer. However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status);
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal half-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal half-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 14
+| and 13, which is 4 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, struct float_status_t *status);
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status);
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 roundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status);
+
+#endif // FLOATX80
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat128Subnormal(
+     Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 roundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+float128 normalizeRoundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status);
+
+#endif // FLOAT128
+
+#endif
diff --git a/src/cpu/softfloat/softfloat-specialize.cc b/src/cpu/softfloat/softfloat-specialize.cc
new file mode 100644
index 000000000..bf0d11144
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-specialize.cc
@@ -0,0 +1,187 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, struct float_status_t *status)
+{
+    /* Classify both operands before the 0x00400000 bit is forced on below. */
+    const int aIsNaN = float32_is_nan(a);
+    const int aIsSignalingNaN = float32_is_signaling_nan(a);
+    const int bIsNaN = float32_is_nan(b);
+    const int bIsSignalingNaN = float32_is_signaling_nan(b);
+
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+
+    /* Any mode other than larger-significand simply prefers `a' when it is a NaN. */
+    if (get_float_nan_handling_mode(status) != float_larger_significand_nan)
+        return (aIsSignalingNaN | aIsNaN) ? a : b;
+
+    if (aIsSignalingNaN) {
+        if (! bIsSignalingNaN) return bIsNaN ? b : a;
+    }
+    else if (aIsNaN) {
+        if (bIsSignalingNaN | ! bIsNaN) return a;
+    }
+    else return b;
+
+    /* Both operands are NaNs of the same kind: compare with the top bit shifted out. */
+    const Bit32u aMagnitude = (Bit32u) (a<<1), bMagnitude = (Bit32u) (b<<1);
+    if (aMagnitude < bMagnitude) return b;
+    if (bMagnitude < aMagnitude) return a;
+    return (a < b) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float64 propagateFloat64NaN(float64 a, float64 b, struct float_status_t *status)
+{
+    /* Classify both operands before the 0x0008000000000000 bit is forced on below. */
+    const int aIsNaN = float64_is_nan(a);
+    const int aIsSignalingNaN = float64_is_signaling_nan(a);
+    const int bIsNaN = float64_is_nan(b);
+    const int bIsSignalingNaN = float64_is_signaling_nan(b);
+
+    a |= BX_CONST64(0x0008000000000000);
+    b |= BX_CONST64(0x0008000000000000);
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+
+    /* Any mode other than larger-significand simply prefers `a' when it is a NaN. */
+    if (get_float_nan_handling_mode(status) != float_larger_significand_nan)
+        return (aIsSignalingNaN | aIsNaN) ? a : b;
+    if (aIsSignalingNaN) {
+        if (! bIsSignalingNaN) return bIsNaN ? b : a;
+    }
+    else if (aIsNaN) {
+        if (bIsSignalingNaN | ! bIsNaN) return a;
+    }
+    else return b;
+    /* Both operands are NaNs of the same kind: compare with the top bit shifted out. */
+    const Bit64u aMagnitude = (Bit64u) (a<<1), bMagnitude = (Bit64u) (b<<1);
+    if (aMagnitude < bMagnitude) return b;
+    if (bMagnitude < aMagnitude) return a;
+    return (a < b) ? a : b;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+    /* Classify both operands before their top two fraction bits are forced on. */
+    const int aIsNaN = floatx80_is_nan(a);
+    const int aIsSignalingNaN = floatx80_is_signaling_nan(a);
+    const int bIsNaN = floatx80_is_nan(b);
+    const int bIsSignalingNaN = floatx80_is_signaling_nan(b);
+
+    a.fraction |= BX_CONST64(0xC000000000000000);
+    b.fraction |= BX_CONST64(0xC000000000000000);
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+
+    if (aIsSignalingNaN) {
+        if (! bIsSignalingNaN) return bIsNaN ? b : a;
+    }
+    else if (aIsNaN) {
+        if (bIsSignalingNaN | ! bIsNaN) return a;
+    }
+    else return b;
+
+    /* Both operands are NaNs of the same kind: the larger fraction wins. */
+    if (a.fraction != b.fraction) return (a.fraction < b.fraction) ? b : a;
+    return (a.exp < b.exp) ? a : b;
+}
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float128 propagateFloat128NaN(float128 a, float128 b, struct float_status_t *status)
+{
+    /* Classify both operands before the 0x0000800000000000 bit of `hi' is forced on. */
+    const int aIsNaN = float128_is_nan(a);
+    const int aIsSignalingNaN = float128_is_signaling_nan(a);
+    const int bIsNaN = float128_is_nan(b);
+    const int bIsSignalingNaN = float128_is_signaling_nan(b);
+
+    a.hi |= BX_CONST64(0x0000800000000000);
+    b.hi |= BX_CONST64(0x0000800000000000);
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+
+    if (aIsSignalingNaN) {
+        if (! bIsSignalingNaN) return bIsNaN ? b : a;
+    }
+    else if (aIsNaN) {
+        if (bIsSignalingNaN | ! bIsNaN) return a;
+    }
+    else return b;
+
+    /* Both operands are NaNs of the same kind: compare with the top bit of `hi' shifted out. */
+    if (lt128(a.hi<<1, a.lo, b.hi<<1, b.lo)) return b;
+    if (lt128(b.hi<<1, b.lo, a.hi<<1, a.lo)) return a;
+    return (a.hi < b.hi) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| The default generated quadruple-precision NaN, assembled from the
+| `float128_default_nan_hi' and `float128_default_nan_lo' halves.
+*----------------------------------------------------------------------------*/
+const float128 float128_default_nan = packFloat128(float128_default_nan_hi, float128_default_nan_lo);
+
+#endif /* FLOAT128 */
diff --git a/src/cpu/softfloat/softfloat-specialize.h b/src/cpu/softfloat/softfloat-specialize.h
new file mode 100644
index 000000000..11326ecb6
--- /dev/null
+++ b/src/cpu/softfloat/softfloat-specialize.h
@@ -0,0 +1,788 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _SOFTFLOAT_SPECIALIZE_H_
+#define _SOFTFLOAT_SPECIALIZE_H_
+
+#include "softfloat.h"
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define int16_indefinite ((Bit16s)0x8000)
+#define int32_indefinite ((Bit32s)0x80000000)
+#define int64_indefinite BX_CONST64(0x8000000000000000)
+
+#define uint16_indefinite (0xffff)
+#define uint32_indefinite (0xffffffff)
+#define uint64_indefinite BX_CONST64(0xffffffffffffffff)
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format shared by the NaN converters in this header.
+*----------------------------------------------------------------------------*/
+
+typedef struct {
+    int sign;      /* sign of the source NaN, taken from its top bit */
+    Bit64u hi, lo; /* fraction left-justified: the converters shift the top fraction bit to bit 63 of hi */
+} commonNaNT;
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float16 float16_default_nan;
+
+#define float16_fraction extractFloat16Frac
+#define float16_exp extractFloat16Exp
+#define float16_sign extractFloat16Sign
+
+/*----------------------------------------------------------------------------
+| Extracts the fraction field (bits 9..0) of the half-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16u extractFloat16Frac(float16 a)
+{
+    return (Bit16u) (a & 0x03FF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent field (bits 14..10) of the half-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat16Exp(float16 a)
+{
+    return (Bit16s) ((a >> 10) & 0x1F);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 15) of the half-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat16Sign(float16 a)
+{
+    return (int) (a >> 15);
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| half-precision floating-point value, returning the result.  The three
+| fields are shifted into position and then simply added together, so any
+| integer portion of `zSig' carries into the exponent.  Since a properly
+| normalized significand has an integer portion equal to 1, `zExp' should
+| be 1 less than the desired result exponent whenever `zSig' is a complete,
+| normalized significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float16 packFloat16(int zSign, int zExp, Bit16u zSig)
+{
+    const Bit16u signField = (Bit16u) zSign, expField = (Bit16u) zExp;
+    return (signField << 15) + (expField << 10) + zSig;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the half-precision floating-point value `a' is a NaN
+| (signaling or quiet); yields 1 if so and 0 otherwise.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float16_is_nan(float16 a)
+{
+    return ((Bit16u) (a << 1)) > 0xF800;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the half-precision floating-point value `a' is a signaling
+| NaN: all-ones exponent, bit 9 clear, and a nonzero remaining fraction.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float16_is_signaling_nan(float16 a)
+{
+    return ((a & 0x7E00) == 0x7C00) && ((a & 0x01FF) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the half-precision floating-point value `a' is denormal,
+| i.e. has a nonzero fraction field together with a zero exponent field.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float16_is_denormal(float16 a)
+{
+    return (extractFloat16Frac(a) != 0) && (extractFloat16Exp(a) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Flushes a denormal float16 to a zero of the same sign; any other value is
+| returned unchanged.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float16 float16_denormal_to_zero(float16 a)
+{
+    return float16_is_denormal(a) ? (float16) (a & 0x8000) : a;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the half-precision NaN `a' to the canonical NaN format, raising
+| the invalid exception first if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float16ToCommonNaN(float16 a, struct float_status_t *status)
+{
+    commonNaNT canon;
+    if (float16_is_signaling_nan(a))
+        float_raise(status, float_flag_invalid);
+    canon.hi = ((Bit64u) a) << 54;
+    canon.lo = 0;
+    canon.sign = a >> 15;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to a half-precision NaN with bit 9 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float16 commonNaNToFloat16(commonNaNT a)
+{
+    const Bit16u payload = (Bit16u) (a.hi >> 54);
+    return (((Bit16u) a.sign) << 15) | 0x7E00 | payload;
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Commonly used single-precision floating point constants
+*----------------------------------------------------------------------------*/
+extern const float32 float32_negative_inf;
+extern const float32 float32_positive_inf;
+extern const float32 float32_negative_zero;
+extern const float32 float32_positive_zero;
+extern const float32 float32_negative_one;
+extern const float32 float32_positive_one;
+extern const float32 float32_max_float;
+extern const float32 float32_min_float;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float32 float32_default_nan;
+
+#define float32_fraction extractFloat32Frac
+#define float32_exp extractFloat32Exp
+#define float32_sign extractFloat32Sign
+
+#define FLOAT32_EXP_BIAS 0x7F
+
+/*----------------------------------------------------------------------------
+| Extracts the fraction field (bits 22..0) of the single-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
+{
+    return (Bit32u) (a & 0x007FFFFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent field (bits 30..23) of the single-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
+{
+    return (Bit16s) ((a >> 23) & 0xFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 31) of the single-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat32Sign(float32 a)
+{
+    return (int) (a >> 31);
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result.  The shifted
+| fields are simply added, so any integer portion of `zSig' carries into the
+| exponent.  Since a properly normalized significand has an integer portion
+| equal to 1, `zExp' should be 1 less than the desired result exponent
+| whenever `zSig' is a complete, normalized significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 packFloat32(int zSign, Bit16s zExp, Bit32u zSig)
+{
+    const Bit32u signField = ((Bit32u) zSign) << 31;
+    const Bit32u expField = ((Bit32u) zExp) << 23;
+    return signField + expField + zSig;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the single-precision floating-point value `a' is a NaN
+| (signaling or quiet); yields 1 if so and 0 otherwise.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float32_is_nan(float32 a)
+{
+    return ((Bit32u) (a << 1)) > 0xFF000000;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the single-precision floating-point value `a' is a signaling
+| NaN: all-ones exponent, bit 22 clear, and a nonzero remaining fraction.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float32_is_signaling_nan(float32 a)
+{
+    return ((a & 0x7FC00000) == 0x7F800000) && ((a & 0x003FFFFF) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the single-precision floating-point value `a' is denormal,
+| i.e. has a nonzero fraction field together with a zero exponent field.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float32_is_denormal(float32 a)
+{
+    return (extractFloat32Frac(a) != 0) && (extractFloat32Exp(a) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Flushes a denormal float32 to a zero of the same sign; any other value is
+| returned unchanged.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 float32_denormal_to_zero(float32 a)
+{
+    return float32_is_denormal(a) ? (a & 0x80000000) : a;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the single-precision NaN `a' to the canonical NaN format, raising
+| the invalid exception first if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float32ToCommonNaN(float32 a, struct float_status_t *status)
+{
+    commonNaNT canon;
+    if (float32_is_signaling_nan(a))
+        float_raise(status, float_flag_invalid);
+    canon.hi = ((Bit64u) a) << 41;
+    canon.lo = 0;
+    canon.sign = a >> 31;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to a single-precision NaN with bit 22 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 commonNaNToFloat32(commonNaNT a)
+{
+    const Bit32u payload = (Bit32u) (a.hi >> 41);
+    return (((Bit32u) a.sign) << 31) | 0x7FC00000 | payload;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Propagates the single-precision NaN `a': raises the invalid exception if
+| `a' is a signaling NaN, then returns `a' with bit 22 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 propagateFloat32NaNOne(float32 a, struct float_status_t *status)
+{
+    const int signaling = float32_is_signaling_nan(a);
+    if (signaling) float_raise(status, float_flag_invalid);
+    /* OR-ing in bit 22 turns a signaling NaN into its quiet counterpart */
+    return a | 0x00400000;
+}
+
+/*----------------------------------------------------------------------------
+| Commonly used double-precision floating point constants
+*----------------------------------------------------------------------------*/
+extern const float64 float64_negative_inf;
+extern const float64 float64_positive_inf;
+extern const float64 float64_negative_zero;
+extern const float64 float64_positive_zero;
+extern const float64 float64_negative_one;
+extern const float64 float64_positive_one;
+extern const float64 float64_max_float;
+extern const float64 float64_min_float;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float64 float64_default_nan;
+
+#define float64_fraction extractFloat64Frac
+#define float64_exp extractFloat64Exp
+#define float64_sign extractFloat64Sign
+
+#define FLOAT64_EXP_BIAS 0x3FF
+
+/*----------------------------------------------------------------------------
+| Extracts the fraction field (bits 51..0) of the double-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
+{
+    return (Bit64u) (a & BX_CONST64(0x000FFFFFFFFFFFFF));
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent field (bits 62..52) of the double-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
+{
+    return (Bit16s) ((a >> 52) & 0x7FF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 63) of the double-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat64Sign(float64 a)
+{
+    return (int) (a >> 63);
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result.  The shifted
+| fields are simply added, so any integer portion of `zSig' carries into the
+| exponent.  Since a properly normalized significand has an integer portion
+| equal to 1, `zExp' should be 1 less than the desired result exponent
+| whenever `zSig' is a complete, normalized significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 packFloat64(int zSign, Bit16s zExp, Bit64u zSig)
+{
+    const Bit64u signField = ((Bit64u) zSign) << 63;
+    const Bit64u expField = ((Bit64u) zExp) << 52;
+    return signField + expField + zSig;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the double-precision floating-point value `a' is a NaN
+| (signaling or quiet); yields 1 if so and 0 otherwise.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float64_is_nan(float64 a)
+{
+    return ((Bit64u) (a << 1)) > BX_CONST64(0xFFE0000000000000);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the double-precision value `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float64_is_signaling_nan(float64 a)
+{
+    return ((a & BX_CONST64(0x7FF8000000000000)) == BX_CONST64(0x7FF0000000000000))
+        && ((a & BX_CONST64(0x0007FFFFFFFFFFFF)) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the double-precision floating-point value `a' is denormal,
+| i.e. has a nonzero fraction field together with a zero exponent field.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float64_is_denormal(float64 a)
+{
+    return (extractFloat64Frac(a) != 0) && (extractFloat64Exp(a) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Flushes a denormal float64 to a zero of the same sign; any other value is
+| returned unchanged.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 float64_denormal_to_zero(float64 a)
+{
+    return float64_is_denormal(a) ? (a & ((Bit64u)(1) << 63)) : a;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision NaN `a' to the canonical NaN format, raising
+| the invalid exception first if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float64ToCommonNaN(float64 a, struct float_status_t *status)
+{
+    commonNaNT canon;
+    if (float64_is_signaling_nan(a))
+        float_raise(status, float_flag_invalid);
+    canon.hi = a << 12;
+    canon.lo = 0;
+    canon.sign = (int) (a >> 63);
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to a double-precision NaN with bit 51 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 commonNaNToFloat64(commonNaNT a)
+{
+    const Bit64u payload = a.hi >> 12;
+    return (((Bit64u) a.sign) << 63) | BX_CONST64(0x7FF8000000000000) | payload;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float64 propagateFloat64NaN(float64 a, float64 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Propagates the double-precision NaN `a': raises the invalid exception if
+| `a' is a signaling NaN, then returns `a' with bit 51 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 propagateFloat64NaNOne(float64 a, struct float_status_t *status)
+{
+    const int signaling = float64_is_signaling_nan(a);
+    if (signaling) float_raise(status, float_flag_invalid);
+    /* OR-ing in bit 51 turns a signaling NaN into its quiet counterpart */
+    return a | BX_CONST64(0x0008000000000000);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `exp' value holds the sign and exponent bits, and the `fraction' value
+| holds the 64-bit significand.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_exp 0xFFFF
+#define floatx80_default_nan_fraction BX_CONST64(0xC000000000000000)
+
+#define floatx80_fraction extractFloatx80Frac
+#define floatx80_exp extractFloatx80Exp
+#define floatx80_sign extractFloatx80Sign
+
+#define FLOATX80_EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the `fraction' member of the extended double-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloatx80Frac(floatx80 a)
+{
+    const Bit64u significand = a.fraction;
+    return significand;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent bits (the low 15 bits of the `exp' member) of the
+| extended double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32s extractFloatx80Exp(floatx80 a)
+{
+    return (Bit32s) (a.exp & 0x7FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 15 of the `exp' member) of the extended
+| double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloatx80Sign(floatx80 a)
+{
+    return (int) (a.exp >> 15);
+}
+
+/*----------------------------------------------------------------------------
+| Builds an extended double-precision value: `zSign' lands in bit 15 of the
+| `exp' member, `zExp' is added below it, and `zSig' becomes the fraction.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 packFloatx80(int zSign, Bit32s zExp, Bit64u zSig)
+{
+    floatx80 packed;
+    packed.exp = (zSign << 15) + zExp;
+    packed.fraction = zSig;
+    return packed;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the extended double-precision value `a' is a NaN: all-ones
+| exponent and at least one fraction bit below bit 63 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_nan(floatx80 a)
+{
+    return ((a.exp & 0x7FFF) == 0x7FFF) && ((Bit64u) (a.fraction << 1) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the extended double-precision value `a' is a signaling NaN:
+| all-ones exponent, fraction bit 62 clear, and some bit below bit 62 set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_signaling_nan(floatx80 a)
+{
+    const Bit64u withoutQuietBit = a.fraction & ~BX_CONST64(0x4000000000000000);
+    if ((a.exp & 0x7FFF) != 0x7FFF) return 0;
+    return ((Bit64u) (withoutQuietBit << 1) != 0) && (a.fraction == withoutQuietBit);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision value `a' uses an unsupported
+| encoding (nonzero exponent with fraction bit 63 clear); otherwise 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_unsupported(floatx80 a)
+{
+    return ((a.exp & 0x7FFF) != 0) && ((a.fraction & BX_CONST64(0x8000000000000000)) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision NaN `a' to the canonical NaN
+| format, raising the invalid exception first if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT floatx80ToCommonNaN(floatx80 a, struct float_status_t *status)
+{
+    commonNaNT canon;
+    if (floatx80_is_signaling_nan(a))
+        float_raise(status, float_flag_invalid);
+    canon.hi = a.fraction << 1;
+    canon.lo = 0;
+    canon.sign = a.exp >> 15;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to an extended double-precision NaN whose
+| fraction has bits 63 and 62 set and whose exponent field is all ones.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 commonNaNToFloatx80(commonNaNT a)
+{
+    floatx80 result;
+    result.exp = (((Bit16u) a.sign) << 15) | 0x7FFF;
+    result.fraction = BX_CONST64(0xC000000000000000) | (a.hi >> 1);
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Propagates the extended double-precision NaN `a': raises the invalid
+| exception if `a' is a signaling NaN, then returns `a' with the top two
+| bits (63 and 62) of its fraction set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 propagateFloatx80NaNOne(floatx80 a, struct float_status_t *status)
+{
+    const int signaling = floatx80_is_signaling_nan(a);
+    if (signaling) float_raise(status, float_flag_invalid);
+
+    /* a signaling NaN has bit 62 clear; setting it makes the result quiet */
+    a.fraction = a.fraction | BX_CONST64(0xC000000000000000);
+    return a;
+}
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN. The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_hi BX_CONST64(0xFFFF800000000000)
+#define float128_default_nan_lo BX_CONST64(0x0000000000000000)
+
+#define float128_exp extractFloat128Exp
+
+/*----------------------------------------------------------------------------
+| Returns the low 64 fraction bits (the `lo' member) of the float128 `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat128Frac1(float128 a)
+{
+    const Bit64u lowFraction = a.lo;
+    return lowFraction;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the most-significant 48 fraction bits (the low 48 bits of `hi')
+| of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat128Frac0(float128 a)
+{
+    return (Bit64u) (a.hi & BX_CONST64(0x0000FFFFFFFFFFFF));
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent bits (bits 62..48 of `hi') of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32s extractFloat128Exp(float128 a)
+{
+    return (Bit32s) ((a.hi >> 48) & 0x7FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 63 of `hi') of the quadruple-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat128Sign(float128 a)
+{
+    return (int) (a.hi >> 63);
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 64 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 packFloat128Four(int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1)
+{
+    float128 packed;
+    packed.hi = (((Bit64u) zSign) << 63) + (((Bit64u) zExp) << 48) + zSig0;
+    packed.lo = zSig1;
+    return packed;
+}
+
+/*----------------------------------------------------------------------------
+| Packs two 64-bit integers, `zHi' and `zLo', into a quadruple-precision
+| floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 packFloat128(Bit64u zHi, Bit64u zLo)
+{
+    float128 packed;
+    packed.hi = zHi;
+    packed.lo = zLo;
+    return packed;
+}
+
+#ifdef _MSC_VER
+#define PACK_FLOAT_128(hi,lo) { lo, hi }
+#else
+#define PACK_FLOAT_128(hi,lo) packFloat128(BX_CONST64(hi),BX_CONST64(lo))
+#endif
+
+/*----------------------------------------------------------------------------
+| Tests whether the quadruple-precision floating-point value `a' is a NaN:
+| all-ones exponent and a nonzero 112-bit fraction.  Yields 1 or 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float128_is_nan(float128 a)
+{
+    if ((Bit64u) (a.hi << 1) < BX_CONST64(0xFFFE000000000000)) return 0;
+    return (a.lo != 0) || ((a.hi & BX_CONST64(0x0000FFFFFFFFFFFF)) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the quadruple-precision value `a' is a signaling NaN:
+| all-ones exponent, bit 47 of `hi' clear, and some lower fraction bit set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float128_is_signaling_nan(float128 a)
+{
+    if (((a.hi >> 47) & 0xFFFF) != 0xFFFE) return 0;
+    return (a.lo != 0) || ((a.hi & BX_CONST64(0x00007FFFFFFFFFFF)) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the quadruple-precision NaN `a' to the canonical NaN format,
+| raising the invalid exception first if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float128ToCommonNaN(float128 a, struct float_status_t *status)
+{
+    commonNaNT canon;
+    if (float128_is_signaling_nan(a))
+        float_raise(status, float_flag_invalid);
+    shortShift128Left(a.hi, a.lo, 16, &canon.hi, &canon.lo);
+    canon.sign = (int) (a.hi >> 63);
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to a quadruple-precision NaN whose exponent
+| field is all ones and whose bit 47 of `hi' is set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 commonNaNToFloat128(commonNaNT a)
+{
+    float128 quad;
+    shift128Right(a.hi, a.lo, 16, &quad.hi, &quad.lo);
+    quad.hi = quad.hi | (((Bit64u) a.sign) << 63) | BX_CONST64(0x7FFF800000000000);
+    return quad;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float128 propagateFloat128NaN(float128 a, float128 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* FLOAT128 */
+
+#endif
diff --git a/src/cpu/softfloat/softfloat.cc b/src/cpu/softfloat/softfloat.cc
new file mode 100644
index 000000000..0802089b9
--- /dev/null
+++ b/src/cpu/softfloat/softfloat.cc
@@ -0,0 +1,4012 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#define USE_estimateDiv128To64
+#define USE_estimateSqrt32
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+const unsigned float_all_exceptions_mask = 0x3f;
+
+float32 int32_to_float32(Bit32s a, struct float_status_t *status)
+{
+    if (a == 0) { return 0; }   /* zero maps to the all-zero bit pattern */
+    if (a == (Bit32s) 0x80000000) { return packFloat32(1, 0x9E, 0); }   /* most negative value is packed directly */
+    const int negative = (a < 0);
+    return normalizeRoundAndPackFloat32(negative, 0x9C, negative ? -a : a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64(Bit32s a)
+{
+    if (a == 0) return 0;
+    int zSign = (a < 0);
+    /* Negate in unsigned arithmetic: `-a' overflows for the most negative Bit32s. */
+    Bit32u absA = zSign ? ((Bit32u) 0 - (Bit32u) a) : (Bit32u) a;
+    int shiftCount = countLeadingZeros32(absA) + 21;
+    return packFloat64(zSign, 0x432 - shiftCount, ((Bit64u) absA)<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32(Bit64s a, struct float_status_t *status)
+{
+    if (a == 0) return 0;
+    int zSign = (a < 0);
+    /* Negate as unsigned: `-a' on the most negative Bit64s is signed overflow. */
+    Bit64u absA = zSign ? ((Bit64u) 0 - (Bit64u) a) : (Bit64u) a;
+    int shiftCount = countLeadingZeros64(absA) - 40;
+    if (0 <= shiftCount) {
+        /* At least 40 leading zeros: the magnitude is below 2^24 and is packed as is. */
+        return packFloat32(zSign, 0x95 - shiftCount, (Bit32u)(absA<<shiftCount));
+    }
+    shiftCount += 7;   /* re-bias the shift before the rounding path */
+    if (shiftCount < 0) {
+        absA = shift64RightJamming(absA, -shiftCount);
+    }
+    else {
+        absA <<= shiftCount;
+    }
+    return roundAndPackFloat32(zSign, 0x9C - shiftCount, (Bit32u) absA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64(Bit64s a, struct float_status_t *status)
+{
+    /* Zero and the most negative input are returned without normalization. */
+    if (a == 0) { return 0; }
+    if (a == (Bit64s) BX_CONST64(0x8000000000000000)) { return packFloat64(1, 0x43E, 0); }
+    /* Every other value: hand sign and magnitude to the normalizing packer. */
+    const int negative = (a < 0);
+    return normalizeRoundAndPackFloat64(negative, 0x43C, negative ? -a : a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit unsigned integer `a' to the
+| single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float32 uint32_to_float32(Bit32u a, struct float_status_t *status)
+{
+    if (a == 0) return 0;
+    /* Bit 31 set: halve `a' but OR the dropped low bit back in as a sticky bit, so ties and inexact are judged on the full value. */
+    if (a & 0x80000000) return normalizeRoundAndPackFloat32(0, 0x9D, (a >> 1) | (a & 1), status);
+    return normalizeRoundAndPackFloat32(0, 0x9C, a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit unsigned integer `a' to the
+| double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 uint32_to_float64(Bit32u a)
+{
+    if (a == 0) { return 0; }
+    /* No rounding helper is involved: the widened value is shifted and packed directly. */
+    const int leftShift = countLeadingZeros32(a) + 21;
+    return packFloat64(0, 0x432 - leftShift, ((Bit64u) a) << leftShift);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit unsigned integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 uint64_to_float32(Bit64u a, struct float_status_t *status)
+{
+    if (a == 0) { return 0; }
+
+    int shift = countLeadingZeros64(a) - 40;
+    if (shift >= 0) {
+        /* At least 40 leading zeros: the value is below 2^24 and is packed as is. */
+        return packFloat32(0, 0x95 - shift, (Bit32u) (a << shift));
+    }
+    /* Wider values go through roundAndPackFloat32 after re-biasing the shift by 7. */
+    shift += 7;
+    if (shift >= 0) {
+        a <<= shift;
+    } else {
+        a = shift64RightJamming(a, -shift);
+    }
+    return roundAndPackFloat32(0, 0x9C - shift, (Bit32u) a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit unsigned integer `a' to the
+| double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float64 uint64_to_float64(Bit64u a, struct float_status_t *status)
+{
+    if (a == 0) return 0;
+    /* Top bit set: halve `a' but keep the dropped low bit as a sticky bit so rounding sees it. */
+    if (a & BX_CONST64(0x8000000000000000))
+        return normalizeRoundAndPackFloat64(0, 0x43D, (a >> 1) | (a & 1), status);
+    return normalizeRoundAndPackFloat64(0, 0x43C, a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN or the
+| conversion overflows the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s float32_to_int32(float32 a, struct float_status_t *status)
+{
+    Bit32u fraction = extractFloat32Frac(a);
+    const Bit16s exponent = extractFloat32Exp(a);
+    int negative = extractFloat32Sign(a);
+
+    if (exponent == 0xFF && fraction != 0) negative = 0;   /* NaN input: the sign is cleared */
+    if (exponent != 0) fraction |= 0x00800000;             /* set bit 23 above the fraction */
+    else if (get_denormals_are_zeros(status)) fraction = 0;
+    /* Significand goes to the upper half of a 64-bit word, then is jam-shifted right by 0xAF - exponent. */
+    Bit64u wide = Bit64u(fraction) << 32;
+    const int rightShift = 0xAF - exponent;
+    if (rightShift > 0) wide = shift64RightJamming(wide, rightShift);
+    return roundAndPackInt32(negative, wide, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s float32_to_int32_round_to_zero(float32 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp;
+    Bit32u aSig;
+    Bit32s z;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    int shiftCount = aExp - 0x9E; // negative for every input that gets past the two early returns
+    if (0 <= shiftCount) { // exponent field of 0x9E or more (NaN and infinity included)
+        if (a != 0xCF000000) { // 0xCF000000 is the one bit pattern here that does not raise invalid
+            float_raise(status, float_flag_invalid);
+        }
+        return (Bit32s)(int32_indefinite);
+    }
+    else if (aExp <= 0x7E) { // exponent field of 0x7E or less: the result is 0
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp | aSig) float_raise(status, float_flag_inexact); // anything other than zero was truncated
+        return 0;
+    }
+    aSig = (aSig | 0x800000)<<8; // fraction with bit 23 set, moved to the top of the 32-bit word
+    z = aSig>>(-shiftCount);
+    if ((Bit32u) (aSig<<(shiftCount & 31))) { // the bits shifted out of z were not all zero
+        float_raise(status, float_flag_inexact);
+    }
+    if (aSign) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit unsigned integer format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-point Arithmetic,
+| except that the conversion is always rounded toward zero.  If `a' is a NaN
+| or conversion overflows, the largest positive integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32u float32_to_uint32_round_to_zero(float32 a, struct float_status_t *status)
+{
+    Bit32u aSig = extractFloat32Frac(a);
+    Bit16s aExp = extractFloat32Exp(a);
+    int aSign = extractFloat32Sign(a);
+    int shiftCount = aExp - 0x9E;
+
+    if (aExp <= 0x7E) {
+        // magnitude below 1: result is 0, inexact unless the input is (treated as) zero
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp | aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    else if (0 < shiftCount || aSign) {
+        // exponent too large for 32 bits (NaN and infinity included), or negative with magnitude >= 1
+        float_raise(status, float_flag_invalid);
+        return uint32_indefinite;
+    }
+
+    aSig = (aSig | 0x800000)<<8;
+    Bit32u z = aSig >> (-shiftCount);
+    // shiftCount == 0 means no bits were shifted out of z; testing aSig<<0 there
+    // is always non-zero and would report a spurious inexact for exact values.
+    if (shiftCount && (aSig << (shiftCount & 31))) {
+        float_raise(status, float_flag_inexact);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s float32_to_int64(float32 a, struct float_status_t *status)
+{
+    Bit32u fraction = extractFloat32Frac(a);
+    const Bit16s exponent = extractFloat32Exp(a);
+    const int negative = extractFloat32Sign(a);
+
+    /* An exponent field above 0xBE (this includes infinities and NaNs) is rejected. */
+    const int rightShift = 0xBE - exponent;
+    if (rightShift < 0) {
+        float_raise(status, float_flag_invalid);
+        return (Bit64s)(int64_indefinite);
+    }
+
+    if (exponent != 0) fraction |= 0x00800000;
+    else if (get_denormals_are_zeros(status)) fraction = 0;
+
+    /* Significand goes to the top of a 64-bit word; the helper fills `extra' alongside it. */
+    Bit64u wide = ((Bit64u) fraction) << 40;
+    Bit64u extra;
+    shift64ExtraRightJamming(wide, 0, rightShift, &wide, &extra);
+    return roundAndPackInt64(negative, wide, extra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s float32_to_int64_round_to_zero(float32 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp;
+    Bit32u aSig;
+    Bit64u aSig64;
+    Bit64s z;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    int shiftCount = aExp - 0xBE; // negative for every input that gets past the two early returns
+    if (0 <= shiftCount) { // exponent field of 0xBE or more (NaN and infinity included)
+        if (a != 0xDF000000) { // 0xDF000000 is the one bit pattern here that does not raise invalid
+            float_raise(status, float_flag_invalid);
+        }
+        return (Bit64s)(int64_indefinite);
+    }
+    else if (aExp <= 0x7E) { // exponent field of 0x7E or less: the result is 0
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp | aSig) float_raise(status, float_flag_inexact); // anything other than zero was truncated
+        return 0;
+    }
+    aSig64 = aSig | 0x00800000; // fraction with bit 23 set
+    aSig64 <<= 40; // moved to the top of the 64-bit word
+    z = aSig64>>(-shiftCount);
+    if ((Bit64u) (aSig64<<(shiftCount & 63))) { // the bits shifted out of z were not all zero
+        float_raise(status, float_flag_inexact);
+    }
+    if (aSign) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
+| except that the conversion is always rounded toward zero. If `a' is a NaN
+| or the conversion overflows, the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64u float32_to_uint64_round_to_zero(float32 a, struct float_status_t *status)
+{
+    Bit32u aSig = extractFloat32Frac(a);
+    Bit16s aExp = extractFloat32Exp(a);
+    int aSign = extractFloat32Sign(a);
+    int shiftCount = aExp - 0xBE;
+
+    if (aExp <= 0x7E) {
+        // magnitude below 1: result is 0, inexact unless the input is (treated as) zero
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp | aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    else if (0 < shiftCount || aSign) {
+        // exponent too large for 64 bits (NaN and infinity included), or negative with magnitude >= 1
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    // fraction with bit 23 set, moved to the top of the 64-bit word
+    Bit64u aSig64 = aSig | 0x00800000;
+    aSig64 <<= 40;
+    Bit64u z = aSig64>>(-shiftCount);
+    // shiftCount == 0 means no bits were shifted out of z; testing aSig64<<0 there
+    // is always non-zero and would report a spurious inexact for exact values.
+    if (shiftCount && (Bit64u) (aSig64<<(shiftCount & 63))) {
+        float_raise(status, float_flag_inexact);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN or the conversion
+| overflows, the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64u float32_to_uint64(float32 a, struct float_status_t *status)
+{
+    Bit32u fraction = extractFloat32Frac(a);
+    const Bit16s exponent = extractFloat32Exp(a);
+    const int negative = extractFloat32Sign(a);
+
+    /* With denormals-are-zeros set, a zero exponent field means the input counts as zero. */
+    if (get_denormals_are_zeros(status) && exponent == 0) {
+        fraction = 0;
+    }
+
+    /* Negative inputs with an exponent field above 0x7E are rejected before any rounding; */
+    /* smaller negative inputs continue to roundAndPackUint64 together with their sign.    */
+    if (negative && exponent > 0x7E) {
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    const Bit16s rightShift = 0xBE - exponent;
+    if (exponent != 0) {
+        fraction |= 0x00800000;
+    }
+
+    /* A negative shift means the exponent field is above 0xBE. */
+    if (rightShift < 0) {
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    Bit64u wide = ((Bit64u) fraction) << 40, extra;
+    shift64ExtraRightJamming(wide, 0, rightShift, &wide, &extra);
+    return roundAndPackUint64(negative, wide, extra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit unsigned integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN or the conversion
+| overflows, the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32u float32_to_uint32(float32 a, struct float_status_t *status)
+{
+    // Convert through the 64-bit routine, then range-check the result.
+    const Bit64u wide = float32_to_uint64(a, status);
+    if (wide <= 0xffffffff) {
+        return (Bit32u) wide;
+    }
+    // Out of 32-bit range: the flag word is overwritten (not OR-ed) with invalid only.
+    status->float_exception_flags = float_flag_invalid;
+    return uint32_indefinite;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64(float32 a, struct float_status_t *status)
+{
+    Bit32u fraction = extractFloat32Frac(a);
+    Bit16s exponent = extractFloat32Exp(a);
+    const int negative = extractFloat32Sign(a);
+
+    if (exponent == 0xFF) {   /* NaN when the fraction is non-zero, otherwise infinity */
+        return fraction ? commonNaNToFloat64(float32ToCommonNaN(a, status))
+                        : packFloat64(negative, 0x7FF, 0);
+    }
+    if (exponent == 0) {
+        /* Zero, or a denormal under denormals-are-zeros, keeps only its sign. */
+        if (fraction == 0 || get_denormals_are_zeros(status))
+            return packFloat64(negative, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(fraction, &exponent, &fraction);
+        exponent -= 1;
+    }
+    return packFloat64(negative, exponent + 0x380, ((Bit64u) fraction) << 29);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int(float32 a, Bit8u scale, struct float_status_t *status)
+{
+    Bit32u lastBitMask, roundBitsMask;
+    int roundingMode = get_float_rounding_mode(status);
+    Bit16s aExp = extractFloat32Exp(a);
+    scale &= 0xf; // only the low four bits of scale are used
+
+    if ((aExp == 0xFF) && extractFloat32Frac(a)) { // NaN input
+        return propagateFloat32NaNOne(a, status);
+    }
+
+    aExp += scale; // scale the exponent
+
+    if (0x96 <= aExp) { // scaled exponent of 0x96 or more (infinity included): returned unchanged
+        return a;
+    }
+
+    if (get_denormals_are_zeros(status)) {
+        a = float32_denormal_to_zero(a);
+    }
+
+    if (aExp <= 0x7E) { // scaled exponent of 0x7E or less
+        if ((Bit32u) (a<<1) == 0) return a; // every bit except the sign is clear: zero is returned as is
+        float_raise(status, float_flag_inexact);
+        int aSign = extractFloat32Sign(a);
+        switch (roundingMode) {
+         case float_round_nearest_even:
+            if ((aExp == 0x7E) && extractFloat32Frac(a)) { // scaled exponent 0x7E with a non-zero fraction
+                return packFloat32(aSign, 0x7F - scale, 0);
+            }
+            break;
+         case float_round_down:
+            return aSign ? packFloat32(1, 0x7F - scale, 0) : float32_positive_zero;
+         case float_round_up:
+            return aSign ? float32_negative_zero : packFloat32(0, 0x7F - scale, 0);
+        }
+        return packFloat32(aSign, 0, 0); // all remaining cases give a zero with the input's sign
+    }
+
+    lastBitMask = 1;
+    lastBitMask <<= 0x96 - aExp; // lowest bit of `a' that is kept after rounding
+    roundBitsMask = lastBitMask - 1; // every bit below it
+    float32 z = a;
+    if (roundingMode == float_round_nearest_even) {
+        z += lastBitMask>>1; // add half of the last kept bit
+        if ((z & roundBitsMask) == 0) z &= ~lastBitMask; // low bits now zero: clear the last kept bit
+    }
+    else if (roundingMode != float_round_to_zero) {
+        if (extractFloat32Sign(z) ^ (roundingMode == float_round_up)) { // rounding direction is away from zero for this sign
+            z += roundBitsMask;
+        }
+    }
+    z &= ~roundBitsMask; // discard everything below the last kept bit
+    if (z != a) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the fractional portion of single-precision floating-point value `a',
+| and returns the result  as a  single-precision  floating-point value. The
+| fractional results are precise. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_frc(float32 a, struct float_status_t *status)
+{
+    int roundingMode = get_float_rounding_mode(status);
+
+    Bit16s aExp = extractFloat32Exp(a);
+    Bit32u aSig = extractFloat32Frac(a);
+    int aSign = extractFloat32Sign(a);
+
+    if (aExp == 0xFF) {
+        if (aSig) return propagateFloat32NaNOne(a, status);
+        float_raise(status, float_flag_invalid); // infinity: invalid is raised and the default NaN returned
+        return float32_default_nan;
+    }
+
+    if (aExp >= 0x96) { // exponent field of 0x96 or more: result is zero, negative only when rounding down
+        return packFloat32(roundingMode == float_round_down, 0, 0);
+    }
+
+    if (aExp < 0x7F) { // exponent field below 0x7F: `a' is returned as is unless a case below applies
+        if (aExp == 0) {
+            if (aSig == 0 || get_denormals_are_zeros(status))
+                return packFloat32(roundingMode == float_round_down, 0, 0);
+
+            float_raise(status, float_flag_denormal);
+            if (! float_exception_masked(status, float_flag_underflow))
+                float_raise(status, float_flag_underflow);
+
+            if(get_flush_underflow_to_zero(status)) { // flush mode: denormal becomes a zero with the input's sign
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat32(aSign, 0, 0);
+            }
+        }
+        return a;
+    }
+
+    Bit32u lastBitMask = 1 << (0x96 - aExp); // shift amount is between 1 and 23 here
+    Bit32u roundBitsMask = lastBitMask - 1;
+
+    aSig &= roundBitsMask; // keep only the fraction bits below lastBitMask
+    aSig <<= 7;
+    aExp--;
+
+    if (aSig == 0) // nothing left below lastBitMask: zero, negative only when rounding down
+       return packFloat32(roundingMode == float_round_down, 0, 0);
+
+    return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent portion of single-precision floating-point value 'a',
+| and returns the result as a single-precision floating-point value
+| representing unbiased integer exponent. The operation is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_getexp(float32 a, struct float_status_t *status)
+{
+    Bit16s exponent = extractFloat32Exp(a);
+    Bit32u fraction = extractFloat32Frac(a);
+
+    switch (exponent) {
+    case 0xFF:   /* NaN is propagated; an infinity of either sign gives +infinity */
+        if (fraction) return propagateFloat32NaNOne(a, status);
+        return float32_positive_inf;
+    case 0:      /* zero, or a denormal under denormals-are-zeros, gives -infinity */
+        if (fraction == 0 || get_denormals_are_zeros(status)) return float32_negative_inf;
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(fraction, &exponent, &fraction);
+        break;
+    default:
+        break;
+    }
+    /* Subtract 0x7F from the exponent field and convert that integer to float32. */
+    return int32_to_float32(exponent - 0x7F, status);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the mantissa of single-precision floating-point value 'a' and
+| returns the result as a single-precision floating-point after applying
+| the mantissa interval normalization and sign control. The operation is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_getmant(float32 a, struct float_status_t *status, int sign_ctrl, int interv)
+{
+    Bit16s aExp = extractFloat32Exp(a);
+    Bit32u aSig = extractFloat32Frac(a);
+    int aSign = extractFloat32Sign(a);
+
+    if (aExp == 0xFF) {
+        if (aSig) return propagateFloat32NaNOne(a, status);
+        if (aSign) {
+            if (sign_ctrl & 0x2) { // bit 1 of sign_ctrl: a negative infinity raises invalid
+                float_raise(status, float_flag_invalid);
+                return float32_default_nan;
+            }
+        }
+        return packFloat32(~sign_ctrl & aSign, 0x7F, 0); // sign survives only when bit 0 of sign_ctrl is clear
+    }
+
+    if (aExp == 0 && (aSig == 0 || get_denormals_are_zeros(status))) { // zero, or denormal treated as zero
+        return packFloat32(~sign_ctrl & aSign, 0x7F, 0);
+    }
+
+    if (aSign) {
+        if (sign_ctrl & 0x2) { // bit 1 of sign_ctrl: a negative non-zero input raises invalid
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+    }
+
+    if (aExp == 0) { // denormal input that was not flushed above
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+//      aExp += 0x7E;
+        aSig &= 0x7FFFFF; // keep only the low 23 bits
+    }
+
+    switch(interv) { // values other than 0..3 leave aExp unchanged
+    case 0x0: // interval [1,2)
+        aExp = 0x7F;
+        break;
+    case 0x1: // interval [1/2,2)
+        aExp -= 0x7F;
+        aExp  = 0x7F - (aExp & 0x1); // 0x7E when the difference from 0x7F is odd, else 0x7F
+        break;
+    case 0x2: // interval [1/2,1)
+        aExp = 0x7E;
+        break;
+    case 0x3: // interval [3/4,3/2)
+        aExp = 0x7F - ((aSig >> 22) & 0x1); // 0x7E when bit 22 of the fraction is set
+        break;
+    }
+
+    return packFloat32(~sign_ctrl & aSign, aExp, aSig);
+}
+
+/*----------------------------------------------------------------------------
+| Return the result of a floating point scale of the single-precision floating
+| point value `a' by multiplying it by 2 power of the single-precision
+| floating point value 'b' converted to integral value. If the result cannot
+| be represented in single precision, then the proper overflow response (for
+| positive scaling operand), or the proper underflow response (for negative
+| scaling operand) is issued. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_scalef(float32 a, float32 b, struct float_status_t *status)
+{
+    Bit32u aSig = extractFloat32Frac(a);
+    Bit16s aExp = extractFloat32Exp(a);
+    int aSign = extractFloat32Sign(a);
+    Bit32u bSig = extractFloat32Frac(b);
+    Bit16s bExp = extractFloat32Exp(b);
+    int bSign = extractFloat32Sign(b);
+
+    if (get_denormals_are_zeros(status)) { // denormal operands are treated as having a zero fraction
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (bExp == 0xFF) {
+        if (bSig) return propagateFloat32NaN(a, b, status); // b is a NaN
+    }
+
+    if (aExp == 0xFF) {
+        if (aSig) { // a is a NaN
+            int aIsSignalingNaN = (aSig & 0x00400000) == 0; // bit 22 clear marks a signaling NaN
+            if (aIsSignalingNaN || bExp != 0xFF || bSig)
+                return propagateFloat32NaN(a, b, status);
+
+            return bSign ? 0 : float32_positive_inf; // a is a quiet NaN and b an infinity: 0 for -inf, +inf for +inf
+        }
+
+        if (bExp == 0xFF && bSign) { // a is an infinity and b is -infinity
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        return a;
+    }
+
+    if (aExp == 0) {
+        if (aSig == 0) {
+            if (bExp == 0xFF && ! bSign) { // a is zero (or treated as zero) and b is +infinity
+                float_raise(status, float_flag_invalid);
+                return float32_default_nan;
+            }
+            return a;
+        }
+        float_raise(status, float_flag_denormal);
+    }
+
+    if ((bExp | bSig) == 0) return a; // b is zero (or treated as zero): `a' is returned unchanged
+
+    if (bExp == 0xFF) { // b is an infinity: signed zero for -inf, signed infinity for +inf
+        if (bSign) return packFloat32(aSign, 0, 0);
+        return packFloat32(aSign, 0xFF, 0);
+    }
+
+    if (bExp >= 0x8E) {
+        // handle obvious overflow/underflow result
+        return roundAndPackFloat32(aSign, bSign ? -0x7F : 0xFF, aSig, status);
+    }
+
+    int scale = 0;
+
+    if (bExp <= 0x7E) { // |b| is below 1
+        if (bExp == 0)
+            float_raise(status, float_flag_denormal);
+        scale = -bSign; // 0 for positive b, -1 for negative b
+    }
+    else {
+        int shiftCount = bExp - 0x9E; // between -31 and -17 on this path
+        bSig = (bSig | 0x800000)<<8;
+        scale = bSig>>(-shiftCount); // integer part of |b|
+
+        if (bSign) {
+            if ((Bit32u) (bSig<<(shiftCount & 31))) scale++; // negative b with bits below the integer part: one more step
+            scale = -scale;
+        }
+
+        if (scale >  0x200) scale =  0x200; // clamp to +/-0x200
+        if (scale < -0x200) scale = -0x200;
+    }
+
+    if (aExp != 0) {
+        aSig |= 0x00800000; // set bit 23 above the fraction
+    } else {
+        aExp++; // denormal `a': exponent field bumped from 0 to 1
+    }
+
+    aExp += scale - 1;
+    aSig <<= 7;
+    return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sum of the absolute values of the single-precision values `a'
+| and `b', negated when `zSign' is 1 (`zSign' is ignored for a NaN result).
+| Denormal fractions are cleared first when denormals-are-zeros is set;
+| otherwise a nonzero denormal operand can raise float_flag_denormal here.
+| Follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs(float32 a, float32 b, int zSign, struct float_status_t *status)
+{
+    Bit16s aExp, bExp, zExp;
+    Bit32u aSig, bSig, zSig;
+    Bit16s expDiff;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 6;
+    bSig <<= 6;
+
+    if (0 < expDiff) { /* a has the larger exponent */
+        if (aExp == 0xFF) {
+            if (aSig) return propagateFloat32NaN(a, b, status);
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return a;
+        }
+        if ((aExp == 0) && aSig)
+            float_raise(status, float_flag_denormal);
+
+        if (bExp == 0) {
+            if (bSig) float_raise(status, float_flag_denormal);
+            --expDiff;
+        }
+        else bSig |= 0x20000000; /* b is not denormal: set the bit just above its shifted fraction */
+
+        bSig = shift32RightJamming(bSig, expDiff);
+        zExp = aExp;
+    }
+    else if (expDiff < 0) { /* b has the larger exponent */
+        if (bExp == 0xFF) {
+            if (bSig) return propagateFloat32NaN(a, b, status);
+            if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat32(zSign, 0xFF, 0);
+        }
+        if ((bExp == 0) && bSig)
+            float_raise(status, float_flag_denormal);
+
+        if (aExp == 0) {
+            if (aSig) float_raise(status, float_flag_denormal);
+            ++expDiff;
+        }
+        else aSig |= 0x20000000; /* a is not denormal: set the bit just above its shifted fraction */
+
+        aSig = shift32RightJamming(aSig, -expDiff);
+        zExp = bExp;
+    }
+    else { /* equal exponents */
+        if (aExp == 0xFF) {
+            if (aSig | bSig) return propagateFloat32NaN(a, b, status);
+            return a;
+        }
+        if (aExp == 0) { /* both operands are zeros or denormals */
+            zSig = (aSig + bSig) >> 6;
+            if (aSig | bSig) {
+                float_raise(status, float_flag_denormal);
+                if (get_flush_underflow_to_zero(status) && (extractFloat32Frac(zSig) == zSig)) {
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat32(zSign, 0, 0);
+                }
+                if (! float_exception_masked(status, float_flag_underflow)) {
+                    if (extractFloat32Frac(zSig) == zSig)
+                        float_raise(status, float_flag_underflow);
+                }
+            }
+            return packFloat32(zSign, 0, zSig);
+        }
+        zSig = 0x40000000 + aSig + bSig;
+        return roundAndPackFloat32(zSign, aExp, zSig, status);
+    }
+    aSig |= 0x20000000; /* leading bit of the larger-exponent operand; only the sum aSig + bSig is used below */
+    zSig = (aSig + bSig)<<1;
+    --zExp;
+    if ((Bit32s) zSig < 0) { /* top bit set after doubling: fall back to the undoubled sum */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+    return roundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the difference of the absolute values of the single-precision
+| values `a' and `b', negated when `zSign' is 1 (`zSign' is ignored for a
+| NaN result).  Equal infinities raise float_flag_invalid and yield the
+| default NaN; an exact zero difference is negative only when rounding down.
+| Follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs(float32 a, float32 b, int zSign, struct float_status_t *status)
+{
+    Bit16s aExp, bExp, zExp;
+    Bit32u aSig, bSig, zSig;
+    Bit16s expDiff;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 7;
+    bSig <<= 7;
+    if (0 < expDiff) goto aExpBigger;
+    if (expDiff < 0) goto bExpBigger;
+    if (aExp == 0xFF) { /* equal exponents, both infinity or NaN */
+        if (aSig | bSig) return propagateFloat32NaN(a, b, status);
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+    if (aExp == 0) { /* both operands are zeros or denormals */
+        if (aSig | bSig) float_raise(status, float_flag_denormal);
+        aExp = 1;
+        bExp = 1;
+    }
+    if (bSig < aSig) goto aBigger;
+    if (aSig < bSig) goto bBigger;
+    return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0); /* exact zero: sign set only when rounding down */
+ bExpBigger:
+    if (bExp == 0xFF) {
+        if (bSig) return propagateFloat32NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign ^ 1, 0xFF, 0); /* b is infinite: infinity with the sign flipped */
+    }
+    if ((bExp == 0) && bSig)
+        float_raise(status, float_flag_denormal);
+
+    if (aExp == 0) {
+        if (aSig) float_raise(status, float_flag_denormal);
+        ++expDiff;
+    }
+    else aSig |= 0x40000000; /* a is not denormal: set the bit just above its shifted fraction */
+
+    aSig = shift32RightJamming(aSig, -expDiff);
+    bSig |= 0x40000000;
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1; /* the magnitude of b is the larger one: flip the result sign */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if (aExp == 0xFF) {
+        if (aSig) return propagateFloat32NaN(a, b, status);
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if ((aExp == 0) && aSig)
+        float_raise(status, float_flag_denormal);
+
+    if (bExp == 0) {
+        if (bSig) float_raise(status, float_flag_denormal);
+        --expDiff;
+    }
+    else bSig |= 0x40000000; /* b is not denormal: set the bit just above its shifted fraction */
+
+    bSig = shift32RightJamming(bSig, expDiff);
+    aSig |= 0x40000000;
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Adds the single-precision floating-point values `a' and `b' and returns the
+| sum.  Operands with equal signs have their magnitudes added; otherwise the
+| magnitudes are subtracted.  The sign of `a' is passed on as the result sign.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add(float32 a, float32 b, struct float_status_t *status)
+{
+    const int signOfA = extractFloat32Sign(a);
+    const int signOfB = extractFloat32Sign(b);
+
+    /* Opposite signs: a + b reduces to a difference of magnitudes. */
+    if (signOfA != signOfB)
+        return subFloat32Sigs(a, b, signOfA, status);
+
+    /* Equal signs: the magnitudes are summed. */
+    return addFloat32Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts `b' from `a' (both single precision) and returns the difference.
+| Operands with equal signs have their magnitudes subtracted; otherwise the
+| magnitudes are added.  The sign of `a' is passed on as the result sign.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub(float32 a, float32 b, struct float_status_t *status)
+{
+    const int signOfA = extractFloat32Sign(a);
+    const int signOfB = extractFloat32Sign(b);
+
+    /* Opposite signs: a - b reduces to a sum of magnitudes. */
+    if (signOfA != signOfB)
+        return addFloat32Sigs(a, b, signOfA, status);
+
+    /* Equal signs: the magnitudes are subtracted. */
+    return subFloat32Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the product of the single-precision floating-point values `a' and
+| `b'.  Infinity times zero raises float_flag_invalid and yields the default
+| NaN.  Follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul(float32 a, float32 b, struct float_status_t *status)
+{
+    int aSign, bSign, zSign;
+    Bit16s aExp, bExp, zExp;
+    Bit32u aSig, bSig;
+    Bit64u zSig64;
+    Bit32u zSig;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+    bSign = extractFloat32Sign(b);
+    zSign = aSign ^ bSign; /* result sign: set when the operand signs differ */
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0xFF) { /* a is an infinity or a NaN */
+        if (aSig || ((bExp == 0xFF) && bSig))
+            return propagateFloat32NaN(a, b, status);
+
+        if ((bExp | bSig) == 0) { /* infinity times zero */
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0xFF, 0);
+    }
+    if (bExp == 0xFF) { /* b is an infinity or a NaN, a is finite */
+        if (bSig) return propagateFloat32NaN(a, b, status);
+        if ((aExp | aSig) == 0) { /* zero times infinity */
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0xFF, 0);
+    }
+    if (aExp == 0) { /* a is a zero or a denormal */
+        if (aSig == 0) {
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat32(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) { /* b is a zero or a denormal */
+        if (bSig == 0) return packFloat32(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+    }
+    zExp = aExp + bExp - 0x7F;
+    aSig = (aSig | 0x00800000)<<7;
+    bSig = (bSig | 0x00800000)<<8;
+    zSig64 = shift64RightJamming(((Bit64u) aSig) * bSig, 32); /* 64-bit product shifted down by 32 via the jamming helper */
+    zSig = (Bit32u) zSig64;
+    if (0 <= (Bit32s) (zSig<<1)) { /* bit 30 clear: shift left once and lower the exponent to match */
+        zSig <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the quotient of the single-precision value `a' divided by `b', per
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.  0/0 and inf/inf
+| raise float_flag_invalid; finite nonzero `a' over 0 raises divbyzero.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div(float32 a, float32 b, struct float_status_t *status)
+{
+    int aSign, bSign, zSign;
+    Bit16s aExp, bExp, zExp;
+    Bit32u aSig, bSig, zSig;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+    bSign = extractFloat32Sign(b);
+    zSign = aSign ^ bSign; /* result sign: set when the operand signs differ */
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0xFF) { /* a is an infinity or a NaN */
+        if (aSig) return propagateFloat32NaN(a, b, status);
+        if (bExp == 0xFF) {
+            if (bSig) return propagateFloat32NaN(a, b, status);
+            float_raise(status, float_flag_invalid); /* infinity divided by infinity */
+            return float32_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0xFF, 0);
+    }
+    if (bExp == 0xFF) { /* b is an infinity or a NaN, a is finite */
+        if (bSig) return propagateFloat32NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0, 0);
+    }
+    if (bExp == 0) { /* b is a zero or a denormal */
+        if (bSig == 0) {
+            if ((aExp | aSig) == 0) { /* zero divided by zero */
+                float_raise(status, float_flag_invalid);
+                return float32_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloat32(zSign, 0xFF, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) { /* a is a zero or a denormal */
+        if (aSig == 0) return packFloat32(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = aExp - bExp + 0x7D;
+    aSig = (aSig | 0x00800000)<<7;
+    bSig = (bSig | 0x00800000)<<8;
+    if (bSig <= (aSig + aSig)) { /* halve the dividend (and bump the exponent) when 2*aSig reaches bSig */
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = (((Bit64u) aSig)<<32) / bSig;
+    if ((zSig & 0x3F) == 0) { /* low six bits clear: record a nonzero remainder in the lowest bit */
+        zSig |= ((Bit64u) bSig * zSig != ((Bit64u) aSig)<<32);
+    }
+    return roundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a',
+| per the IEC/IEEE Standard for Binary Floating-Point Arithmetic.  Negative
+| nonzero inputs raise float_flag_invalid; a negative zero is returned as -0.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt(float32 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp, zExp;
+    Bit32u aSig, zSig;
+    Bit64u rem, term;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+
+    if (aExp == 0xFF) { /* infinity or NaN */
+        if (aSig) return propagateFloat32NaNOne(a, status);
+        if (! aSign) return a; /* +infinity is its own square root */
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+    }
+
+    if (aSign) { /* negative input: only a zero is acceptable */
+        if ((aExp | aSig) == 0) return packFloat32(aSign, 0, 0);
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return 0; /* +0 in, +0 out (the sign is known to be clear here) */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = ((aExp - 0x7F)>>1) + 0x7E;
+    aSig = (aSig | 0x00800000)<<8;
+    zSig = estimateSqrt32(aExp, aSig) + 2;
+    if ((zSig & 0x7F) <= 5) { /* low seven bits small: check the estimate against the exact remainder */
+        if (zSig < 2) { /* the "+ 2" above wrapped around */
+            zSig = 0x7FFFFFFF;
+            goto roundAndPack;
+        }
+        aSig >>= aExp & 1;
+        term = ((Bit64u) zSig) * zSig;
+        rem = (((Bit64u) aSig)<<32) - term;
+        while ((Bit64s) rem < 0) { /* estimate too large: step it down until the remainder is non-negative */
+            --zSig;
+            rem += (((Bit64u) zSig)<<1) | 1;
+        }
+        zSig |= (rem != 0); /* record a nonzero remainder in the lowest bit */
+    }
+    zSig = shift32RightJamming(zSig, 1);
+ roundAndPack:
+    return roundAndPackFloat32(0, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Classifies the single-precision value `a' (zero, denormal, normal, inf, NaN).
+*----------------------------------------------------------------------------*/
+
+float_class_t float32_class(float32 a)
+{
+    const Bit16s expField = extractFloat32Exp(a);
+    const Bit32u fracField = extractFloat32Frac(a);
+    const int negative = extractFloat32Sign(a);
+
+    /* Only the two extreme exponent encodings need the fraction inspected. */
+    switch (expField) {
+    case 0xFF:
+        if (fracField != 0) /* NaN: bit 22 of the fraction marks a quiet NaN */
+            return (fracField & 0x00400000) ? float_QNaN : float_SNaN;
+        return negative ? float_negative_inf : float_positive_inf;
+
+    case 0:
+        return (fracField == 0) ? float_zero : float_denormal;
+
+    default:
+        return float_normalized;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Compares the single-precision values `a' and `b' and returns
+| 'float_relation_less', 'float_relation_equal' or 'float_relation_greater',
+| or 'float_relation_unordered' when either operand is a NaN.  A signaling
+| NaN always raises float_flag_invalid; a quiet NaN does so only when `quiet'
+| is zero.  Denormal operands (non-NaN case) raise float_flag_denormal.
+*----------------------------------------------------------------------------*/
+
+int float32_compare(float32 a, float32 b, int quiet, struct float_status_t *status)
+{
+    if (get_denormals_are_zeros(status)) {
+        a = float32_denormal_to_zero(a);
+        b = float32_denormal_to_zero(b);
+    }
+
+    const float_class_t classA = float32_class(a);
+    const float_class_t classB = float32_class(b);
+    const int anySNaN = (classA == float_SNaN) || (classB == float_SNaN);
+    const int anyQNaN = (classA == float_QNaN) || (classB == float_QNaN);
+
+    /* NaN operands are unordered; only a quiet NaN in a quiet compare
+       leaves float_flag_invalid untouched. */
+    if (anySNaN || anyQNaN) {
+        if (anySNaN || ! quiet)
+            float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (classA == float_denormal || classB == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    /* Identical encodings, or two zeros (every bit below the sign clear). */
+    if (a == b) return float_relation_equal;
+    if ((Bit32u) ((a | b)<<1) == 0) return float_relation_equal;
+
+    const int negA = extractFloat32Sign(a);
+    if (negA != extractFloat32Sign(b))
+        return negA ? float_relation_less : float_relation_greater;
+
+    /* Same sign: the raw encoding order is reversed for negative values. */
+    return (negA ^ (a < b)) ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Returns `a' when it compares strictly less than `b' and `b' for any other
+| relation.  Denormal operands are first replaced by zero when DAZ is set.
+*----------------------------------------------------------------------------*/
+
+float32 float32_min(float32 a, float32 b, struct float_status_t *status)
+{
+    const int flush = get_denormals_are_zeros(status);
+    const float32 x = flush ? float32_denormal_to_zero(a) : a;
+    const float32 y = flush ? float32_denormal_to_zero(b) : b;
+    if (float32_compare_two(x, y, status) != float_relation_less)
+        return y; /* every relation other than "less" selects the second operand */
+    return x;
+}
+
+/*----------------------------------------------------------------------------
+| Returns `a' when it compares strictly greater than `b' and `b' for any other
+| relation.  Denormal operands are first replaced by zero when DAZ is set.
+*----------------------------------------------------------------------------*/
+
+float32 float32_max(float32 a, float32 b, struct float_status_t *status)
+{
+    const int flush = get_denormals_are_zeros(status);
+    const float32 x = flush ? float32_denormal_to_zero(a) : a;
+    const float32 y = flush ? float32_denormal_to_zero(b) : b;
+    if (float32_compare_two(x, y, status) != float_relation_greater)
+        return y; /* every relation other than "greater" selects the second operand */
+    return x;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the smaller (`is_max' zero) or larger (`is_max' nonzero) of the
+| single-precision values `a' and `b', comparing magnitudes when `is_abs' is
+| nonzero.  A quiet NaN paired with a number yields the number.
+*----------------------------------------------------------------------------*/
+
+float32 float32_minmax(float32 a, float32 b, int is_max, int is_abs, struct float_status_t *status)
+{
+    float32 keyA, keyB;
+    int negA, negB, aIsSmaller;
+
+    if (get_denormals_are_zeros(status)) {
+        a = float32_denormal_to_zero(a);
+        b = float32_denormal_to_zero(b);
+    }
+
+    if (float32_is_nan(a) || float32_is_nan(b)) {
+        float32 number;
+        /* A signaling NaN wins outright, `a' being examined first. */
+        if (float32_is_signaling_nan(a))
+            return propagateFloat32NaNOne(a, status);
+        if (float32_is_signaling_nan(b))
+            return propagateFloat32NaNOne(b, status);
+
+        if (float32_is_nan(a) && float32_is_nan(b))
+            return propagateFloat32NaN(a, b, status);
+
+        /* Exactly one quiet NaN: the other (numeric) operand is returned. */
+        number = float32_is_nan(b) ? a : b;
+        if (float32_is_denormal(number))
+            float_raise(status, float_flag_denormal);
+        return number;
+    }
+
+    /* Comparison keys: the operands themselves, or their magnitudes when
+       is_abs is set.  The value returned is a or b itself, sign included. */
+    keyA = a;
+    keyB = b;
+    if (is_abs) {
+        keyA &= 0x7FFFFFFF;
+        keyB &= 0x7FFFFFFF;
+    }
+    negA = extractFloat32Sign(keyA);
+    negB = extractFloat32Sign(keyB);
+
+    if (float32_is_denormal(a) || float32_is_denormal(b))
+        float_raise(status, float_flag_denormal);
+
+    /* With differing signs the negative key is the smaller one; otherwise
+       the raw encoding order decides, reversed for negative values. */
+    if (negA != negB)
+        aIsSmaller = negA;
+    else
+        aIsSmaller = negA ^ (keyA < keyB);
+
+    if (is_max)
+        return aIsSmaller ? b : a;
+    return aIsSmaller ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 32-bit two's
+| complement integer format, rounding according to the current rounding mode
+| (IEC/IEEE Standard for Binary Floating-Point Arithmetic).  If `a' is a NaN
+| or the conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s float64_to_int32(float64 a, struct float_status_t *status)
+{
+    Bit64u sig = extractFloat64Frac(a);
+    const Bit16s expo = extractFloat64Exp(a);
+    int negative = extractFloat64Sign(a);
+    const int rightShift = 0x42C - expo;
+
+    if (expo == 0x7FF && sig != 0) negative = 0; /* a NaN is passed on with its sign cleared */
+    if (expo != 0)
+        sig |= BX_CONST64(0x0010000000000000); /* bit just above the fraction field */
+    else if (get_denormals_are_zeros(status))
+        sig = 0;
+    if (rightShift > 0)
+        sig = shift64RightJamming(sig, rightShift);
+    return roundAndPackInt32(negative, sig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision value `a' to the 32-bit two's complement
+| integer format, always rounding toward zero.  A NaN or an out-of-range
+| value raises float_flag_invalid and returns the integer indefinite value;
+| discarded fraction bits raise float_flag_inexact.
+*----------------------------------------------------------------------------*/
+
+Bit32s float64_to_int32_round_to_zero(float64 a, struct float_status_t *status)
+{
+    int aSign, shiftCount;
+    Bit16s aExp;
+    Bit64u aSig, savedASig;
+    Bit32u zMag;
+    Bit32s z;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (0x41E < aExp) { /* magnitude of 2^32 or more, infinity or NaN */
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    else if (aExp < 0x3FF) { /* magnitude below 1 truncates to zero */
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp || aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    aSig |= BX_CONST64(0x0010000000000000);
+    shiftCount = 0x433 - aExp;
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    /* Negate in unsigned arithmetic: `-z' on a Bit32s overflows (undefined
+       behavior) when the truncated magnitude is exactly 2^31. */
+    zMag = (Bit32u) aSig;
+    if (aSign) zMag = 0 - zMag;
+    z = (Bit32s) zMag;
+    if ((z < 0) ^ aSign) { /* sign disagrees with the input: the magnitude does not fit */
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if ((aSig<<shiftCount) != savedASig)
+        float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 32-bit
+| unsigned integer format, always rounding toward zero.  Magnitudes below 1
+| give 0.  Negative values of magnitude 1 or more, NaNs and values too large
+| for the format raise float_flag_invalid and return uint32_indefinite.
+| Discarded fraction bits raise float_flag_inexact.
+*----------------------------------------------------------------------------*/
+
+Bit32u float64_to_uint32_round_to_zero(float64 a, struct float_status_t *status)
+{
+    Bit64u sig = extractFloat64Frac(a);
+    const Bit16s expo = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+    Bit64u whole;
+    int dropped;
+
+    if (expo < 0x3FF) { /* magnitude below 1 truncates to 0 */
+        if (get_denormals_are_zeros(status) && expo == 0) sig = 0;
+        if (expo != 0 || sig != 0)
+            float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    if (negative || expo > 0x41E) {
+        float_raise(status, float_flag_invalid);
+        return uint32_indefinite;
+    }
+
+    sig |= BX_CONST64(0x0010000000000000);
+    dropped = 0x433 - expo; /* number of low bits shifted out */
+    whole = sig >> dropped;
+    if ((whole << dropped) != sig)
+        float_raise(status, float_flag_inexact);
+    return (Bit32u) whole;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 64-bit two's
+| complement integer format.  The significand is aligned to an integer (bits
+| shifted out are passed along separately) and the result is produced by
+| roundAndPackInt64; per the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic the conversion is rounded according to the current rounding
+| mode.  An exponent above 0x43E (including infinities and NaNs) raises
+| float_flag_invalid and returns the integer indefinite value.
+*----------------------------------------------------------------------------*/
+
+Bit64s float64_to_int64(float64 a, struct float_status_t *status)
+{
+    Bit64u sig = extractFloat64Frac(a);
+    const Bit16s expo = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+    const int rightShift = 0x433 - expo;
+    Bit64u sigExtra = 0;
+
+    if (expo != 0)
+        sig |= BX_CONST64(0x0010000000000000);
+    else if (get_denormals_are_zeros(status))
+        sig = 0;
+
+    if (rightShift > 0) {
+        /* The helper writes its two output words into sig and sigExtra. */
+        shift64ExtraRightJamming(sig, 0, rightShift, &sig, &sigExtra);
+    }
+    else {
+        if (expo > 0x43E) {
+            float_raise(status, float_flag_invalid);
+            return (Bit64s)(int64_indefinite);
+        }
+        sig <<= -rightShift;
+    }
+
+    return roundAndPackInt64(negative, sig, sigExtra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 64-bit two's
+| complement integer format, always rounding toward zero.  Exponents of
+| 0x43E and above (magnitude of 2^63 or more, infinities, NaNs) return the
+| integer indefinite value and raise float_flag_invalid, except for the exact
+| encoding of -2^63.  Discarded fraction bits, including any nonzero
+| magnitude that truncates to 0, raise float_flag_inexact.
+*----------------------------------------------------------------------------*/
+
+Bit64s float64_to_int64_round_to_zero(float64 a, struct float_status_t *status)
+{
+    Bit64u sig = extractFloat64Frac(a);
+    const Bit16s expo = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+    const int leftShift = expo - 0x433;
+    Bit64s result;
+
+    if (expo != 0)
+        sig |= BX_CONST64(0x0010000000000000);
+
+    if (expo >= 0x43E) {
+        /* Out of range, infinity or NaN.  The one encoding exempt from the
+           invalid flag, 0xC3E0000000000000, is -2^63. */
+        if (a != BX_CONST64(0xC3E0000000000000))
+            float_raise(status, float_flag_invalid);
+        return (Bit64s)(int64_indefinite);
+    }
+    if (expo < 0x3FE) {
+        if (get_denormals_are_zeros(status) && expo == 0) sig = 0;
+        if (expo != 0 || sig != 0) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    if (leftShift >= 0) {
+        result = sig<<leftShift;
+    }
+    else {
+        result = sig>>(-leftShift);
+        /* (leftShift & 63) == 64 + leftShift: keeps only the dropped bits. */
+        if ((Bit64u) (sig<<(leftShift & 63)))
+            float_raise(status, float_flag_inexact);
+    }
+    return negative ? -result : result;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 64-bit
+| unsigned integer format, always rounding toward zero.  Any |a| < 1 gives 0;
+| values in [1, 2^64) are truncated.  Negative values <= -1, NaNs and values
+| >= 2^64 raise float_flag_invalid and return uint64_indefinite.  Discarded
+| fraction bits raise float_flag_inexact.
+*----------------------------------------------------------------------------*/
+
+Bit64u float64_to_uint64_round_to_zero(float64 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp;
+    Bit64u aSig, z;
+    int shiftCount;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+
+    /* |a| < 1 (biased exponent below 0x3FF) truncates to 0 whatever the sign. */
+    if (aExp < 0x3FF) {
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp | aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    /* Exponent 0x43E still covers [2^63, 2^64), which fits in 64 unsigned
+       bits; only larger exponents (or a negative value <= -1) are invalid. */
+    if (0x43E < aExp || aSign) {
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    aSig |= BX_CONST64(0x0010000000000000);
+    shiftCount = aExp - 0x433;
+    if (0 <= shiftCount) return aSig<<shiftCount; /* at most 11 here: no bits are lost */
+
+    /* (shiftCount & 63) == 64 + shiftCount: isolates the bits shifted out. */
+    z = aSig>>(-shiftCount);
+    if ((Bit64u) (aSig<<(shiftCount & 63)))
+        float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 32-bit
+| unsigned integer format by way of float64_to_uint64, so rounding follows
+| that routine (the current rounding mode, per the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic).  A 64-bit result above 0xffffffff
+| overwrites the exception flags with float_flag_invalid alone and returns
+| uint32_indefinite.
+*----------------------------------------------------------------------------*/
+
+Bit32u float64_to_uint32(float64 a, struct float_status_t *status)
+{
+    const Bit64u wide = float64_to_uint64(a, status);
+
+    if (wide <= 0xffffffff)
+        return (Bit32u) wide;
+
+    /* Does not fit: the whole flag word is replaced, discarding other flags. */
+    status->float_exception_flags = float_flag_invalid;
+    return uint32_indefinite;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the 64-bit
+| unsigned integer format; per the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic the conversion is rounded according to the
+| current rounding mode (final rounding is done by roundAndPackUint64).
+| Negative values of magnitude 1 or more, and exponents above 0x43E
+| (infinities and NaNs included), raise float_flag_invalid and return
+| uint64_indefinite.
+*----------------------------------------------------------------------------*/
+
+Bit64u float64_to_uint64(float64 a, struct float_status_t *status)
+{
+    Bit64u sig = extractFloat64Frac(a);
+    const Bit16s expo = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+    const Bit16s rightShift = 0x433 - expo;
+    Bit64u sigExtra = 0;
+
+    if (get_denormals_are_zeros(status) && expo == 0)
+        sig = 0;
+
+    /* Negative with a biased exponent of 0x3FF or more: magnitude >= 1. */
+    if (negative && expo > 0x3FE) {
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    /* A nonzero exponent field gets the bit just above the fraction set. */
+    if (expo != 0)
+        sig |= BX_CONST64(0x0010000000000000);
+
+    /* Align the significand to an integer: right with the helper filling
+       sig and sigExtra, or left when the exponent is 0x433 or more. */
+    if (rightShift > 0) {
+        shift64ExtraRightJamming(sig, 0, rightShift, &sig, &sigExtra);
+    }
+    else {
+        if (expo > 0x43E) { /* 2^64 or more, infinity or NaN */
+            float_raise(status, float_flag_invalid);
+            return uint64_indefinite;
+        }
+        sig <<= -rightShift;
+    }
+
+    return roundAndPackUint64(negative, sig, sigExtra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the
+| single-precision format, per the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.  A nonzero denormal input raises
+| float_flag_denormal unless denormals-are-zeros turns it into a signed zero.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32(float64 a, struct float_status_t *status)
+{
+    Bit64u sig64 = extractFloat64Frac(a);
+    Bit16s expo = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+    Bit32u sig32;
+
+    switch (expo) {
+    case 0x7FF: /* NaN or infinity */
+        if (sig64 != 0)
+            return commonNaNToFloat32(float64ToCommonNaN(a, status));
+        return packFloat32(negative, 0xFF, 0);
+    case 0: /* zero or denormal */
+        if (sig64 == 0 || get_denormals_are_zeros(status))
+            return packFloat32(negative, 0, 0);
+        float_raise(status, float_flag_denormal);
+        break;
+    }
+
+    /* Shift the fraction right by 22 with the jamming helper, then narrow. */
+    sig32 = (Bit32u) shift64RightJamming(sig64, 22);
+    if (expo != 0 || sig32 != 0) {
+        sig32 |= 0x40000000;
+        expo -= 0x381;
+    }
+    return roundAndPackFloat32(negative, expo, sig32, status);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to a multiple of
+| 2^-scale (only the low four bits of `scale' are used; scale 0 rounds to an
+| integer) in the current rounding mode and returns it as a double-precision
+| value.  Raises float_flag_inexact when the value changes.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int(float64 a, Bit8u scale, struct float_status_t *status)
+{
+    Bit64u lastBitMask, roundBitsMask;
+    int roundingMode = get_float_rounding_mode(status);
+    Bit16s aExp = extractFloat64Exp(a);
+    scale &= 0xf; /* only the low four bits of the scale are used */
+
+    if ((aExp == 0x7FF) && extractFloat64Frac(a)) {
+        return propagateFloat64NaNOne(a, status);
+    }
+
+    aExp += scale; // scale the exponent
+
+    if (0x433 <= aExp) { /* no encoded bits lie below the rounding position (also true for infinities) */
+        return a;
+    }
+
+    if (get_denormals_are_zeros(status)) {
+        a = float64_denormal_to_zero(a);
+    }
+
+    if (aExp < 0x3FF) { /* scaled magnitude below 1: the result is a zero or +-2^-scale */
+        if ((Bit64u) (a<<1) == 0) return a; /* +0 or -0 is returned as is, no flags */
+        float_raise(status, float_flag_inexact);
+        int aSign = extractFloat64Sign(a);
+        switch (roundingMode) {
+         case float_round_nearest_even:
+            if ((aExp == 0x3FE) && extractFloat64Frac(a)) { /* scaled magnitude strictly above one half */
+              return packFloat64(aSign, 0x3FF - scale, 0);
+            }
+            break;
+         case float_round_down:
+            return aSign ? packFloat64(1, 0x3FF - scale, 0) : float64_positive_zero;
+         case float_round_up:
+            return aSign ? float64_negative_zero : packFloat64(0, 0x3FF - scale, 0);
+        }
+        return packFloat64(aSign, 0, 0); /* remaining cases give a zero with the sign of a */
+    }
+
+    lastBitMask = 1;
+    lastBitMask <<= 0x433 - aExp; /* lowest encoded bit that is kept */
+    roundBitsMask = lastBitMask - 1; /* every bit below it */
+    float64 z = a;
+    if (roundingMode == float_round_nearest_even) {
+        z += lastBitMask>>1;
+        if ((z & roundBitsMask) == 0) z &= ~lastBitMask; /* exact tie: clear the last kept bit (round to even) */
+    }
+    else if (roundingMode != float_round_to_zero) {
+        if (extractFloat64Sign(z) ^ (roundingMode == float_round_up)) { /* mode rounds away from zero for this sign */
+            z += roundBitsMask;
+        }
+    }
+    z &= ~roundBitsMask;
+    if (z != a) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the fractional portion of double-precision floating-point value `a',
+| and returns the result as a double-precision floating-point value. The
+| fractional results are precise. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_frc(float64 a, struct float_status_t *status)
+{
+    int roundingMode = get_float_rounding_mode(status);
+
+    Bit64u aSig = extractFloat64Frac(a);
+    Bit16s aExp = extractFloat64Exp(a);
+    int aSign = extractFloat64Sign(a);
+
+    if (aExp == 0x7FF) { // NaN or infinity
+        if (aSig) return propagateFloat64NaNOne(a, status);
+        float_raise(status, float_flag_invalid); // infinity: invalid, default NaN
+        return float64_default_nan;
+    }
+
+    if (aExp >= 0x433) { // no fraction bits: zero, negative only when rounding down
+        return packFloat64(roundingMode == float_round_down, 0, 0);
+    }
+
+    if (aExp < 0x3FF) { // magnitude below 1: `a' is its own fractional part
+        if (aExp == 0) {
+            if (aSig == 0 || get_denormals_are_zeros(status))
+                return packFloat64(roundingMode == float_round_down, 0, 0);
+
+            float_raise(status, float_flag_denormal);
+            if (! float_exception_masked(status, float_flag_underflow))
+                float_raise(status, float_flag_underflow);
+
+            if(get_flush_underflow_to_zero(status)) { // denormal result flushed to signed zero
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat64(aSign, 0, 0);
+            }
+        }
+        return a;
+    }
+
+    Bit64u lastBitMask = BX_CONST64(1) << (0x433 - aExp); // units bit of `a'
+    Bit64u roundBitsMask = lastBitMask - 1; // bits below the units bit
+
+    aSig &= roundBitsMask; // keep only the fractional bits
+    aSig <<= 10;
+    aExp--;
+
+    if (aSig == 0) // `a' was an exact integer
+       return packFloat64(roundingMode == float_round_down, 0, 0);
+
+    return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent portion of double-precision floating-point value 'a',
+| and returns the result as a double-precision floating-point value
+| representing unbiased integer exponent. The operation is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_getexp(float64 a, struct float_status_t *status)
+{
+    Bit64u frac = extractFloat64Frac(a);
+    Bit16s biasedExp = extractFloat64Exp(a);
+
+    /* NaNs propagate; an infinity of either sign yields +infinity. */
+    if (biasedExp == 0x7FF)
+        return frac ? propagateFloat64NaNOne(a, status) : float64_positive_inf;
+
+    if (biasedExp == 0) {
+        /* Zero, or a denormal treated as zero, yields -infinity. */
+        if (frac == 0 || get_denormals_are_zeros(status))
+            return float64_negative_inf;
+        /* A genuine denormal is flagged, then normalised for its exponent. */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(frac, &biasedExp, &frac);
+    }
+
+    return int32_to_float64(biasedExp - 0x3FF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the mantissa of double-precision floating-point value 'a' and
+| returns the result as a double-precision floating-point after applying
+| the mantissa interval normalization and sign control. The operation is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_getmant(float64 a, struct float_status_t *status, int sign_ctrl, int interv)
+{
+    Bit16s aExp = extractFloat64Exp(a);
+    Bit64u aSig = extractFloat64Frac(a);
+    int aSign = extractFloat64Sign(a);
+
+    if (aExp == 0x7FF) { // NaN or infinity
+        if (aSig) return propagateFloat64NaNOne(a, status);
+        if (aSign) {
+            if (sign_ctrl & 0x2) { // bit 1 of sign_ctrl: a negative input is invalid
+                float_raise(status, float_flag_invalid);
+                return float64_default_nan;
+            }
+        }
+        return packFloat64(~sign_ctrl & aSign, 0x3FF, 0); // bit 0 of sign_ctrl clears the sign
+    }
+
+    if (aExp == 0 && (aSig == 0 || get_denormals_are_zeros(status))) { // zero input
+        return packFloat64(~sign_ctrl & aSign, 0x3FF, 0);
+    }
+
+    if (aSign) {
+        if (sign_ctrl & 0x2) {
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+    }
+
+    if (aExp == 0) {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+        // Drop the leading 1 that normalization is expected to leave in bit 52.
+        aSig &= BX_CONST64(0x000FFFFFFFFFFFFF);
+    }
+
+    switch(interv) {
+    case 0x0: // interval [1,2)
+        aExp = 0x3FF;
+        break;
+    case 0x1: // interval [1/2,2)
+        aExp -= 0x3FF;
+        aExp  = 0x3FF - (aExp & 0x1); // odd unbiased exponent selects [1/2,1)
+        break;
+    case 0x2: // interval [1/2,1)
+        aExp = 0x3FE;
+        break;
+    case 0x3: // interval [3/4,3/2)
+        aExp = 0x3FF - ((aSig >> 51) & 0x1); // top fraction bit set selects [3/4,1)
+        break;
+    }
+
+    return packFloat64(~sign_ctrl & aSign, aExp, aSig);
+}
+
+/*----------------------------------------------------------------------------
+| Return the result of a floating point scale of the double-precision floating
+| point value `a' by multiplying it by 2 power of the double-precision
+| floating point value 'b' converted to integral value. If the result cannot
+| be represented in double precision, then the proper overflow response (for
+| positive scaling operand), or the proper underflow response (for negative
+| scaling operand) is issued. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_scalef(float64 a, float64 b, struct float_status_t *status)
+{
+    Bit64u aSig = extractFloat64Frac(a);
+    Bit16s aExp = extractFloat64Exp(a);
+    int aSign = extractFloat64Sign(a);
+    Bit64u bSig = extractFloat64Frac(b);
+    Bit16s bExp = extractFloat64Exp(b);
+    int bSign = extractFloat64Sign(b);
+
+    if (get_denormals_are_zeros(status)) { // treat denormal operands as zero
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (bExp == 0x7FF) {
+        if (bSig) return propagateFloat64NaN(a, b, status); // `b' is a NaN
+    }
+
+    if (aExp == 0x7FF) {
+        if (aSig) { // `a' is a NaN
+            int aIsSignalingNaN = (aSig & BX_CONST64(0x0008000000000000)) == 0;
+            if (aIsSignalingNaN || bExp != 0x7FF || bSig)
+                return propagateFloat64NaN(a, b, status);
+
+            return bSign ? 0 : float64_positive_inf; // quiet NaN `a', infinite `b'
+        }
+
+        if (bExp == 0x7FF && bSign) { // infinity scaled by -infinity is invalid
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        return a;
+    }
+
+    if (aExp == 0) {
+        if (aSig == 0) {
+            if (bExp == 0x7FF && ! bSign) { // zero scaled by +infinity is invalid
+                float_raise(status, float_flag_invalid);
+                return float64_default_nan;
+            }
+            return a;
+        }
+        float_raise(status, float_flag_denormal);
+    }
+
+    if ((bExp | bSig) == 0) return a; // scaling by zero leaves `a' unchanged
+
+    if (bExp == 0x7FF) { // finite non-zero `a' scaled by an infinity
+        if (bSign) return packFloat64(aSign, 0, 0);
+        return packFloat64(aSign, 0x7FF, 0);
+    }
+
+    if (0x40F <= bExp) { // |b| >= 2^16
+        // handle obvious overflow/underflow result
+        return roundAndPackFloat64(aSign, bSign ? -0x3FF : 0x7FF, aSig, status);
+    }
+
+    int scale = 0;
+
+    if (bExp < 0x3FF) { // |b| < 1: the scale is 0, or -1 for a negative `b'
+        if (bExp == 0)
+            float_raise(status, float_flag_denormal);
+        scale = -bSign;
+    }
+    else {
+        bSig |= BX_CONST64(0x0010000000000000); // make the leading 1 explicit
+        int shiftCount = 0x433 - bExp;
+        Bit64u savedBSig = bSig;
+        bSig >>= shiftCount; // integer part of |b|
+        scale = (Bit32s) bSig;
+        if (bSign) {
+            if ((bSig<<shiftCount) != savedBSig) scale++; // fraction lost: round towards -infinity
+            scale = -scale;
+        }
+
+        if (scale >  0x1000) scale =  0x1000; // clamp the scale to +/-0x1000
+        if (scale < -0x1000) scale = -0x1000;
+    }
+
+    if (aExp != 0) {
+        aSig |= BX_CONST64(0x0010000000000000);
+    } else {
+        aExp++; // denormal: no leading 1 is added, exponent becomes 1
+    }
+
+    aExp += scale - 1;
+    aSig <<= 10;
+    return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs(float64 a, float64 b, int zSign, struct float_status_t *status)
+{
+    Bit16s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig;
+    Bit16s expDiff;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+
+    if (get_denormals_are_zeros(status)) { // treat denormal operands as zero
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 9; // leave headroom above the fraction; the leading 1 goes in bit 61
+    bSig <<= 9;
+    if (0 < expDiff) { // `a' has the larger exponent
+        if (aExp == 0x7FF) {
+            if (aSig) return propagateFloat64NaN(a, b, status);
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return a; // infinity plus a finite value
+        }
+        if ((aExp == 0) && aSig)
+            float_raise(status, float_flag_denormal);
+
+        if (bExp == 0) {
+            if (bSig) float_raise(status, float_flag_denormal);
+            --expDiff; // a denormal has no leading 1 and aligns as exponent 1
+        }
+        else bSig |= BX_CONST64(0x2000000000000000);
+
+        bSig = shift64RightJamming(bSig, expDiff); // align `b' with `a'
+        zExp = aExp;
+    }
+    else if (expDiff < 0) { // `b' has the larger exponent: mirror of the case above
+        if (bExp == 0x7FF) {
+            if (bSig) return propagateFloat64NaN(a, b, status);
+            if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat64(zSign, 0x7FF, 0);
+        }
+        if ((bExp == 0) && bSig)
+            float_raise(status, float_flag_denormal);
+
+        if (aExp == 0) {
+            if (aSig) float_raise(status, float_flag_denormal);
+            ++expDiff;
+        }
+        else aSig |= BX_CONST64(0x2000000000000000);
+
+        aSig = shift64RightJamming(aSig, -expDiff); // align `a' with `b'
+        zExp = bExp;
+    }
+    else { // equal exponents
+        if (aExp == 0x7FF) {
+            if (aSig | bSig) return propagateFloat64NaN(a, b, status);
+            return a;
+        }
+        if (aExp == 0) { // both operands are zero or denormal
+            zSig = (aSig + bSig) >> 9;
+            if (aSig | bSig) {
+                float_raise(status, float_flag_denormal);
+                if (get_flush_underflow_to_zero(status) && (extractFloat64Frac(zSig) == zSig)) { // sum stays within the fraction field
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat64(zSign, 0, 0);
+                }
+                if (! float_exception_masked(status, float_flag_underflow)) {
+                    if (extractFloat64Frac(zSig) == zSig)
+                        float_raise(status, float_flag_underflow);
+                }
+            }
+            return packFloat64(zSign, 0, zSig);
+        }
+        zSig = BX_CONST64(0x4000000000000000) + aSig + bSig; // both leading 1s (bit 61 each) summed
+        return roundAndPackFloat64(zSign, aExp, zSig, status);
+    }
+    aSig |= BX_CONST64(0x2000000000000000); // leading 1 of whichever operand has the larger exponent
+    zSig = (aSig + bSig)<<1;
+    --zExp;
+    if ((Bit64s) zSig < 0) { // the sum reached bit 62: use it unshifted instead
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+    return roundAndPackFloat64(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs(float64 a, float64 b, int zSign, struct float_status_t *status)
+{
+    Bit16s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig;
+    Bit16s expDiff;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+
+    if (get_denormals_are_zeros(status)) { // treat denormal operands as zero
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 10; // leave headroom above the fraction; the leading 1 goes in bit 62
+    bSig <<= 10;
+    if (0 < expDiff) goto aExpBigger;
+    if (expDiff < 0) goto bExpBigger;
+    if (aExp == 0x7FF) { // equal exponents, both NaN or infinity
+        if (aSig | bSig) return propagateFloat64NaN(a, b, status);
+        float_raise(status, float_flag_invalid); // infinity minus infinity
+        return float64_default_nan;
+    }
+    if (aExp == 0) {
+        if (aSig | bSig) float_raise(status, float_flag_denormal);
+        aExp = 1; // zero/denormal operands use exponent 1
+        bExp = 1;
+    }
+    if (bSig < aSig) goto aBigger;
+    if (aSig < bSig) goto bBigger;
+    return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0); // exact zero: negative only when rounding down
+ bExpBigger:
+    if (bExp == 0x7FF) {
+        if (bSig) return propagateFloat64NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign ^ 1, 0x7FF, 0); // finite minus infinity: infinity, sign flipped
+    }
+    if ((bExp == 0) && bSig)
+        float_raise(status, float_flag_denormal);
+
+    if (aExp == 0) {
+        if (aSig) float_raise(status, float_flag_denormal);
+        ++expDiff; // a denormal has no leading 1 and aligns as exponent 1
+    }
+    else aSig |= BX_CONST64(0x4000000000000000);
+
+    aSig = shift64RightJamming(aSig, -expDiff); // align `a' with `b'
+    bSig |= BX_CONST64(0x4000000000000000);
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1; // |b| > |a|: the result takes the opposite sign
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if (aExp == 0x7FF) {
+        if (aSig) return propagateFloat64NaN(a, b, status);
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a; // infinity minus a finite value
+    }
+    if ((aExp == 0) && aSig)
+        float_raise(status, float_flag_denormal);
+
+    if (bExp == 0) {
+        if (bSig) float_raise(status, float_flag_denormal);
+        --expDiff;
+    }
+    else bSig |= BX_CONST64(0x4000000000000000);
+
+    bSig = shift64RightJamming(bSig, expDiff); // align `b' with `a'
+    aSig |= BX_CONST64(0x4000000000000000);
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add(float64 a, float64 b, struct float_status_t *status)
+{
+    const int signA = extractFloat64Sign(a);
+    const int signsDiffer = (signA != extractFloat64Sign(b));
+
+    /*
+     * Equal signs add the magnitudes; opposite signs subtract them.  Either
+     * helper receives the sign of `a' as its sign argument.
+     */
+    return signsDiffer ? subFloat64Sigs(a, b, signA, status)
+                       : addFloat64Sigs(a, b, signA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub(float64 a, float64 b, struct float_status_t *status)
+{
+    const int signA = extractFloat64Sign(a);
+    const int signsMatch = (signA == extractFloat64Sign(b));
+
+    /*
+     * a - b with equal signs is a difference of magnitudes; with opposite
+     * signs it is a sum of magnitudes.  Both helpers take the sign of `a'.
+     */
+    return signsMatch ? subFloat64Sigs(a, b, signA, status)
+                      : addFloat64Sigs(a, b, signA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul(float64 a, float64 b, struct float_status_t *status)
+{
+    int aSign, bSign, zSign;
+    Bit16s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig0, zSig1;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+    bSign = extractFloat64Sign(b);
+    zSign = aSign ^ bSign; // product sign
+
+    if (get_denormals_are_zeros(status)) { // treat denormal operands as zero
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0x7FF) { // `a' is a NaN or an infinity
+        if (aSig || ((bExp == 0x7FF) && bSig)) {
+            return propagateFloat64NaN(a, b, status);
+        }
+        if ((bExp | bSig) == 0) { // infinity times zero
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0x7FF, 0);
+    }
+    if (bExp == 0x7FF) { // `b' is a NaN or an infinity, `a' is finite
+        if (bSig) return propagateFloat64NaN(a, b, status);
+        if ((aExp | aSig) == 0) { // zero times infinity
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0x7FF, 0);
+    }
+    if (aExp == 0) { // `a' is zero or denormal
+        if (aSig == 0) {
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat64(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) { // `b' is zero or denormal
+        if (bSig == 0) return packFloat64(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+    }
+    zExp = aExp + bExp - 0x3FF; // add exponents, removing one bias
+    aSig = (aSig | BX_CONST64(0x0010000000000000))<<10; // leading 1 explicit, in bit 62
+    bSig = (bSig | BX_CONST64(0x0010000000000000))<<11; // leading 1 explicit, in bit 63
+    mul64To128(aSig, bSig, &zSig0, &zSig1);
+    zSig0 |= (zSig1 != 0); // any non-zero low half becomes a sticky bit
+    if (0 <= (Bit64s) (zSig0<<1)) { // bit 62 clear: shift the leading 1 up into it
+        zSig0 <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat64(zSign, zExp, zSig0, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div(float64 a, float64 b, struct float_status_t *status)
+{
+    int aSign, bSign, zSign;
+    Bit16s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig;
+    Bit64u rem0, rem1;
+    Bit64u term0, term1;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+    bSign = extractFloat64Sign(b);
+    zSign = aSign ^ bSign; // quotient sign
+
+    if (get_denormals_are_zeros(status)) { // treat denormal operands as zero
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0x7FF) { // `a' is a NaN or an infinity
+        if (aSig) return propagateFloat64NaN(a, b, status);
+        if (bExp == 0x7FF) {
+            if (bSig) return propagateFloat64NaN(a, b, status);
+            float_raise(status, float_flag_invalid); // infinity divided by infinity
+            return float64_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0x7FF, 0);
+    }
+    if (bExp == 0x7FF) { // finite `a' divided by a NaN or an infinity
+        if (bSig) return propagateFloat64NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0, 0);
+    }
+    if (bExp == 0) { // divisor is zero or denormal
+        if (bSig == 0) {
+            if ((aExp | aSig) == 0) { // zero divided by zero
+                float_raise(status, float_flag_invalid);
+                return float64_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloat64(zSign, 0x7FF, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) { // dividend is zero or denormal
+        if (aSig == 0) return packFloat64(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = aExp - bExp + 0x3FD;
+    aSig = (aSig | BX_CONST64(0x0010000000000000))<<10; // leading 1 explicit, in bit 62
+    bSig = (bSig | BX_CONST64(0x0010000000000000))<<11; // leading 1 explicit, in bit 63
+    if (bSig <= (aSig + aSig)) { // halve the dividend so that 2*aSig < bSig
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = estimateDiv128To64(aSig, 0, bSig); // NOTE(review): presumably an approximate quotient, corrected below
+    if ((zSig & 0x1FF) <= 2) { // low bits nearly zero: compute the remainder and fix up
+        mul64To128(bSig, zSig, &term0, &term1);
+        sub128(aSig, 0, term0, term1, &rem0, &rem1);
+        while ((Bit64s) rem0 < 0) { // negative remainder: the estimate was too large
+            --zSig;
+            add128(rem0, rem1, 0, bSig, &rem0, &rem1);
+        }
+        zSig |= (rem1 != 0); // non-zero remainder becomes a sticky bit
+    }
+    return roundAndPackFloat64(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt(float64 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp, zExp;
+    Bit64u aSig, zSig, doubleZSig;
+    Bit64u rem0, rem1, term0, term1;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+
+    if (aExp == 0x7FF) { // NaN or infinity
+        if (aSig) return propagateFloat64NaNOne(a, status);
+        if (! aSign) return a; // +infinity is returned unchanged
+        float_raise(status, float_flag_invalid); // -infinity
+        return float64_default_nan;
+    }
+
+    if (get_denormals_are_zeros(status)) { // treat a denormal operand as zero
+        if (aExp == 0) aSig = 0;
+    }
+
+    if (aSign) {
+        if ((aExp | aSig) == 0) return packFloat64(aSign, 0, 0); // -0 stays -0
+        float_raise(status, float_flag_invalid); // any other negative input
+        return float64_default_nan;
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return 0; // all-zero encoding, i.e. +0
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = ((aExp - 0x3FF)>>1) + 0x3FE; // halve the unbiased exponent
+    aSig |= BX_CONST64(0x0010000000000000); // make the leading 1 explicit
+    zSig = estimateSqrt32(aExp, (Bit32u)(aSig>>21)); // NOTE(review): presumably a 32-bit root estimate
+    aSig <<= 9 - (aExp & 1); // shift depends on the parity of the exponent
+    zSig = estimateDiv128To64(aSig, 0, zSig<<32) + (zSig<<30);
+    if ((zSig & 0x1FF) <= 5) { // low bits nearly zero: compute the remainder and fix up
+        doubleZSig = zSig<<1;
+        mul64To128(zSig, zSig, &term0, &term1);
+        sub128(aSig, 0, term0, term1, &rem0, &rem1);
+        while ((Bit64s) rem0 < 0) { // negative remainder: the estimate was too large
+            --zSig;
+            doubleZSig -= 2;
+            add128(rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1);
+        }
+        zSig |= ((rem0 | rem1) != 0); // non-zero remainder becomes a sticky bit
+    }
+    return roundAndPackFloat64(0, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Determine double-precision floating-point number class
+*----------------------------------------------------------------------------*/
+
+float_class_t float64_class(float64 a)
+{
+    const Bit64u frac = extractFloat64Frac(a);
+
+    switch (extractFloat64Exp(a)) {
+    case 0x7FF:
+        /* All-ones exponent: infinity when the fraction is clear, else NaN. */
+        if (frac == 0)
+            return extractFloat64Sign(a) ? float_negative_inf : float_positive_inf;
+        /* The top fraction bit separates quiet from signaling NaNs. */
+        if (frac & BX_CONST64(0x0008000000000000))
+            return float_QNaN;
+        return float_SNaN;
+    case 0:
+        /* Zero exponent: zero or denormal, depending on the fraction. */
+        return (frac == 0) ? float_zero : float_denormal;
+    default:
+        break;
+    }
+    return float_normalized;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two double precision floating point numbers. Returns
+| 'float_relation_equal' if the operands are equal, 'float_relation_less' if
+| the value 'a' is less than the corresponding value 'b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value 'b', or 'float_relation_unordered' otherwise.
+*----------------------------------------------------------------------------*/
+
+int float64_compare(float64 a, float64 b, int quiet, struct float_status_t *status)
+{
+    /* With denormals-are-zeros active both operands are flushed up front. */
+    if (get_denormals_are_zeros(status)) {
+        a = float64_denormal_to_zero(a);
+        b = float64_denormal_to_zero(b);
+    }
+
+    const float_class_t classA = float64_class(a);
+    const float_class_t classB = float64_class(b);
+    const int anySNaN = (classA == float_SNaN) || (classB == float_SNaN);
+    const int anyQNaN = (classA == float_QNaN) || (classB == float_QNaN);
+
+    /* Any NaN makes the operands unordered; a signaling NaN is always
+       invalid, a quiet NaN only when the comparison is not a quiet one. */
+    if (anySNaN || anyQNaN) {
+        if (anySNaN || !quiet)
+            float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (classA == float_denormal || classB == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    /* Bit-identical values, or two zeros of any sign, compare equal. */
+    if (a == b || (Bit64u) ((a | b) << 1) == 0)
+        return float_relation_equal;
+
+    const int signA = extractFloat64Sign(a);
+    if (signA != extractFloat64Sign(b))
+        return signA ? float_relation_less : float_relation_greater;
+    /* Same sign: raw encoding order, reversed for negatives. */
+    return (signA ^ (a < b)) ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two double precision floating point numbers and return the
+| smaller of them.
+*----------------------------------------------------------------------------*/
+
+float64 float64_min(float64 a, float64 b, struct float_status_t *status)
+{
+    const int flushDenormals = get_denormals_are_zeros(status) != 0;
+    const float64 lhs = flushDenormals ? float64_denormal_to_zero(a) : a;
+    const float64 rhs = flushDenormals ? float64_denormal_to_zero(b) : b;
+
+    /* `b' is returned unless `a' compares strictly less than it. */
+    return (float64_compare_two(lhs, rhs, status) == float_relation_less) ? lhs : rhs;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two double precision floating point numbers and return the
+| larger of them.
+*----------------------------------------------------------------------------*/
+
+float64 float64_max(float64 a, float64 b, struct float_status_t *status)
+{
+    const int flushDenormals = get_denormals_are_zeros(status) != 0;
+    const float64 lhs = flushDenormals ? float64_denormal_to_zero(a) : a;
+    const float64 rhs = flushDenormals ? float64_denormal_to_zero(b) : b;
+
+    /* `b' is returned unless `a' compares strictly greater than it. */
+    return (float64_compare_two(lhs, rhs, status) == float_relation_greater) ? lhs : rhs;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two  double precision  floating point numbers and  return the
+| smaller/larger of them. The operation  is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_minmax(float64 a, float64 b, int is_max, int is_abs, struct float_status_t *status)
+{
+    if (get_denormals_are_zeros(status)) { // flush denormal operands first
+        a = float64_denormal_to_zero(a);
+        b = float64_denormal_to_zero(b);
+    }
+
+    if (float64_is_nan(a) || float64_is_nan(b)) { // at least one NaN operand
+        if (float64_is_signaling_nan(a)) { // a signaling NaN in `a' is handled first
+            return propagateFloat64NaNOne(a, status);
+        }
+        if (float64_is_signaling_nan(b)) {
+            return propagateFloat64NaNOne(b, status);
+        }
+        if (! float64_is_nan(b)) { // only `a' is a (quiet) NaN: return `b'
+            if (float64_is_denormal(b))
+                float_raise(status, float_flag_denormal);
+            return b;
+        }
+        if (! float64_is_nan(a)) { // only `b' is a (quiet) NaN: return `a'
+            if (float64_is_denormal(a))
+                float_raise(status, float_flag_denormal);
+            return a;
+        }
+        return propagateFloat64NaN(a, b, status); // both are quiet NaNs
+    }
+
+    float64 tmp_a = a, tmp_b = b; // copies used for comparing; `a' or `b' itself is returned
+    if (is_abs) {
+        tmp_a &= ~BX_CONST64(0x8000000000000000); // clear the sign bit
+        tmp_b &= ~BX_CONST64(0x8000000000000000);
+    }
+
+    int aSign = extractFloat64Sign(tmp_a);
+    int bSign = extractFloat64Sign(tmp_b);
+
+    if (float64_is_denormal(a) || float64_is_denormal(b))
+        float_raise(status, float_flag_denormal);
+
+    if (aSign != bSign) { // opposite signs: the negative operand is the smaller
+        if (! is_max) {
+            return aSign ? a : b;
+        } else {
+            return aSign ? b : a;
+        }
+    } else { // same sign: compare the encodings, reversed for negatives
+        if (! is_max) {
+            return (aSign ^ (tmp_a < tmp_b)) ? a : b;
+        } else {
+            return (aSign ^ (tmp_a < tmp_b)) ? b : a;
+        }
+    }
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80(Bit32s a)
+{
+    if (a == 0) return packFloatx80(0, 0, 0);
+    int   zSign = (a < 0);
+    // Negate in unsigned arithmetic: `-a' on the most negative Bit32s overflows.
+    Bit32u absA = zSign ? (Bit32u) 0 - (Bit32u) a : (Bit32u) a;
+    int    shiftCount = countLeadingZeros32(absA) + 32; // left-justify in 64 bits
+    return packFloatx80(zSign, 0x403E - shiftCount, (Bit64u) absA<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80(Bit64s a)
+{
+    if (a == 0) return packFloatx80(0, 0, 0);
+    int   zSign = (a < 0);
+    Bit64u absA = zSign ? (Bit64u) 0 - (Bit64u) a : (Bit64u) a; // unsigned negate: no signed overflow
+    int    shiftCount = countLeadingZeros64(absA); // left-justify the magnitude
+    return packFloatx80(zSign, 0x403E - shiftCount, absA<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80(float32 a, struct float_status_t *status)
+{
+    const int sign = extractFloat32Sign(a);
+    Bit16s expo = extractFloat32Exp(a);
+    Bit32u frac = extractFloat32Frac(a);
+    if (expo == 0xFF) { /* NaN or infinity */
+        if (frac != 0)
+            return commonNaNToFloatx80(float32ToCommonNaN(a, status));
+        return packFloatx80(sign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    } else if (expo == 0) { /* zero or denormal */
+        if (frac == 0) return packFloatx80(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(frac, &expo, &frac);
+    }
+    /* Set the integer bit explicitly and move it up to bit 63. */
+    return packFloatx80(sign, expo + 0x3F80, ((Bit64u) (frac | 0x00800000))<<40);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80(float64 a, struct float_status_t *status)
+{
+    Bit64u sig64 = extractFloat64Frac(a);
+    Bit16s biasedExp = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+
+    if (biasedExp == 0x7FF) { // all-ones exponent: NaN when the fraction is non-zero, infinity otherwise
+        return sig64 ? commonNaNToFloatx80(float64ToCommonNaN(a, status))
+                     : packFloatx80(negative, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (biasedExp == 0) {
+        if (! sig64) return packFloatx80(negative, 0, 0); // signed zero
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(sig64, &biasedExp, &sig64);
+    }
+    // Make the implicit leading bit (bit 52) explicit, then move it up to bit 63.
+    sig64 |= BX_CONST64(0x0010000000000000);
+    return packFloatx80(negative, biasedExp + 0x3C00, sig64<<11);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s floatx80_to_int32(floatx80 a, struct float_status_t *status)
+{
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return int32_indefinite;
+    }
+
+    if ((aExp == 0x7FFF) && (Bit64u) (aSig<<1)) aSign = 0; // NaN (max exponent, non-zero bits below bit 63): sign is cleared
+    int shiftCount = 0x4037 - aExp;
+    if (shiftCount <= 0) shiftCount = 1; // exponents of 0x4037 and above all shift by exactly one bit
+    aSig = shift64RightJamming(aSig, shiftCount);
+    return roundAndPackInt32(aSign, aSig, status); // presumably applies the rounding mode and range check - confirm in helper
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s floatx80_to_int32_round_to_zero(floatx80 a, struct float_status_t *status)
+{
+    Bit32s aExp;
+    Bit64u aSig, savedASig;
+    Bit32s z;
+    int shiftCount;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return int32_indefinite;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    if (aExp > 0x401E) { // exponent too large for 32 bits (the maximum exponent 0x7FFF lands here too)
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if (aExp < 0x3FFF) { // exponent below 0x3FFF: result is 0, inexact unless the operand is zero
+        if (aExp || aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    shiftCount = 0x403E - aExp; // 32..63 here, so the shifted significand fits in 32 bits
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = (Bit32s) aSig;
+    if (aSign) z = (Bit32s) ((Bit32u) 0 - (Bit32u) z); // unsigned negate: `-z' is signed overflow when z is INT32_MIN (-2^31 input)
+    if ((z < 0) ^ aSign) { // result sign disagrees with operand sign: magnitude did not fit
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if ((aSig<<shiftCount) != savedASig) // shifting back loses bits: some fraction was truncated
+    {
+        float_raise(status, float_flag_inexact);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s floatx80_to_int64(floatx80 a, struct float_status_t *status)
+{
+    Bit32s aExp;
+    Bit64u aSig, aSigExtra;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return int64_indefinite;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    int shiftCount = 0x403E - aExp; // right shift applied to the significand; negative when aExp exceeds 0x403E
+    if (shiftCount <= 0)
+    {
+        if (shiftCount) // negative count: exponent above 0x403E is reported as invalid
+        {
+            float_raise(status, float_flag_invalid);
+            return (Bit64s)(int64_indefinite);
+        }
+        aSigExtra = 0; // count is exactly 0: significand is used as-is with no extra bits
+    }
+    else {
+        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); // shifted-out bits are collected in aSigExtra
+    }
+
+    return roundAndPackInt64(aSign, aSig, aSigExtra, status); // presumably applies the rounding mode and range check - confirm in helper
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s floatx80_to_int64_round_to_zero(floatx80 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit32s aExp;
+    Bit64u aSig;
+    Bit64s z;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return int64_indefinite;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    int shiftCount = aExp - 0x403E; // negative for every exponent below 0x403E
+    if (0 <= shiftCount) { // exponent 0x403E or above: int64_indefinite is always returned
+        aSig &= BX_CONST64(0x7FFFFFFFFFFFFFFF); // drop bit 63, keeping only the bits below it
+        if ((a.exp != 0xC03E) || aSig) { // only the encoding exp == 0xC03E with those bits zero avoids the invalid flag
+            float_raise(status, float_flag_invalid);
+        }
+        return (Bit64s)(int64_indefinite);
+    }
+    else if (aExp < 0x3FFF) { // exponent below 0x3FFF: result is 0, inexact unless the operand is zero
+        if (aExp | aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    z = aSig>>(-shiftCount); // -shiftCount is 1..63 here
+    if ((Bit64u) (aSig<<(shiftCount & 63))) { // (shiftCount & 63) == 64 + shiftCount: keeps exactly the bits dropped above
+        float_raise(status, float_flag_inexact);
+    }
+    if (aSign) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32(floatx80 a, struct float_status_t *status)
+{
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+
+    if (aExp == 0x7FFF) { // maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig<<1)) // any bit set below bit 63: NaN
+            return commonNaNToFloat32(floatx80ToCommonNaN(a, status));
+
+        return packFloat32(aSign, 0xFF, 0); // infinity: exponent 0xFF, zero fraction
+    }
+    aSig = shift64RightJamming(aSig, 33); // after this shift the significand fits the (Bit32u) cast below
+    if (aExp || aSig) aExp -= 0x3F81; // exponent adjustment is skipped only when exponent and shifted significand are both zero
+    return roundAndPackFloat32(aSign, aExp, (Bit32u) aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64(floatx80 a, struct float_status_t *status)
+{
+    Bit32s aExp;
+    Bit64u aSig, zSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return float64_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    if (aExp == 0x7FFF) { // maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig<<1)) { // any bit set below bit 63: NaN
+            return commonNaNToFloat64(floatx80ToCommonNaN(a, status));
+        }
+        return packFloat64(aSign, 0x7FF, 0); // infinity: exponent 0x7FF, zero fraction
+    }
+    zSig = shift64RightJamming(aSig, 1);
+    if (aExp || aSig) aExp -= 0x3C01; // exponent adjustment is skipped only for a zero exponent with a zero significand
+    return roundAndPackFloat64(aSign, aExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended double-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int(floatx80 a, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    int aSign;
+    Bit64u lastBitMask, roundBitsMask;
+    int roundingMode = get_float_rounding_mode(status);
+    floatx80 z;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit32s aExp = extractFloatx80Exp(a);
+    Bit64u aSig = extractFloatx80Frac(a);
+    if (0x403E <= aExp) { // exponent this large: a is returned unchanged unless it is a NaN
+        if ((aExp == 0x7FFF) && (Bit64u) (aSig<<1)) {
+            return propagateFloatx80NaNOne(a, status);
+        }
+        return a;
+    }
+    if (aExp < 0x3FFF) { // exponent below 0x3FFF: result is chosen from sign and rounding mode alone
+        if (aExp == 0) {
+            if ((aSig<<1) == 0) return a; // no bits set below bit 63: returned unchanged, no flags
+            float_raise(status, float_flag_denormal);
+        }
+        float_raise(status, float_flag_inexact);
+        aSign = extractFloatx80Sign(a);
+        switch (roundingMode) {
+         case float_round_nearest_even:
+            if ((aExp == 0x3FFE) && (Bit64u) (aSig<<1)) { // exponent 0x3FFE with bits below bit 63 set: rounds to the 0x3FFF value
+                set_float_rounding_up(status);
+                return packFloatx80(aSign, 0x3FFF, BX_CONST64(0x8000000000000000));
+            }
+            break;
+         case float_round_down:
+            if (aSign) {
+                set_float_rounding_up(status);
+                return packFloatx80(1, 0x3FFF, BX_CONST64(0x8000000000000000));
+            }
+            else {
+                return packFloatx80(0, 0, 0);
+            }
+         case float_round_up:
+            if (aSign) {
+                return packFloatx80(1, 0, 0);
+            }
+            else {
+                set_float_rounding_up(status);
+                return packFloatx80(0, 0x3FFF, BX_CONST64(0x8000000000000000));
+            }
+        }
+        return packFloatx80(aSign, 0, 0); // reached after `break' and for modes with no case above: signed zero
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp; // shift is 1..63 here because 0x3FFF <= aExp < 0x403E
+    roundBitsMask = lastBitMask - 1; // every bit below lastBitMask
+    z = a;
+    if (roundingMode == float_round_nearest_even) {
+        z.fraction += lastBitMask>>1; // add half of lastBitMask
+        if ((z.fraction & roundBitsMask) == 0) z.fraction &= ~lastBitMask; // low bits now zero (exact tie): clear the last kept bit
+    }
+    else if (roundingMode != float_round_to_zero) {
+        if (extractFloatx80Sign(z) ^ (roundingMode == float_round_up)) // negative with round-down, or positive with round-up
+            z.fraction += roundBitsMask;
+    }
+    z.fraction &= ~roundBitsMask; // discard the bits below lastBitMask
+    if (z.fraction == 0) { // fraction became zero: bump the exponent and restore bit 63
+        z.exp++;
+        z.fraction = BX_CONST64(0x8000000000000000);
+    }
+    if (z.fraction != a.fraction) {
+        float_raise(status, float_flag_inexact);
+        if (z.fraction > a.fraction || z.exp > a.exp)
+            set_float_rounding_up(status);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, int zSign, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) { // a has the maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
+            return propagateFloatx80NaN(a, b, status);
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if (bExp == 0x7FFF) { // b has the maximum exponent: NaN or infinity
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) { // a is zero: the result is b, still passed through rounding
+            if ((bExp == 0) && bSig) {
+                float_raise(status, float_flag_denormal);
+                normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+            }
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign, bExp, bSig, 0, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) // b is zero: the result is a, still passed through rounding
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign, aExp, aSig, 0, status);
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    Bit32s expDiff = aExp - bExp;
+    zExp = aExp;
+    if (0 < expDiff) { // a has the larger exponent: shift b right, extra bits go to zSig1
+        shift64ExtraRightJamming(bSig, 0,  expDiff, &bSig, &zSig1);
+    }
+    else if (expDiff < 0) { // b has the larger exponent: shift a right, extra bits go to zSig1
+        shift64ExtraRightJamming(aSig, 0, -expDiff, &aSig, &zSig1);
+        zExp = bExp;
+    }
+    else { // equal exponents: no alignment, always continue at shiftRight1
+        zSig0 = aSig + bSig;
+        zSig1 = 0;
+        goto shiftRight1;
+    }
+    zSig0 = aSig + bSig;
+    if ((Bit64s) zSig0 < 0) goto roundAndPack; // bit 63 of the sum is set: skip the one-bit shift
+ shiftRight1:
+    shift64ExtraRightJamming(zSig0, zSig1, 1, &zSig0, &zSig1); // shift the 128-bit sum right by one bit
+    zSig0 |= BX_CONST64(0x8000000000000000); // force bit 63
+    zExp++;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+            zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, int zSign, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) { // a has the maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (bExp == 0x7FFF) {
+            if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+            float_raise(status, float_flag_invalid); // both operands infinite: invalid, default NaN
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if (bExp == 0x7FFF) { // b has the maximum exponent: NaN or infinity
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign ^ 1, 0x7FFF, BX_CONST64(0x8000000000000000)); // infinity with the opposite sign
+    }
+    if (aExp == 0) {
+        if (aSig == 0) { // a is zero: the result is b with the sign flipped
+            if (bExp == 0) {
+                if (bSig) {
+                    float_raise(status, float_flag_denormal);
+                    normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+                    return roundAndPackFloatx80(get_float_rounding_precision(status),
+                        zSign ^ 1, bExp, bSig, 0, status);
+                }
+                return packFloatx80(get_float_rounding_mode(status) == float_round_down, 0, 0); // both zero: sign bit set only in round-down mode
+            }
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign ^ 1, bExp, bSig, 0, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) // b is zero: the result is a, still passed through rounding
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign, aExp, aSig, 0, status);
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    Bit32s expDiff = aExp - bExp;
+    if (0 < expDiff) { // a has the larger exponent: align b, low bits go to zSig1
+        shift128RightJamming(bSig, 0, expDiff, &bSig, &zSig1);
+        goto aBigger;
+    }
+    if (expDiff < 0) { // b has the larger exponent: align a, low bits go to zSig1
+        shift128RightJamming(aSig, 0, -expDiff, &aSig, &zSig1);
+        goto bBigger;
+    }
+    zSig1 = 0; // equal exponents: compare the significands directly
+    if (bSig < aSig) goto aBigger;
+    if (aSig < bSig) goto bBigger;
+    return packFloatx80(get_float_rounding_mode(status) == float_round_down, 0, 0); // equal magnitudes: zero, sign bit set only in round-down mode
+ bBigger:
+    sub128(bSig, 0, aSig, zSig1, &zSig0, &zSig1); // b minus aligned a; the result takes the opposite sign
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aBigger:
+    sub128(aSig, 0, bSig, zSig1, &zSig0, &zSig1); // a minus aligned b
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(get_float_rounding_precision(status),
+            zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+    const int signOfA = extractFloatx80Sign(a);
+
+    // Operands of opposite sign: a + b becomes a subtraction of magnitudes.
+    if (signOfA != extractFloatx80Sign(b)) {
+        return subFloatx80Sigs(a, b, signOfA, status);
+    }
+    return addFloatx80Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+    const int signOfA = extractFloatx80Sign(a);
+
+    // Operands of opposite sign: a - b becomes an addition of magnitudes.
+    if (signOfA != extractFloatx80Sign(b)) {
+        return addFloatx80Sigs(a, b, signOfA, status);
+    }
+    return subFloatx80Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    int aSign, bSign, zSign;
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+ invalid: // also the target of the `goto invalid' statements in the infinity-times-zero cases below
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    bSign = extractFloatx80Sign(b);
+    zSign = aSign ^ bSign; // result sign is the XOR of the operand signs
+
+    if (aExp == 0x7FFF) { // a has the maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if (bExp == 0) {
+            if (bSig == 0) goto invalid; // infinity times zero
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (bExp == 0x7FFF) { // b has the maximum exponent: NaN or infinity
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aExp == 0) {
+            if (aSig == 0) goto invalid; // zero times infinity
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) { // a is zero: signed zero, flagging denormal if b is a non-zero value with exponent 0
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloatx80(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return packFloatx80(zSign, 0, 0); // b is zero: signed zero
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128(aSig, bSig, &zSig0, &zSig1); // 128-bit product in zSig0 (high) and zSig1 (low)
+    if (0 < (Bit64s) zSig0) { // bit 63 of the high word is clear: shift left one bit and lower the exponent
+        shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+             zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    int aSign, bSign, zSign;
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig0, zSig1;
+    Bit64u rem0, rem1, rem2, term0, term1, term2;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    bSign = extractFloatx80Sign(b);
+
+    zSign = aSign ^ bSign; // result sign is the XOR of the operand signs
+    if (aExp == 0x7FFF) { // a has the maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (bExp == 0x7FFF) {
+            if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+            float_raise(status, float_flag_invalid); // infinity divided by infinity
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (bExp == 0x7FFF) { // b has the maximum exponent: NaN or infinity
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign, 0, 0); // finite divided by infinity: signed zero
+    }
+    if (bExp == 0) {
+        if (bSig == 0) { // divisor is zero
+            if ((aExp | aSig) == 0) { // zero divided by zero
+                float_raise(status, float_flag_invalid);
+                return floatx80_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return packFloatx80(zSign, 0, 0); // zero dividend: signed zero
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    if (bSig <= aSig) { // halve the dividend so it is below the divisor, compensating in the exponent
+        shift128Right(aSig, 0, 1, &aSig, &rem1);
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64(aSig, rem1, bSig); // estimate of the high quotient word
+    mul64To128(bSig, zSig0, &term0, &term1);
+    sub128(aSig, rem1, term0, term1, &rem0, &rem1);
+    while ((Bit64s) rem0 < 0) { // negative remainder: step the quotient down and add the divisor back
+        --zSig0;
+        add128(rem0, rem1, 0, bSig, &rem0, &rem1);
+    }
+    zSig1 = estimateDiv128To64(rem1, 0, bSig); // estimate of the low quotient word
+    if ((Bit64u) (zSig1<<1) <= 8) { // estimate is refined only when (zSig1<<1), as 64 bits, is at most 8
+        mul64To128(bSig, zSig1, &term1, &term2);
+        sub128(rem1, 0, term1, term2, &rem1, &rem2);
+        while ((Bit64s) rem1 < 0) {
+            --zSig1;
+            add128(rem1, rem2, 0, bSig, &rem1, &rem2);
+        }
+        zSig1 |= ((rem1 | rem2) != 0); // record a non-zero remainder in the lowest bit
+    }
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+            zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt(floatx80 a, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    int aSign;
+    Bit32s aExp, zExp;
+    Bit64u aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+    Bit64u rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    if (aExp == 0x7FFF) { // maximum exponent: NaN or infinity
+        if ((Bit64u) (aSig0<<1)) return propagateFloatx80NaNOne(a, status);
+        if (! aSign) return a; // positive infinity is returned unchanged
+        float_raise(status, float_flag_invalid); // negative infinity
+        return floatx80_default_nan;
+    }
+    if (aSign) {
+        if ((aExp | aSig0) == 0) return a; // negative zero is returned unchanged
+        float_raise(status, float_flag_invalid); // any other negative operand
+        return floatx80_default_nan;
+    }
+    if (aExp == 0) {
+        if (aSig0 == 0) return packFloatx80(0, 0, 0); // positive zero
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+    zExp = ((aExp - 0x3FFF)>>1) + 0x3FFF; // halve the unbiased exponent, then re-add the 0x3FFF bias
+    zSig0 = estimateSqrt32(aExp, aSig0>>32); // initial estimate from the upper 32 significand bits
+    shift128Right(aSig0, 0, 2 + (aExp & 1), &aSig0, &aSig1); // shift by 2, or by 3 when the exponent is odd
+    zSig0 = estimateDiv128To64(aSig0, aSig1, zSig0<<32) + (zSig0<<30);
+    doubleZSig0 = zSig0<<1;
+    mul64To128(zSig0, zSig0, &term0, &term1);
+    sub128(aSig0, aSig1, term0, term1, &rem0, &rem1); // remainder = operand - zSig0^2
+    while ((Bit64s) rem0 < 0) { // negative remainder: step the estimate down and correct the remainder
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128(rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1);
+    }
+    zSig1 = estimateDiv128To64(rem1, 0, doubleZSig0); // estimate of the low result word
+    if ((zSig1 & BX_CONST64(0x3FFFFFFFFFFFFFFF)) <= 5) { // estimate is refined only when its low 62 bits are at most 5
+        if (zSig1 == 0) zSig1 = 1;
+        mul64To128(doubleZSig0, zSig1, &term1, &term2);
+        sub128(rem1, 0, term1, term2, &rem1, &rem2);
+        mul64To128(zSig1, zSig1, &term2, &term3);
+        sub192(rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3);
+        while ((Bit64s) rem1 < 0) {
+            --zSig1;
+            shortShift128Left(0, zSig1, 1, &term2, &term3);
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add192(rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3);
+        }
+        zSig1 |= ((rem1 | rem2 | rem3) != 0); // record a non-zero remainder in the lowest bit
+    }
+    shortShift128Left(0, zSig1, 1, &zSig0, &zSig1); // overwrites zSig0; its value is restored from doubleZSig0 below
+    zSig0 |= doubleZSig0;
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+            0, zExp, zSig0, zSig1, status);
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128(floatx80 a, struct float_status_t *status)
+{
+    Bit64u zSig0, zSig1;
+    /* Only NaNs are special-cased; every other input takes the common repacking path below. */
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int   aSign = extractFloatx80Sign(a);
+
+    if ((aExp == 0x7FFF) && (Bit64u) (aSig<<1))    /* maximum exponent and a nonzero bit below bit 63: NaN */
+        return commonNaNToFloat128(floatx80ToCommonNaN(a, status));
+
+    shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); /* drop bit 63 of aSig, then shift the 128-bit value right by 16 */
+    return packFloat128Four(aSign, aExp, zSig0, zSig1); /* sign and exponent are passed through unchanged */
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80(float128 a, struct float_status_t *status)
+{
+    Bit32s aExp;
+    Bit64u aSig0, aSig1;
+    /* aSig0 is the high fraction word, aSig1 the low one (as passed to shortShift128Left below). */
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    int aSign = extractFloat128Sign(a);
+
+    if (aExp == 0x7FFF) {                          /* maximum exponent: NaN or infinity */
+        if (aSig0 | aSig1)                         /* nonzero fraction: NaN */
+            return commonNaNToFloatx80(float128ToCommonNaN(a, status));
+        /* infinity: only bit 63 of the floatx80 significand is set */
+        return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+
+    if (aExp == 0) {                               /* zero or subnormal input */
+        if ((aSig0 | aSig1) == 0) return packFloatx80(aSign, 0, 0);
+        float_raise(status, float_flag_denormal);  /* nonzero subnormal: report the denormal operand */
+        normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1);
+    }
+    else aSig0 |= BX_CONST64(0x0001000000000000);  /* set bit 48 (presumably the implicit leading one) */
+    /* A 15-bit left shift moves bit 48 of aSig0 up to bit 63. */
+    shortShift128Left(aSig0, aSig1, 15, &aSig0, &aSig1);
+    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); /* precision fixed at 80, not read from status */
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point value `a' and quadruple-precision floating-point value `b'. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_128_mul(floatx80 a, float128 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int aSign, bSign, zSign;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+ invalid:                                          /* also the goto target for infinity * zero below */
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig0 = extractFloat128Frac0(b);
+    bSig1 = extractFloat128Frac1(b);
+    bExp = extractFloat128Exp(b);
+    bSign = extractFloat128Sign(b);
+
+    zSign = aSign ^ bSign;                         /* product sign is the XOR of the operand signs */
+
+    if (aExp == 0x7FFF) {                          /* a is a NaN or an infinity */
+        if ((Bit64u) (aSig<<1)
+             || ((bExp == 0x7FFF) && (bSig0 | bSig1)))
+        {   /* NOTE(review): b is converted via float128ToCommonNaN even when only a is a NaN - confirm this is intended */
+            floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status));
+            return propagateFloatx80NaN(a, r, status);
+        }
+        if (bExp == 0) {
+            if ((bSig0 | bSig1) == 0) goto invalid; /* infinity * zero */
+            float_raise(status, float_flag_denormal); /* infinity * subnormal still reports the denormal operand */
+        }
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (bExp == 0x7FFF) {                          /* b is a NaN or an infinity; a has a non-maximum exponent */
+        if (bSig0 | bSig1) {
+            floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status));
+            return propagateFloatx80NaN(a, r, status);
+        }
+        if (aExp == 0) {
+            if (aSig == 0) goto invalid;           /* zero * infinity */
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) {                           /* a is zero: result is a signed zero */
+            if ((bExp == 0) && (bSig0 | bSig1)) float_raise(status, float_flag_denormal);
+            return packFloatx80(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
+    }
+    else bSig0 |= BX_CONST64(0x0001000000000000);  /* set bit 48 of b's high word */
+
+    zExp = aExp + bExp - 0x3FFE;
+    shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1); /* bit 48 moves up to bit 63 */
+    mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2); /* NOTE(review): zSig2 is not used after this call */
+    if (0 < (Bit64s) zSig0) {                      /* bit 63 clear (and word nonzero): renormalize by one bit */
+        shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+             zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs(float128 a, float128 b, int zSign, struct float_status_t *status)
+{
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    Bit32s expDiff;
+    /* The signs of a and b are not read here; the caller supplies the result sign as zSign. */
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+    expDiff = aExp - bExp;
+
+    if (0 < expDiff) {                             /* a has the larger exponent */
+        if (aExp == 0x7FFF) {
+            if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status);
+            return a;                              /* infinity plus a value with a smaller exponent: a itself */
+        }
+        if (bExp == 0) --expDiff;                  /* exponent-0 operand: no bit 48 is added, shift one less */
+        else bSig0 |= BX_CONST64(0x0001000000000000);
+        shift128ExtraRightJamming(bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2); /* align b; third output word goes to zSig2 */
+        zExp = aExp;
+    }
+    else if (expDiff < 0) {                        /* b has the larger exponent */
+        if (bExp == 0x7FFF) {
+            if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+            return packFloat128Four(zSign, 0x7FFF, 0, 0); /* infinity carrying the caller's sign */
+        }
+        if (aExp == 0) ++expDiff;
+        else aSig0 |= BX_CONST64(0x0001000000000000);
+        shift128ExtraRightJamming(aSig0, aSig1, 0, -expDiff, &aSig0, &aSig1, &zSig2);
+        zExp = bExp;
+    }
+    else {                                         /* equal exponents */
+        if (aExp == 0x7FFF) {
+            if (aSig0 | aSig1 | bSig0 | bSig1)
+                return propagateFloat128NaN(a, b, status);
+            /* both operands are infinities */
+            return a;
+        }
+        add128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1);
+        if (aExp == 0) return packFloat128Four(zSign, 0, zSig0, zSig1); /* both exponents 0: packed directly, no rounding call */
+        zSig2 = 0;
+        zSig0 |= BX_CONST64(0x0002000000000000);   /* set bit 49 of the sum */
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    aSig0 |= BX_CONST64(0x0001000000000000);       /* reached only on the unequal-exponent paths; bit 48 is set before the add */
+    add128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1);
+    --zExp;
+    if (zSig0 < BX_CONST64(0x0002000000000000)) goto roundAndPack; /* below bit 49: keep the decremented exponent */
+    ++zExp;
+ shiftRight1:
+    shift128ExtraRightJamming(zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2);
+ roundAndPack:
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs(float128 a, float128 b, int zSign, struct float_status_t *status)
+{
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    Bit32s expDiff;
+    /* The signs of a and b are not read here; zSign is the sign used when |a| is the larger magnitude. */
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+
+    expDiff = aExp - bExp;
+    shortShift128Left(aSig0, aSig1, 14, &aSig0, &aSig1); /* 14-bit left shift; undone via "zExp - 14" at the end */
+    shortShift128Left(bSig0, bSig1, 14, &bSig0, &bSig1);
+    if (0 < expDiff) goto aExpBigger;
+    if (expDiff < 0) goto bExpBigger;
+    if (aExp == 0x7FFF) {                          /* equal, maximum exponents */
+        if (aSig0 | aSig1 | bSig0 | bSig1)
+            return propagateFloat128NaN(a, b, status);
+        /* both operands are infinities: invalid operation */
+        float_raise(status, float_flag_invalid);
+        return float128_default_nan;
+    }
+    if (aExp == 0) {                               /* both exponents are 0: use exponent 1 for both */
+        aExp = 1;
+        bExp = 1;
+    }
+    if (bSig0 < aSig0) goto aBigger;               /* compare high words first, then low words */
+    if (aSig0 < bSig0) goto bBigger;
+    if (bSig1 < aSig1) goto aBigger;
+    if (aSig1 < bSig1) goto bBigger;
+    return packFloat128(0, 0); /* equal magnitudes. NOTE(review): rounding mode is not consulted; IEEE 754 gives -0 under round-down */
+
+ bExpBigger:
+    if (bExp == 0x7FFF) {
+        if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+        return packFloat128Four(zSign ^ 1, 0x7FFF, 0, 0); /* infinity with the sign inverted */
+    }
+    if (aExp == 0) ++expDiff;
+    else {
+        aSig0 |= BX_CONST64(0x4000000000000000);   /* bit 62 = bit 48 after the 14-bit shift */
+    }
+    shift128RightJamming(aSig0, aSig1, - expDiff, &aSig0, &aSig1);
+    bSig0 |= BX_CONST64(0x4000000000000000);
+ bBigger:
+    sub128(bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1);
+    zExp = bExp;
+    zSign ^= 1;                                    /* b is the larger magnitude: invert the result sign */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if (aExp == 0x7FFF) {
+        if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status);
+        return a;
+    }
+    if (bExp == 0) --expDiff;
+    else {
+        bSig0 |= BX_CONST64(0x4000000000000000);
+    }
+    shift128RightJamming(bSig0, bSig1, expDiff, &bSig0, &bSig1);
+    aSig0 |= BX_CONST64(0x4000000000000000);
+ aBigger:
+    sub128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1);
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add(float128 a, float128 b, struct float_status_t *status)
+{
+    /* Operands of opposite sign turn the sum into a difference of magnitudes. */
+    const int signOfA = extractFloat128Sign(a);
+    const int signsDiffer = (signOfA != extractFloat128Sign(b));
+
+    if (signsDiffer) {
+        /* subtract the magnitudes; a's sign is handed over as the starting sign */
+        return subFloat128Sigs(a, b, signOfA, status);
+    }
+    return addFloat128Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub(float128 a, float128 b, struct float_status_t *status)
+{
+    /* Operands of opposite sign turn the difference into a sum of magnitudes. */
+    const int signOfA = extractFloat128Sign(a);
+    const int signsDiffer = (signOfA != extractFloat128Sign(b));
+
+    if (signsDiffer) {
+        /* add the magnitudes; a's sign is handed over as the result sign */
+        return addFloat128Sigs(a, b, signOfA, status);
+    }
+    return subFloat128Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul(float128 a, float128 b, struct float_status_t *status)
+{
+    int aSign, bSign, zSign;
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+    /* Special cases (NaN, infinity, zero, subnormal) are resolved first; the general product follows. */
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    aSign = extractFloat128Sign(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+    bSign = extractFloat128Sign(b);
+
+    zSign = aSign ^ bSign;                         /* product sign is the XOR of the operand signs */
+    if (aExp == 0x7FFF) {                          /* a is a NaN or an infinity */
+        if ((aSig0 | aSig1) || ((bExp == 0x7FFF) && (bSig0 | bSig1))) {
+            return propagateFloat128NaN(a, b, status);
+        }
+        if ((bExp | bSig0 | bSig1) == 0) {         /* infinity * zero: invalid operation */
+            float_raise(status, float_flag_invalid);
+            return float128_default_nan;
+        }
+        return packFloat128Four(zSign, 0x7FFF, 0, 0);
+    }
+    if (bExp == 0x7FFF) {                          /* b is a NaN or an infinity */
+        if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+        if ((aExp | aSig0 | aSig1) == 0) {         /* zero * infinity: invalid operation */
+            float_raise(status, float_flag_invalid);
+            return float128_default_nan;
+        }
+        return packFloat128Four(zSign, 0x7FFF, 0, 0);
+    }
+    if (aExp == 0) {
+        if ((aSig0 | aSig1) == 0) return packFloat128Four(zSign, 0, 0, 0); /* zero operand: signed zero result */
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1);
+    }
+    if (bExp == 0) {
+        if ((bSig0 | bSig1) == 0) return packFloat128Four(zSign, 0, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
+    }
+    zExp = aExp + bExp - 0x4000;
+    aSig0 |= BX_CONST64(0x0001000000000000);       /* bit 48 is set on a only */
+    shortShift128Left(bSig0, bSig1, 16, &bSig0, &bSig1); /* bit 48 of b, if set, is shifted out of the high word */
+    mul128To256(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3);
+    add128(zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1); /* adds a to the top two words (presumably b's leading-one term) */
+    zSig2 |= (zSig3 != 0);                         /* fold any nonzero bits of zSig3 into bit 0 of zSig2 */
+    if (BX_CONST64(0x0002000000000000) <= zSig0) { /* bit 49 or above set: shift right once and bump the exponent */
+        shift128ExtraRightJamming(zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2);
+        ++zExp;
+    }
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div(float128 a, float128 b, struct float_status_t *status)
+{
+    int aSign, bSign, zSign;
+    Bit32s aExp, bExp, zExp;
+    Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    Bit64u rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    /* Special cases are resolved first; the quotient is then built as two 64-bit words (zSig0, zSig1). */
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    aSign = extractFloat128Sign(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+    bSign = extractFloat128Sign(b);
+
+    zSign = aSign ^ bSign;                         /* quotient sign is the XOR of the operand signs */
+    if (aExp == 0x7FFF) {                          /* a is a NaN or an infinity */
+        if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status);
+        if (bExp == 0x7FFF) {
+            if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+            float_raise(status, float_flag_invalid); /* infinity / infinity */
+            return float128_default_nan;
+        }
+        return packFloat128Four(zSign, 0x7FFF, 0, 0);
+    }
+    if (bExp == 0x7FFF) {                          /* b is a NaN or an infinity */
+        if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+        return packFloat128Four(zSign, 0, 0, 0);   /* a / infinity: signed zero */
+    }
+    if (bExp == 0) {
+        if ((bSig0 | bSig1) == 0) {                /* divisor is zero */
+            if ((aExp | aSig0 | aSig1) == 0) {     /* zero / zero: invalid operation */
+                float_raise(status, float_flag_invalid);
+                return float128_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloat128Four(zSign, 0x7FFF, 0, 0); /* nonzero / zero: signed infinity */
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
+    }
+    if (aExp == 0) {
+        if ((aSig0 | aSig1) == 0) return packFloat128Four(zSign, 0, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1);
+    }
+    zExp = aExp - bExp + 0x3FFD;
+    shortShift128Left(                             /* set bit 48, then shift left 15 so it lands on bit 63 */
+        aSig0 | BX_CONST64(0x0001000000000000), aSig1, 15, &aSig0, &aSig1);
+    shortShift128Left(
+        bSig0 | BX_CONST64(0x0001000000000000), bSig1, 15, &bSig0, &bSig1);
+    if (le128(bSig0, bSig1, aSig0, aSig1)) {       /* b <= a: halve a and bump the exponent */
+        shift128Right(aSig0, aSig1, 1, &aSig0, &aSig1);
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64(aSig0, aSig1, bSig0); /* estimate of the upper quotient word */
+    mul128By64To192(bSig0, bSig1, zSig0, &term0, &term1, &term2);
+    sub192(aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2);
+    while ((Bit64s) rem0 < 0) {                    /* remainder negative: lower the estimate and add b back */
+        --zSig0;
+        add192(rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2);
+    }
+    zSig1 = estimateDiv128To64(rem1, rem2, bSig0); /* estimate of the lower quotient word */
+    if ((zSig1 & 0x3FFF) <= 4) {                   /* refined only when the low 14 bits are <= 4 */
+        mul128By64To192(bSig0, bSig1, zSig1, &term1, &term2, &term3);
+        sub192(rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3);
+        while ((Bit64s) rem1 < 0) {
+            --zSig1;
+            add192(rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3);
+        }
+        zSig1 |= ((rem1 | rem2 | rem3) != 0);      /* nonzero remainder sets bit 0 of zSig1 */
+    }
+    shift128ExtraRightJamming(zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2); /* shift right 15; third output word goes to zSig2 */
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128(Bit64s a)
+{
+    Bit64u zSig0, zSig1;
+    /* No status argument and no rounding call: the result is packed directly. */
+    if (a == 0) return packFloat128Four(0, 0, 0, 0);
+    int zSign = (a < 0);
+    Bit64u absA = zSign ? (Bit64u) 0 - (Bit64u) a : (Bit64u) a; /* negate as unsigned: signed "- a" overflows for the most negative value */
+    Bit8u shiftCount = countLeadingZeros64(absA) + 49; /* total left shift that puts the leading one at bit 48 of zSig0 */
+    Bit32s zExp = 0x406E - shiftCount;
+    if (64 <= shiftCount) {                        /* value goes straight into the high word; shift by the remainder */
+        zSig1 = 0;
+        zSig0 = absA;
+        shiftCount -= 64;
+    }
+    else {                                         /* value starts in the low word and is shifted across */
+        zSig1 = absA;
+        zSig0 = 0;
+    }
+    shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
+    return packFloat128Four(zSign, zExp, zSig0, zSig1);
+}
+
+#endif
diff --git a/src/cpu/softfloat/softfloat.h b/src/cpu/softfloat/softfloat.h
new file mode 100644
index 000000000..1d1b0f08f
--- /dev/null
+++ b/src/cpu/softfloat/softfloat.h
@@ -0,0 +1,488 @@
+/*============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "config.h"      /* generated by configure script from config.h.in */
+
+#ifndef _SOFTFLOAT_H_
+#define _SOFTFLOAT_H_
+
+#define FLOAT16
+#define FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+#ifdef FLOAT16
+typedef Bit16u float16;
+#endif
+typedef Bit32u float32;
+typedef Bit64u float64;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point class.
+*----------------------------------------------------------------------------*/
+typedef enum {                /* enumerators take the implicit values 0..6 in this order */
+    float_zero,
+    float_SNaN,
+    float_QNaN,
+    float_negative_inf,
+    float_positive_inf,
+    float_denormal,
+    float_normalized
+} float_class_t;              /* return type of the float*_class() routines, e.g. float32_class() */
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point NaN operands handling mode.
+*----------------------------------------------------------------------------*/
+enum float_nan_handling_mode_t {
+    float_larger_significand_nan = 0,   // mode used by the x87 FPU (presumably: the NaN with the larger significand wins)
+    float_first_operand_nan = 1	        // mode used by SSE (presumably: the first NaN operand wins)
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum float_round_t {                    /* presumably the values held in float_status_t.float_rounding_mode - confirm */
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum float_exception_flag_t {           /* one distinct bit per exception, so several can be OR-ed into one int */
+    float_flag_invalid   = 0x01,
+    float_flag_denormal  = 0x02,
+    float_flag_divbyzero = 0x04,
+    float_flag_overflow  = 0x08,
+    float_flag_underflow = 0x10,
+    float_flag_inexact   = 0x20         /* highest bit used by this enum */
+};
+
+extern const unsigned float_all_exceptions_mask;
+
+#ifdef FLOATX80
+#define RAISE_SW_C1 0x0200
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {                                  /* presumably the result codes of the float*_compare() routines - confirm */
+    float_relation_less      = -1,
+    float_relation_equal     =  0,
+    float_relation_greater   =  1,
+    float_relation_unordered =  2       /* the only code outside the -1..1 range */
+};
+
+/*----------------------------------------------------------------------------
+| Options to indicate which negations to perform in float*_muladd()
+| Using these differs from negating an input or output before calling
+| the muladd function in that this means that a NaN doesn't have its
+| sign bit inverted before it is propagated.
+*----------------------------------------------------------------------------*/
+enum {                                  /* bit flags passed as the `flags' argument of float32_muladd() */
+    float_muladd_negate_c       = 1,
+    float_muladd_negate_product = 2,
+    float_muladd_negate_result  = float_muladd_negate_c | float_muladd_negate_product /* both bits set (value 3) */
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point status structure.
+*----------------------------------------------------------------------------*/
+struct float_status_t
+{
+#ifdef FLOATX80
+    int float_rounding_precision;	/* floatx80 only; read by get_float_rounding_precision() */
+#endif
+    int float_rounding_mode;            /* read by get_float_rounding_mode() */
+    int float_exception_flags;          /* OR-accumulated by float_raise(); set_float_rounding_up() adds RAISE_SW_C1 */
+    int float_exception_masks;          /* tested by float_exception_masked() */
+    int float_suppress_exception;       /* bits removed from the value returned by get_exception_flags() */
+    int float_nan_handling_mode;	/* flag register; read by get_float_nan_handling_mode() */
+    int flush_underflow_to_zero;	/* flag register; read by get_flush_underflow_to_zero() */
+    int denormals_are_zeros;            /* flag register; read by get_denormals_are_zeros() */
+};
+
+/*----------------------------------------------------------------------------
+| Raises the software IEC/IEEE floating-point exception flags given in `flags'
+| by OR-ing them into `status->float_exception_flags'; no flag is cleared here.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void float_raise(struct float_status_t *status, int flags)
+{
+    status->float_exception_flags |= flags;    /* accumulate: previously raised flags stay set */
+}
+
+/*----------------------------------------------------------------------------
+| Returns the raised exception flags minus those in `float_suppress_exception'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_exception_flags(const struct float_status_t *status)
+{
+    return status->float_exception_flags & ~status->float_suppress_exception; /* suppressed bits read as 0 */
+}
+
+/*----------------------------------------------------------------------------
+| Returns nonzero if any of the exceptions in `flag' is masked. The result is
+| the masked subset of `flag' itself; it is not normalized to 0 or 1.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float_exception_masked(const struct float_status_t *status, int flag)
+{
+    return status->float_exception_masks & flag;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the rounding mode stored in `status' (presumably a float_round_t).
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_float_rounding_mode(const struct float_status_t *status)
+{
+    return status->float_rounding_mode;        /* returned as a plain int, unmodified */
+}
+
+/*----------------------------------------------------------------------------
+| Returns the rounding precision stored in `status' (FLOATX80 builds only).
+*----------------------------------------------------------------------------*/
+
+#ifdef FLOATX80
+BX_CPP_INLINE int get_float_rounding_precision(const struct float_status_t *status)
+{
+    return status->float_rounding_precision;   /* this field exists only when FLOATX80 is defined */
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the NaN operands handling mode stored in `status' (presumably one
+| of the float_nan_handling_mode_t values).
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_float_nan_handling_mode(const struct float_status_t *status)
+{
+    return status->float_nan_handling_mode;
+}
+
+/*----------------------------------------------------------------------------
+| Sets the RAISE_SW_C1 bit (0x0200) in the exception flags (floatx80 only).
+*----------------------------------------------------------------------------*/
+
+#ifdef FLOATX80
+BX_CPP_INLINE void set_float_rounding_up(struct float_status_t *status)
+{
+    status->float_exception_flags |= RAISE_SW_C1; /* shares the int with the float_flag_* bits (0x01..0x20) */
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the <denormals-are-zeros> setting stored in `status'. The field is
+| returned as-is (nonzero presumably meaning the feature is enabled).
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_denormals_are_zeros(const struct float_status_t *status)
+{
+    return status->denormals_are_zeros;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the <flush-underflow-to-zero> setting stored in `status'. The field
+| is returned as-is (nonzero presumably meaning the feature is enabled).
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_flush_underflow_to_zero(const struct float_status_t *status)
+{
+    return status->flush_underflow_to_zero;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32(Bit32s, struct float_status_t *status);
+float64 int32_to_float64(Bit32s);
+float32 int64_to_float32(Bit64s, struct float_status_t *status);
+float64 int64_to_float64(Bit64s, struct float_status_t *status);
+
+float32 uint32_to_float32(Bit32u, struct float_status_t *status);
+float64 uint32_to_float64(Bit32u);
+float32 uint64_to_float32(Bit64u, struct float_status_t *status);
+float64 uint64_to_float64(Bit64u, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+Bit32s float32_to_int32(float32, struct float_status_t *status);
+Bit32s float32_to_int32_round_to_zero(float32, struct float_status_t *status);
+Bit64s float32_to_int64(float32, struct float_status_t *status);
+Bit64s float32_to_int64_round_to_zero(float32, struct float_status_t *status);
+Bit32u float32_to_uint32(float32, struct float_status_t *status);
+Bit32u float32_to_uint32_round_to_zero(float32, struct float_status_t *status);
+Bit64u float32_to_uint64(float32, struct float_status_t *status);
+Bit64u float32_to_uint64_round_to_zero(float32, struct float_status_t *status);
+float64 float32_to_float64(float32, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int(float32, Bit8u scale, struct float_status_t *status);
+float32 float32_add(float32, float32, struct float_status_t *status);
+float32 float32_sub(float32, float32, struct float_status_t *status);
+float32 float32_mul(float32, float32, struct float_status_t *status);
+float32 float32_div(float32, float32, struct float_status_t *status);
+float32 float32_sqrt(float32, struct float_status_t *status);
+float32 float32_frc(float32, struct float_status_t *status);
+float32 float32_muladd(float32, float32, float32, int flags, struct float_status_t *status);
+float32 float32_scalef(float32, float32, struct float_status_t *status);
+int float32_compare(float32, float32, int quiet, struct float_status_t *status);
+
+BX_CPP_INLINE float32 float32_round_to_int_one(float32 a, struct float_status_t *status)
+{
+  return float32_round_to_int(a, 0, status);   /* forwards with the `scale' argument fixed at 0 */
+}
+
+BX_CPP_INLINE float32 float32_fmadd(float32 a, float32 b, float32 c, struct float_status_t *status)
+{
+  return float32_muladd(a, b, c, 0, status);   /* flags = 0: no negation requested */
+}
+
+BX_CPP_INLINE float32 float32_fmsub(float32 a, float32 b, float32 c, struct float_status_t *status)
+{
+  return float32_muladd(a, b, c, float_muladd_negate_c, status); /* presumably (a * b) - c -- confirm flag handling in float32_muladd */
+}
+
+BX_CPP_INLINE float32 float32_fnmadd(float32 a, float32 b, float32 c, struct float_status_t *status)
+{
+  return float32_muladd(a, b, c, float_muladd_negate_product, status); /* presumably -(a * b) + c -- confirm flag handling in float32_muladd */
+}
+
+BX_CPP_INLINE float32 float32_fnmsub(float32 a, float32 b, float32 c, struct float_status_t *status)
+{
+  return float32_muladd(a, b, c, float_muladd_negate_result, status); /* presumably -((a * b) + c) -- confirm flag handling in float32_muladd */
+}
+
+BX_CPP_INLINE int float32_compare_two(float32 a, float32 b, struct float_status_t *status)
+{
+  return float32_compare(a, b, 0, status); /* quiet argument = 0 */
+}
+
+BX_CPP_INLINE int float32_compare_quiet(float32 a, float32 b, struct float_status_t *status)
+{
+  return float32_compare(a, b, 1, status); /* quiet argument = 1 */
+}
+
+float_class_t float32_class(float32); /* returns the float_class_t of a single-precision value */
+
+float32 float32_min(float32 a, float32 b, struct float_status_t *status);
+float32 float32_max(float32 a, float32 b, struct float_status_t *status);
+
+float32 float32_minmax(float32 a, float32 b, int is_max, int is_abs, struct float_status_t *status); /* NOTE(review): is_max / is_abs presumably select max-vs-min and magnitude comparison -- confirm in the implementation */
+float32 float32_getexp(float32 a, struct float_status_t *status);
+float32 float32_getmant(float32 a, struct float_status_t *status, int sign_ctrl, int interv); /* note: status comes before sign_ctrl / interv here */
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+Bit32s float64_to_int32(float64 a, struct float_status_t *status);
+Bit32s float64_to_int32_round_to_zero(float64 a, struct float_status_t *status);
+Bit64s float64_to_int64(float64 a, struct float_status_t *status);
+Bit64s float64_to_int64_round_to_zero(float64 a, struct float_status_t *status);
+Bit32u float64_to_uint32(float64 a, struct float_status_t *status);
+Bit32u float64_to_uint32_round_to_zero(float64 a, struct float_status_t *status);
+Bit64u float64_to_uint64(float64 a, struct float_status_t *status);
+Bit64u float64_to_uint64_round_to_zero(float64 a, struct float_status_t *status);
+float32 float64_to_float32(float64 a, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Double-precision rounding, arithmetic, fused multiply-add and comparison.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int(float64 a, Bit8u scale, struct float_status_t *status);
+float64 float64_add(float64 a, float64 b, struct float_status_t *status);
+float64 float64_sub(float64 a, float64 b, struct float_status_t *status);
+float64 float64_mul(float64 a, float64 b, struct float_status_t *status);
+float64 float64_div(float64 a, float64 b, struct float_status_t *status);
+float64 float64_sqrt(float64 a, struct float_status_t *status);
+float64 float64_frc(float64 a, struct float_status_t *status);
+float64 float64_muladd(float64 a, float64 b, float64 c, int flags, struct float_status_t *status);
+float64 float64_scalef(float64 a, float64 b, struct float_status_t *status);
+int float64_compare(float64 a, float64 b, int quiet, struct float_status_t *status);
+
+BX_CPP_INLINE float64 float64_round_to_int_one(float64 a, struct float_status_t *status)
+{
+  return float64_round_to_int(a, 0, status); /* float64_round_to_int with its scale argument fixed at 0 */
+}
+
+BX_CPP_INLINE float64 float64_fmadd(float64 a, float64 b, float64 c, struct float_status_t *status)
+{
+  return float64_muladd(a, b, c, 0, status); /* flags == 0: none of the float_muladd_negate_* flags set */
+}
+
+BX_CPP_INLINE float64 float64_fmsub(float64 a, float64 b, float64 c, struct float_status_t *status)
+{
+  return float64_muladd(a, b, c, float_muladd_negate_c, status); /* presumably (a * b) - c -- confirm flag handling in float64_muladd */
+}
+
+BX_CPP_INLINE float64 float64_fnmadd(float64 a, float64 b, float64 c, struct float_status_t *status)
+{
+  return float64_muladd(a, b, c, float_muladd_negate_product, status); /* presumably -(a * b) + c -- confirm flag handling in float64_muladd */
+}
+
+BX_CPP_INLINE float64 float64_fnmsub(float64 a, float64 b, float64 c, struct float_status_t *status)
+{
+  return float64_muladd(a, b, c, float_muladd_negate_result, status); /* presumably -((a * b) + c) -- confirm flag handling in float64_muladd */
+}
+
+BX_CPP_INLINE int float64_compare_two(float64 a, float64 b, struct float_status_t *status)
+{
+  return float64_compare(a, b, 0, status); /* quiet argument = 0 */
+}
+
+BX_CPP_INLINE int float64_compare_quiet(float64 a, float64 b, struct float_status_t *status)
+{
+  return float64_compare(a, b, 1, status); /* quiet argument = 1 */
+}
+
+float_class_t float64_class(float64); /* returns the float_class_t of a double-precision value */
+
+float64 float64_min(float64 a, float64 b, struct float_status_t *status);
+float64 float64_max(float64 a, float64 b, struct float_status_t *status);
+
+float64 float64_minmax(float64 a, float64 b, int is_max, int is_abs, struct float_status_t *status); /* NOTE(review): is_max / is_abs presumably select max-vs-min and magnitude comparison -- confirm in the implementation */
+float64 float64_getexp(float64 a, struct float_status_t *status);
+float64 float64_getmant(float64 a, struct float_status_t *status, int sign_ctrl, int interv); /* note: status comes before sign_ctrl / interv here */
+
+#ifdef FLOAT16
+float32 float16_to_float32(float16 a, struct float_status_t *status);
+float16 float32_to_float16(float32 a, struct float_status_t *status);
+
+float_class_t float16_class(float16 a);
+#endif
+
+#ifdef FLOATX80
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+
+#ifdef BX_BIG_ENDIAN
+typedef struct floatx80 {	// leave alignment to compiler
+    Bit16u exp;         // sign in bit 15, biased exponent in the low 15 bits
+    Bit64u fraction;    // 64-bit significand
+} floatx80;
+#else
+typedef struct floatx80 {   // same two fields, declared in the opposite order
+    Bit64u fraction;    // 64-bit significand
+    Bit16u exp;         // sign in bit 15, biased exponent in the low 15 bits
+} floatx80;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*----------------------------------------------------------------------------
+| Integer to extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 int32_to_floatx80(Bit32s a);
+floatx80 int64_to_floatx80(Bit64s a);
+
+/*----------------------------------------------------------------------------
+| Conversions between extended double-precision and the other formats.
+*----------------------------------------------------------------------------*/
+floatx80 float32_to_floatx80(float32 a, struct float_status_t *status);
+floatx80 float64_to_floatx80(float64 a, struct float_status_t *status);
+
+Bit32s floatx80_to_int32(floatx80 a, struct float_status_t *status);
+Bit32s floatx80_to_int32_round_to_zero(floatx80 a, struct float_status_t *status);
+Bit64s floatx80_to_int64(floatx80 a, struct float_status_t *status);
+Bit64s floatx80_to_int64_round_to_zero(floatx80 a, struct float_status_t *status);
+
+float32 floatx80_to_float32(floatx80 a, struct float_status_t *status);
+float64 floatx80_to_float64(floatx80 a, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Extended double-precision rounding, arithmetic and classification.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int(floatx80 a, struct float_status_t *status);
+floatx80 floatx80_add(floatx80 a, floatx80 b, struct float_status_t *status);
+floatx80 floatx80_sub(floatx80 a, floatx80 b, struct float_status_t *status);
+floatx80 floatx80_mul(floatx80 a, floatx80 b, struct float_status_t *status);
+floatx80 floatx80_div(floatx80 a, floatx80 b, struct float_status_t *status);
+floatx80 floatx80_sqrt(floatx80 a, struct float_status_t *status);
+
+float_class_t floatx80_class(floatx80 a);
+#ifdef __cplusplus
+}
+#endif
+#endif  /* FLOATX80 */
+
+#ifdef FLOAT128
+
+#ifdef BX_BIG_ENDIAN
+typedef struct float128 {   /* two 64-bit words; `hi' is declared first on big-endian builds */
+    Bit64u hi, lo;
+} float128;
+#else
+typedef struct float128 {   /* same two words; `lo' is declared first otherwise */
+    Bit64u lo, hi;
+} float128;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*----------------------------------------------------------------------------
+| Quadruple-precision conversions: floatx80 <-> float128, int64 -> float128.
+*----------------------------------------------------------------------------*/
+float128 floatx80_to_float128(floatx80 a, struct float_status_t *status);
+floatx80 float128_to_floatx80(float128 a, struct float_status_t *status);
+
+float128 int64_to_float128(Bit64s a);
+
+/*----------------------------------------------------------------------------
+| Mixed operation: floatx80 operand times float128 operand, floatx80 result.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_128_mul(floatx80 a, float128 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_add(float128 a, float128 b, struct float_status_t *status);
+float128 float128_sub(float128 a, float128 b, struct float_status_t *status);
+float128 float128_mul(float128 a, float128 b, struct float_status_t *status);
+float128 float128_div(float128 a, float128 b, struct float_status_t *status);
+#ifdef __cplusplus
+}
+#endif
+#endif  /* FLOAT128 */
+
+#endif
diff --git a/src/cpu/softfloat/softfloat16.cc b/src/cpu/softfloat/softfloat16.cc
new file mode 100644
index 000000000..8c17d3a86
--- /dev/null
+++ b/src/cpu/softfloat/softfloat16.cc
@@ -0,0 +1,129 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.h"
+
+#ifdef FLOAT16
+
+#include "softfloat-round-pack.h"
+#include "softfloat-specialize.h"
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Classifies the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+float_class_t float16_class(float16 a)
+{
+    const Bit16s exponent = extractFloat16Exp(a);
+    const Bit16u fraction = extractFloat16Frac(a);
+
+    if (exponent == 0) {
+        /* zero exponent: either a zero or a denormal */
+        return (fraction == 0) ? float_zero : float_denormal;
+    }
+
+    if (exponent != 0x1F)
+        return float_normalized;
+
+    /* exponent 0x1F: a NaN when the fraction is non-zero, else an infinity */
+    if (fraction != 0) {
+        /* fraction bit 0x200 separates quiet from signaling NaNs */
+        return (fraction & 0x200) ? float_QNaN : float_SNaN;
+    }
+    return extractFloat16Sign(a) ? float_negative_inf : float_positive_inf;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the half-precision floating-point value `a' to the single-
+| precision format.  A denormal input raises float_flag_denormal and is
+| normalized; the denormals-are-zeros setting is not consulted.  NaNs are
+| converted through the common-NaN helpers.
+*----------------------------------------------------------------------------*/
+
+float32 float16_to_float32(float16 a, struct float_status_t *status)
+{
+    int sign = extractFloat16Sign(a);
+    Bit16s exponent = extractFloat16Exp(a);
+    Bit16u fraction = extractFloat16Frac(a);
+
+    if (exponent == 0x1F) {
+        if (fraction == 0) return packFloat32(sign, 0xFF, 0);
+        return commonNaNToFloat32(float16ToCommonNaN(a, status));
+    }
+    if (exponent == 0) {
+        if (fraction == 0) return packFloat32(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat16Subnormal(fraction, &exponent, &fraction);
+        exponent -= 1;
+    }
+
+    /* exponent is offset by 0x70 and the fraction moved up by 13 bits */
+    return packFloat32(sign, exponent + 0x70, ((Bit32u) fraction) << 13);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the single-precision floating-point value `a' to the half-
+| precision format, with final rounding done by roundAndPackFloat16.  A
+| denormal input becomes a signed zero when denormals-are-zeros is enabled
+| and raises float_flag_denormal otherwise.
+*----------------------------------------------------------------------------*/
+
+float16 float32_to_float16(float32 a, struct float_status_t *status)
+{
+    int sign = extractFloat32Sign(a);
+    Bit16s exponent = extractFloat32Exp(a);
+    Bit32u fraction = extractFloat32Frac(a);
+
+    if (exponent == 0xFF) {
+        if (fraction == 0) return packFloat16(sign, 0x1F, 0);
+        return commonNaNToFloat16(float32ToCommonNaN(a, status));
+    }
+    if (exponent == 0) {
+        /* a true zero, or a denormal flushed by denormals-are-zeros */
+        if (get_denormals_are_zeros(status) || fraction == 0)
+            return packFloat16(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+    }
+
+    Bit16u zSig = (Bit16u) shift32RightJamming(fraction, 9);
+    if (exponent != 0 || zSig != 0) {
+        zSig |= 0x4000;
+        exponent -= 0x71;
+    }
+
+    return roundAndPackFloat16(sign, exponent, zSig, status);
+}
+
+#endif
diff --git a/src/cpu/softfloat/softfloat_poly.cc b/src/cpu/softfloat/softfloat_poly.cc
new file mode 100644
index 000000000..5c7079353
--- /dev/null
+++ b/src/cpu/softfloat/softfloat_poly.cc
@@ -0,0 +1,89 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#include <assert.h>
+#include "softfloat.h"
+
+//                            2         3         4               n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       2k                --        2k+1
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//   f(x) ~ [ p(x) + x * q(x) ]
+// Horner-style evaluation at x of the n coefficients arr[0] .. arr[n-1]
+// (the last coefficient is consumed first); requires n >= 1.
+float128 EvalPoly(float128 x, float128 *arr, int n, struct float_status_t *status)
+{
+    assert(n > 0);      // arr[--n] below needs at least one coefficient
+    float128 r = arr[--n];
+    while (n > 0) {     // n == 1: nothing left to fold in, the result is arr[0]
+        r = float128_mul(r, x, status);
+        r = float128_add(r, arr[--n], status);
+    }
+
+    return r;
+}
+
+//                  2         4         6         8               2n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                    2
+//   f(x) ~ [ p(x) + x * q(x) ]
+// Implemented as EvalPoly() over the same n coefficients, evaluated at x*x.
+float128 EvenPoly(float128 x, float128 *arr, int n, struct float_status_t *status)
+{
+    float128 x_squared = float128_mul(x, x, status);
+    return EvalPoly(x_squared, arr, n, status);
+}
+
+//                        3         5         7         9               2n+1
+// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//          0         1         2         3         4               n
+//                        2         4         6         8               2n
+//      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//               0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                        2
+//   f(x) ~ x * [ p(x) + x * q(x) ]
+// Implemented as x multiplied by EvenPoly() of the same arguments.
+float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status)
+{
+    float128 even_part = EvenPoly(x, arr, n, status);
+    return float128_mul(x, even_part, status);
+}
diff --git a/src/cpu/softfloat/softfloatx80.cc b/src/cpu/softfloat/softfloatx80.cc
new file mode 100644
index 000000000..3ac3e61b3
--- /dev/null
+++ b/src/cpu/softfloat/softfloatx80.cc
@@ -0,0 +1,367 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "softfloat-macros.h"
+
+const floatx80 Const_QNaN = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); // default NaN exponent/fraction, sign argument 0
+const floatx80 Const_Z    = packFloatx80(0, 0x0000, 0); // +0.0
+const floatx80 Const_1    = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000)); // +1.0
+const floatx80 Const_L2T  = packFloatx80(0, 0x4000, BX_CONST64(0xd49a784bcd1b8afe)); // log2(10)
+const floatx80 Const_L2E  = packFloatx80(0, 0x3fff, BX_CONST64(0xb8aa3b295c17f0bc)); // log2(e)
+const floatx80 Const_PI   = packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235)); // pi
+const floatx80 Const_LG2  = packFloatx80(0, 0x3ffd, BX_CONST64(0x9a209a84fbcff799)); // log10(2)
+const floatx80 Const_LN2  = packFloatx80(0, 0x3ffe, BX_CONST64(0xb17217f7d1cf79ac)); // ln(2)
+const floatx80 Const_INF  = packFloatx80(0, 0x7fff, BX_CONST64(0x8000000000000000)); // +infinity
+
+/*----------------------------------------------------------------------------
+| Commonly used single-precision floating point constants (raw bit patterns)
+*----------------------------------------------------------------------------*/
+const float32 float32_negative_inf  = 0xff800000;
+const float32 float32_positive_inf  = 0x7f800000;
+const float32 float32_negative_zero = 0x80000000;
+const float32 float32_positive_zero = 0x00000000;
+const float32 float32_negative_one  = 0xbf800000;
+const float32 float32_positive_one  = 0x3f800000;
+const float32 float32_max_float     = 0x7f7fffff; /* largest finite value */
+const float32 float32_min_float     = 0xff7fffff; /* most negative finite value, not the smallest magnitude */
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+const float32 float32_default_nan   = 0xffc00000;
+
+/*----------------------------------------------------------------------------
+| Commonly used double-precision floating point constants (raw bit patterns)
+*----------------------------------------------------------------------------*/
+const float64 float64_negative_inf  = BX_CONST64(0xfff0000000000000);
+const float64 float64_positive_inf  = BX_CONST64(0x7ff0000000000000);
+const float64 float64_negative_zero = BX_CONST64(0x8000000000000000);
+const float64 float64_positive_zero = BX_CONST64(0x0000000000000000);
+const float64 float64_negative_one  = BX_CONST64(0xbff0000000000000);
+const float64 float64_positive_one  = BX_CONST64(0x3ff0000000000000);
+const float64 float64_max_float     = BX_CONST64(0x7fefffffffffffff); /* largest finite value */
+const float64 float64_min_float     = BX_CONST64(0xffefffffffffffff); /* most negative finite value, not the smallest magnitude */
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+const float64 float64_default_nan = BX_CONST64(0xFFF8000000000000);
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision value `a' to a 16-bit two's
+| complement integer, rounding according to the current rounding mode (the
+| work is delegated to floatx80_to_int32).  An unsupported encoding raises
+| the invalid flag; a 32-bit result outside -32768..32767 replaces the
+| exception flags with the invalid flag alone.  Either failure returns the
+| integer indefinite value.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16(floatx80 a, struct float_status_t *status)
+{
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+    }
+
+    const Bit32s wide = floatx80_to_int32(a, status);
+    const int fits_int16 = (wide >= -32768) && (wide <= 32767);
+
+    if (fits_int16)
+        return (Bit16s) wide;
+    /* out of range: keep only the invalid flag, discarding the others */
+    status->float_exception_flags = float_flag_invalid;
+    return int16_indefinite;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision value `a' to a 16-bit two's
+| complement integer, always rounding toward zero (the work is delegated to
+| floatx80_to_int32_round_to_zero).  An unsupported encoding raises the
+| invalid flag; a 32-bit result outside -32768..32767 replaces the
+| exception flags with the invalid flag alone.  Either failure returns the
+| integer indefinite value.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16_round_to_zero(floatx80 a, struct float_status_t *status)
+{
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+    }
+
+    const Bit32s wide = floatx80_to_int32_round_to_zero(a, status);
+    const int fits_int16 = (wide >= -32768) && (wide <= 32767);
+
+    if (fits_int16)
+        return (Bit16s) wide;
+    /* out of range: keep only the invalid flag, discarding the others */
+    status->float_exception_flags = float_flag_invalid;
+    return int16_indefinite;
+}
+
+/*----------------------------------------------------------------------------
+| Separate the source extended double-precision floating point value `a'
+| into its exponent and significand, store the significand back to the
+| 'a' and return the exponent. The operation performed is a superset of
+| the IEC/IEEE recommended logb(x) function.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 *a, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit64u aSig = extractFloatx80Frac(*a);
+    Bit32s aExp = extractFloatx80Exp(*a);
+    int   aSign = extractFloatx80Sign(*a);
+
+    if (floatx80_is_unsupported(*a))
+    {
+        float_raise(status, float_flag_invalid);
+        *a = floatx80_default_nan; // both *a and the return value become the default NaN
+        return *a;
+    }
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1))
+        {
+            *a = propagateFloatx80NaNOne(*a, status); // NaN input: same NaN result stored and returned
+            return *a;
+        }
+        return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000)); // infinite input: returns +infinity, *a is left unchanged
+    }
+    if (aExp == 0)
+    {
+        if (aSig == 0) {
+            float_raise(status, float_flag_divbyzero);
+            *a = packFloatx80(aSign, 0, 0); // zero input: *a stays a zero of the same sign
+            return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000)); // ... and -infinity is returned
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+
+    a->exp = (aSign << 15) + 0x3FFF; // keep the sign bit, force the biased exponent to 0x3FFF
+    a->fraction = aSig;
+    return int32_to_floatx80(aExp - 0x3FFF); // exponent with the 0x3FFF bias removed, as a floatx80
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by
+| value `b'. The function truncates the value in the second operand 'b' to
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp;
+    Bit64u aSig, bSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if ((bExp == 0x7FFF) && bSign) { // infinite a scaled by -infinity: invalid
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a; // any other b leaves an infinite a unchanged
+    }
+    if (bExp == 0x7FFF) {
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if ((aExp | aSig) == 0) { // a is zero
+            if (! bSign) { // zero scaled by +infinity: invalid
+                float_raise(status, float_flag_invalid);
+                return floatx80_default_nan;
+            }
+            return a;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        if (bSign) return packFloatx80(aSign, 0, 0); // b = -infinity: zero with the sign of a
+        return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000)); // b = +infinity: infinity with the sign of a
+    }
+    if (aExp == 0) {
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        if (aSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+        if (bExp < 0x3FFF) // scale truncates to 0: just round and repack the normalized a
+            return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0, status);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+
+    if (bExp > 0x400E) {
+        /* generate appropriate overflow/underflow */
+        return roundAndPackFloatx80(80, aSign,
+                          bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
+    }
+
+    if (bExp < 0x3FFF) return a; // exponent below the 0x3FFF bias: b truncates to 0, a is returned as is
+
+    int shiftCount = 0x403E - bExp; // right shift that leaves only the integer part of b's significand
+    bSig >>= shiftCount;
+    Bit32s scale = (Bit32s) bSig;
+    if (bSign) scale = -scale; /* -32768..32767 */
+    return
+        roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
+}
+
+/*----------------------------------------------------------------------------
+| Classifies the extended double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+float_class_t floatx80_class(floatx80 a)
+{
+    const Bit64u integer_bit = BX_CONST64(0x8000000000000000);
+    const Bit64u quiet_bit   = BX_CONST64(0x4000000000000000);
+
+    const Bit32s exponent    = extractFloatx80Exp(a);
+    const Bit64u significand = extractFloatx80Frac(a);
+
+    if (exponent == 0) {
+        /* zero exponent: true zero, otherwise denormal or pseudo-denormal */
+        return (significand == 0) ? float_zero : float_denormal;
+    }
+
+    /* a non-zero exponent without the top significand bit is an unsupported
+       encoding, reported as a signaling NaN */
+    if ((significand & integer_bit) == 0)
+        return float_SNaN;
+
+    if (exponent != 0x7fff)
+        return float_normalized;
+
+    /* maximum exponent: infinity when nothing but the top bit is set */
+    if ((Bit64u) (significand << 1) == 0) {
+        return extractFloatx80Sign(a) ? float_negative_inf : float_positive_inf;
+    }
+
+    return (significand & quiet_bit) ? float_QNaN : float_SNaN;
+}
+
+/*----------------------------------------------------------------------------
+| Compares the extended double-precision values `a' and `b' and returns
+| 'float_relation_less', 'float_relation_equal' or 'float_relation_greater'
+| accordingly, or 'float_relation_unordered' when either operand is a NaN
+| or an unsupported encoding.  The invalid flag is raised for signaling NaNs
+| and unsupported encodings, and for quiet NaNs unless `quiet' is non-zero.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80 a, floatx80 b, int quiet, struct float_status_t *status)
+{
+    const float_class_t class_a = floatx80_class(a);
+    const float_class_t class_b = floatx80_class(b);
+
+    /* floatx80_class reports unsupported encodings as SNaN, so this also
+       covers them: always invalid */
+    if (class_a == float_SNaN || class_b == float_SNaN) {
+        float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    /* quiet NaNs are unordered too, but signal only for non-quiet compares */
+    if (class_a == float_QNaN || class_b == float_QNaN) {
+        if (quiet == 0)
+            float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (class_a == float_denormal || class_b == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    const int sign_a = extractFloatx80Sign(a);
+    const int sign_b = extractFloatx80Sign(b);
+
+    /* zeros compare equal whatever their sign; against a non-zero operand
+       the result follows the sign of that operand */
+    if (class_a == float_zero) {
+        if (class_b == float_zero)
+            return float_relation_equal;
+        return sign_b ? float_relation_greater : float_relation_less;
+    }
+    if (class_b == float_zero || sign_a != sign_b)
+        return sign_a ? float_relation_less : float_relation_greater;
+
+    /* same sign, both non-zero: compare magnitudes as (exponent, significand) */
+    Bit64u sig_a = extractFloatx80Frac(a);
+    Bit32s exp_a = extractFloatx80Exp(a);
+    Bit64u sig_b = extractFloatx80Frac(b);
+    Bit32s exp_b = extractFloatx80Exp(b);
+
+    if (class_a == float_denormal)
+        normalizeFloatx80Subnormal(sig_a, &exp_a, &sig_a);
+    if (class_b == float_denormal)
+        normalizeFloatx80Subnormal(sig_b, &exp_b, &sig_b);
+
+    if (exp_a == exp_b && sig_a == sig_b)
+        return float_relation_equal;
+
+    const int a_smaller_magnitude =
+        (exp_a < exp_b) || ((exp_a == exp_b) && (sig_a < sig_b));
+    /* for negative operands the smaller magnitude is the greater value */
+    return (a_smaller_magnitude != (sign_a != 0)) ? float_relation_less : float_relation_greater;
+}
+
+
+int floatx80_compare_two(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+    return floatx80_compare(a, b, 0, status); /* quiet = 0: quiet-NaN operands raise invalid as well */
+}
+
+int floatx80_compare_quiet(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+    return floatx80_compare(a, b, 1, status); /* quiet = 1: quiet-NaN operands do not raise invalid */
+}
diff --git a/src/cpu/softfloat/softfloatx80.h b/src/cpu/softfloat/softfloatx80.h
new file mode 100644
index 000000000..8378169e2
--- /dev/null
+++ b/src/cpu/softfloat/softfloatx80.h
@@ -0,0 +1,113 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOATX80_EXTENSIONS_H_
+#define _SOFTFLOATX80_EXTENSIONS_H_
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point-to-integer conversion routines.
+*----------------------------------------------------------------------------*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+Bit16s floatx80_to_int16(floatx80, struct float_status_t *status);
+Bit16s floatx80_to_int16_round_to_zero(floatx80, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 *a, struct float_status_t *status);
+floatx80 floatx80_scale(floatx80 a, floatx80 b, struct float_status_t *status);
+int floatx80_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status);
+int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status);
+floatx80 f2xm1(floatx80 a, struct float_status_t *status);
+floatx80 fyl2x(floatx80 a, floatx80 b, struct float_status_t *status);
+floatx80 fyl2xp1(floatx80 a, floatx80 b, struct float_status_t *status);
+floatx80 fpatan(floatx80 a, floatx80 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision trigonometric functions.
+*----------------------------------------------------------------------------*/
+
+int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, struct float_status_t *status);
+int fsin(floatx80 *a, struct float_status_t *status);
+int fcos(floatx80 *a, struct float_status_t *status);
+int ftan(floatx80 *a, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision compare.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80, floatx80, int quiet, struct float_status_t *status);
+int floatx80_compare_two(floatx80 a, floatx80 b, struct float_status_t *status);
+int floatx80_compare_quiet(floatx80 a, floatx80 b, struct float_status_t *status);
+
+#ifdef __cplusplus
+}
+#endif
+
+/*-----------------------------------------------------------------------------
+| Returns the absolute value of the extended double-precision floating-point
+| value `reg'.  Only bit 15 (0x8000) of the exp field is cleared; the fraction
+| is returned unchanged and no exception flags are involved.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 floatx80_abs(floatx80 reg)
+{
+    reg.exp &= 0x7FFF; /* keep the low 15 bits of exp, drop the top (sign) bit */
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value
+| `reg'.  Only bit 15 (0x8000) of the exp field is toggled; the fraction is
+| returned unchanged and no exception flags are involved.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 floatx80_chs(floatx80 reg)
+{
+    reg.exp ^= 0x8000; /* flip the top (sign) bit of exp */
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Commonly used extended double-precision floating-point constants.
+*----------------------------------------------------------------------------*/
+
+extern const floatx80 Const_Z;
+extern const floatx80 Const_1;
+extern const floatx80 Const_L2T;
+extern const floatx80 Const_L2E;
+extern const floatx80 Const_PI;
+extern const floatx80 Const_LG2;
+extern const floatx80 Const_LN2;
+extern const floatx80 Const_INF;
+#endif
diff --git a/src/cpu/x86.c b/src/cpu/x86.c
index 47250045f..76101c344 100644
--- a/src/cpu/x86.c
+++ b/src/cpu/x86.c
@@ -245,7 +245,7 @@ reset_common(int hard)
             /* TODO: Hack, but will do for time being, because all AT machines currently are 286+,
                      and vice-versa. */
             dma_set_at(is286);
-            device_reset_all();
+            device_reset_all(DEVICE_ALL);
         }
     }
 
@@ -322,7 +322,7 @@ reset_common(int hard)
 
         /* If we have an AT or PS/2 keyboard controller, make sure the A20 state
            is correct. */
-        kbc_at_a20_reset();
+        device_reset_all(DEVICE_KBC);
     }
 
     if (!is286)
@@ -359,7 +359,7 @@ hardresetx86(void)
     /* TODO: Hack, but will do for time being, because all AT machines currently are 286+,
        and vice-versa. */
     dma_set_at(is286);
-    device_reset_all();
+    device_reset_all(DEVICE_ALL);
 
     cpu_alt_reset = 0;
 
diff --git a/src/cpu/x86_ops.h b/src/cpu/x86_ops.h
index ca4d83d69..9d34c71ec 100644
--- a/src/cpu/x86_ops.h
+++ b/src/cpu/x86_ops.h
@@ -151,6 +151,7 @@ extern const OpFn dynarec_ops_3DNOWE[256];
 void x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f);
 #endif
 
+
 extern const OpFn *x86_opcodes;
 extern const OpFn *x86_opcodes_0f;
 extern const OpFn *x86_opcodes_d8_a16;
@@ -205,6 +206,38 @@ extern const OpFn ops_pentiumpro_0f[1024];
 extern const OpFn ops_pentium2_0f[1024];
 extern const OpFn ops_pentium2d_0f[1024];
 
+extern const OpFn ops_sf_fpu_287_d9_a16[256];
+extern const OpFn ops_sf_fpu_287_d9_a32[256];
+extern const OpFn ops_sf_fpu_287_da_a16[256];
+extern const OpFn ops_sf_fpu_287_da_a32[256];
+extern const OpFn ops_sf_fpu_287_db_a16[256];
+extern const OpFn ops_sf_fpu_287_db_a32[256];
+extern const OpFn ops_sf_fpu_287_dc_a16[32];
+extern const OpFn ops_sf_fpu_287_dc_a32[32];
+extern const OpFn ops_sf_fpu_287_dd_a16[256];
+extern const OpFn ops_sf_fpu_287_dd_a32[256];
+extern const OpFn ops_sf_fpu_287_de_a16[256];
+extern const OpFn ops_sf_fpu_287_de_a32[256];
+extern const OpFn ops_sf_fpu_287_df_a16[256];
+extern const OpFn ops_sf_fpu_287_df_a32[256];
+
+extern const OpFn ops_sf_fpu_d8_a16[32];
+extern const OpFn ops_sf_fpu_d8_a32[32];
+extern const OpFn ops_sf_fpu_d9_a16[256];
+extern const OpFn ops_sf_fpu_d9_a32[256];
+extern const OpFn ops_sf_fpu_da_a16[256];
+extern const OpFn ops_sf_fpu_da_a32[256];
+extern const OpFn ops_sf_fpu_db_a16[256];
+extern const OpFn ops_sf_fpu_db_a32[256];
+extern const OpFn ops_sf_fpu_dc_a16[32];
+extern const OpFn ops_sf_fpu_dc_a32[32];
+extern const OpFn ops_sf_fpu_dd_a16[256];
+extern const OpFn ops_sf_fpu_dd_a32[256];
+extern const OpFn ops_sf_fpu_de_a16[256];
+extern const OpFn ops_sf_fpu_de_a32[256];
+extern const OpFn ops_sf_fpu_df_a16[256];
+extern const OpFn ops_sf_fpu_df_a32[256];
+
 extern const OpFn ops_fpu_287_d9_a16[256];
 extern const OpFn ops_fpu_287_d9_a32[256];
 extern const OpFn ops_fpu_287_da_a16[256];
@@ -239,6 +272,13 @@ extern const OpFn ops_fpu_df_a32[256];
 extern const OpFn ops_nofpu_a16[256];
 extern const OpFn ops_nofpu_a32[256];
 
+extern const OpFn ops_sf_fpu_686_da_a16[256];
+extern const OpFn ops_sf_fpu_686_da_a32[256];
+extern const OpFn ops_sf_fpu_686_db_a16[256];
+extern const OpFn ops_sf_fpu_686_db_a32[256];
+extern const OpFn ops_sf_fpu_686_df_a16[256];
+extern const OpFn ops_sf_fpu_686_df_a32[256];
+
 extern const OpFn ops_fpu_686_da_a16[256];
 extern const OpFn ops_fpu_686_da_a32[256];
 extern const OpFn ops_fpu_686_db_a16[256];
diff --git a/src/cpu/x86_ops_fpu.h b/src/cpu/x86_ops_fpu.h
index 314ec321b..502218be5 100644
--- a/src/cpu/x86_ops_fpu.h
+++ b/src/cpu/x86_ops_fpu.h
@@ -96,6 +96,15 @@ opWAIT(uint32_t fetchdat)
         x86_int(7);
         return 1;
     }
+
+    if (!cpu_use_dynarec && fpu_softfloat) {
+        if (fpu_state.swd & FPU_SW_Summary) {
+            if (cr0 & 0x20) {
+                x86_int(16);
+                return 1;
+            }
+        }
+    }
     CLOCK_CYCLES(4);
     return 0;
 }
diff --git a/src/cpu/x86_ops_io.h b/src/cpu/x86_ops_io.h
index c4d46404d..8a99b8668 100644
--- a/src/cpu/x86_ops_io.h
+++ b/src/cpu/x86_ops_io.h
@@ -2,7 +2,7 @@ static int
 opIN_AL_imm(uint32_t fetchdat)
 {
     uint16_t port = (uint16_t) getbytef();
-    check_io_perm(port);
+    check_io_perm(port, 1);
     AL = inb(port);
     CLOCK_CYCLES(12);
     PREFETCH_RUN(12, 2, -1, 1, 0, 0, 0, 0);
@@ -14,8 +14,7 @@ static int
 opIN_AX_imm(uint32_t fetchdat)
 {
     uint16_t port = (uint16_t) getbytef();
-    check_io_perm(port);
-    check_io_perm(port + 1);
+    check_io_perm(port, 2);
     AX = inw(port);
     CLOCK_CYCLES(12);
     PREFETCH_RUN(12, 2, -1, 1, 0, 0, 0, 0);
@@ -27,10 +26,7 @@ static int
 opIN_EAX_imm(uint32_t fetchdat)
 {
     uint16_t port = (uint16_t) getbytef();
-    check_io_perm(port);
-    check_io_perm(port + 1);
-    check_io_perm(port + 2);
-    check_io_perm(port + 3);
+    check_io_perm(port, 4);
     EAX = inl(port);
     CLOCK_CYCLES(12);
     PREFETCH_RUN(12, 2, -1, 0, 1, 0, 0, 0);
@@ -43,7 +39,7 @@ static int
 opOUT_AL_imm(uint32_t fetchdat)
 {
     uint16_t port = (uint16_t) getbytef();
-    check_io_perm(port);
+    check_io_perm(port, 1);
     outb(port, AL);
     CLOCK_CYCLES(10);
     PREFETCH_RUN(10, 2, -1, 0, 0, 1, 0, 0);
@@ -57,8 +53,7 @@ static int
 opOUT_AX_imm(uint32_t fetchdat)
 {
     uint16_t port = (uint16_t) getbytef();
-    check_io_perm(port);
-    check_io_perm(port + 1);
+    check_io_perm(port, 2);
     outw(port, AX);
     CLOCK_CYCLES(10);
     PREFETCH_RUN(10, 2, -1, 0, 0, 1, 0, 0);
@@ -70,10 +65,7 @@ static int
 opOUT_EAX_imm(uint32_t fetchdat)
 {
     uint16_t port = (uint16_t) getbytef();
-    check_io_perm(port);
-    check_io_perm(port + 1);
-    check_io_perm(port + 2);
-    check_io_perm(port + 3);
+    check_io_perm(port, 4);
     outl(port, EAX);
     CLOCK_CYCLES(10);
     PREFETCH_RUN(10, 2, -1, 0, 0, 0, 1, 0);
@@ -85,7 +77,7 @@ opOUT_EAX_imm(uint32_t fetchdat)
 static int
 opIN_AL_DX(uint32_t fetchdat)
 {
-    check_io_perm(DX);
+    check_io_perm(DX, 1);
     AL = inb(DX);
     CLOCK_CYCLES(12);
     PREFETCH_RUN(12, 1, -1, 1, 0, 0, 0, 0);
@@ -96,8 +88,7 @@ opIN_AL_DX(uint32_t fetchdat)
 static int
 opIN_AX_DX(uint32_t fetchdat)
 {
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
+    check_io_perm(DX, 2);
     AX = inw(DX);
     CLOCK_CYCLES(12);
     PREFETCH_RUN(12, 1, -1, 1, 0, 0, 0, 0);
@@ -108,10 +99,7 @@ opIN_AX_DX(uint32_t fetchdat)
 static int
 opIN_EAX_DX(uint32_t fetchdat)
 {
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
-    check_io_perm(DX + 2);
-    check_io_perm(DX + 3);
+    check_io_perm(DX, 4);
     EAX = inl(DX);
     CLOCK_CYCLES(12);
     PREFETCH_RUN(12, 1, -1, 0, 1, 0, 0, 0);
@@ -123,7 +111,7 @@ opIN_EAX_DX(uint32_t fetchdat)
 static int
 opOUT_AL_DX(uint32_t fetchdat)
 {
-    check_io_perm(DX);
+    check_io_perm(DX, 1);
     outb(DX, AL);
     CLOCK_CYCLES(11);
     PREFETCH_RUN(11, 1, -1, 0, 0, 1, 0, 0);
@@ -134,8 +122,7 @@ opOUT_AL_DX(uint32_t fetchdat)
 static int
 opOUT_AX_DX(uint32_t fetchdat)
 {
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
+    check_io_perm(DX, 2);
     outw(DX, AX);
     CLOCK_CYCLES(11);
     PREFETCH_RUN(11, 1, -1, 0, 0, 1, 0, 0);
@@ -146,10 +133,7 @@ opOUT_AX_DX(uint32_t fetchdat)
 static int
 opOUT_EAX_DX(uint32_t fetchdat)
 {
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
-    check_io_perm(DX + 2);
-    check_io_perm(DX + 3);
+    check_io_perm(DX, 4);
     outl(DX, EAX);
     PREFETCH_RUN(11, 1, -1, 0, 0, 0, 1, 0);
     if (nmi && nmi_enable && nmi_mask)
diff --git a/src/cpu/x86_ops_rep.h b/src/cpu/x86_ops_rep.h
index 67bd8433d..4b8f42185 100644
--- a/src/cpu/x86_ops_rep.h
+++ b/src/cpu/x86_ops_rep.h
@@ -9,7 +9,7 @@
             uint8_t temp;                                                                                         \
                                                                                                                   \
             SEG_CHECK_WRITE(&cpu_state.seg_es);                                                                   \
-            check_io_perm(DX);                                                                                    \
+            check_io_perm(DX, 1);                                                                                 \
             CHECK_WRITE(&cpu_state.seg_es, DEST_REG, DEST_REG);                                                   \
             high_page = 0;                                                                                        \
             do_mmut_wb(es, DEST_REG, &addr64);                                                                    \
@@ -48,8 +48,7 @@
             uint16_t temp;                                                                                        \
                                                                                                                   \
             SEG_CHECK_WRITE(&cpu_state.seg_es);                                                                   \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
+            check_io_perm(DX, 2);                                                                                 \
             CHECK_WRITE(&cpu_state.seg_es, DEST_REG, DEST_REG + 1UL);                                             \
             high_page = 0;                                                                                        \
             do_mmut_ww(es, DEST_REG, addr64a);                                                                    \
@@ -88,10 +87,7 @@
             uint32_t temp;                                                                                        \
                                                                                                                   \
             SEG_CHECK_WRITE(&cpu_state.seg_es);                                                                   \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
-            check_io_perm(DX + 2);                                                                                \
-            check_io_perm(DX + 3);                                                                                \
+            check_io_perm(DX, 4);                                                                                 \
             CHECK_WRITE(&cpu_state.seg_es, DEST_REG, DEST_REG + 3UL);                                             \
             high_page = 0;                                                                                        \
             do_mmut_wl(es, DEST_REG, addr64a);                                                                    \
@@ -132,7 +128,7 @@
             temp = readmemb(cpu_state.ea_seg->base, SRC_REG);                                                     \
             if (cpu_state.abrt)                                                                                   \
                 return 1;                                                                                         \
-            check_io_perm(DX);                                                                                    \
+            check_io_perm(DX, 1);                                                                                 \
             outb(DX, temp);                                                                                       \
             if (cpu_state.flags & D_FLAG)                                                                         \
                 SRC_REG--;                                                                                        \
@@ -163,8 +159,7 @@
             temp = readmemw(cpu_state.ea_seg->base, SRC_REG);                                                     \
             if (cpu_state.abrt)                                                                                   \
                 return 1;                                                                                         \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
+            check_io_perm(DX, 2);                                                                                 \
             outw(DX, temp);                                                                                       \
             if (cpu_state.flags & D_FLAG)                                                                         \
                 SRC_REG -= 2;                                                                                     \
@@ -195,10 +190,7 @@
             temp = readmeml(cpu_state.ea_seg->base, SRC_REG);                                                     \
             if (cpu_state.abrt)                                                                                   \
                 return 1;                                                                                         \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
-            check_io_perm(DX + 2);                                                                                \
-            check_io_perm(DX + 3);                                                                                \
+            check_io_perm(DX, 4);                                                                                 \
             outl(DX, temp);                                                                                       \
             if (cpu_state.flags & D_FLAG)                                                                         \
                 SRC_REG -= 4;                                                                                     \
diff --git a/src/cpu/x86_ops_rep_dyn.h b/src/cpu/x86_ops_rep_dyn.h
index 576baa403..5c64ed94d 100644
--- a/src/cpu/x86_ops_rep_dyn.h
+++ b/src/cpu/x86_ops_rep_dyn.h
@@ -7,7 +7,7 @@
             uint8_t temp;                                                                                         \
                                                                                                                   \
             SEG_CHECK_WRITE(&cpu_state.seg_es);                                                                   \
-            check_io_perm(DX);                                                                                    \
+            check_io_perm(DX, 1);                                                                                 \
             CHECK_WRITE(&cpu_state.seg_es, DEST_REG, DEST_REG);                                                   \
             high_page = 0;                                                                                        \
             do_mmut_wb(es, DEST_REG, &addr64);                                                                    \
@@ -40,8 +40,7 @@
             uint16_t temp;                                                                                        \
                                                                                                                   \
             SEG_CHECK_WRITE(&cpu_state.seg_es);                                                                   \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
+            check_io_perm(DX, 2);                                                                                 \
             CHECK_WRITE(&cpu_state.seg_es, DEST_REG, DEST_REG + 1UL);                                             \
             high_page = 0;                                                                                        \
             do_mmut_ww(es, DEST_REG, addr64a);                                                                    \
@@ -74,10 +73,7 @@
             uint32_t temp;                                                                                        \
                                                                                                                   \
             SEG_CHECK_WRITE(&cpu_state.seg_es);                                                                   \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
-            check_io_perm(DX + 2);                                                                                \
-            check_io_perm(DX + 3);                                                                                \
+            check_io_perm(DX, 4);                                                                                 \
             CHECK_WRITE(&cpu_state.seg_es, DEST_REG, DEST_REG + 3UL);                                             \
             high_page = 0;                                                                                        \
             do_mmut_wl(es, DEST_REG, addr64a);                                                                    \
@@ -112,7 +108,7 @@
             temp = readmemb(cpu_state.ea_seg->base, SRC_REG);                                                     \
             if (cpu_state.abrt)                                                                                   \
                 return 1;                                                                                         \
-            check_io_perm(DX);                                                                                    \
+            check_io_perm(DX, 1);                                                                                 \
             outb(DX, temp);                                                                                       \
             if (cpu_state.flags & D_FLAG)                                                                         \
                 SRC_REG--;                                                                                        \
@@ -137,8 +133,7 @@
             temp = readmemw(cpu_state.ea_seg->base, SRC_REG);                                                     \
             if (cpu_state.abrt)                                                                                   \
                 return 1;                                                                                         \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
+            check_io_perm(DX, 2);                                                                                 \
             outw(DX, temp);                                                                                       \
             if (cpu_state.flags & D_FLAG)                                                                         \
                 SRC_REG -= 2;                                                                                     \
@@ -163,10 +158,7 @@
             temp = readmeml(cpu_state.ea_seg->base, SRC_REG);                                                     \
             if (cpu_state.abrt)                                                                                   \
                 return 1;                                                                                         \
-            check_io_perm(DX);                                                                                    \
-            check_io_perm(DX + 1);                                                                                \
-            check_io_perm(DX + 2);                                                                                \
-            check_io_perm(DX + 3);                                                                                \
+            check_io_perm(DX, 4);                                                                                 \
             outl(DX, temp);                                                                                       \
             if (cpu_state.flags & D_FLAG)                                                                         \
                 SRC_REG -= 4;                                                                                     \
diff --git a/src/cpu/x86_ops_string.h b/src/cpu/x86_ops_string.h
index 5cc5f3806..c3875a648 100644
--- a/src/cpu/x86_ops_string.h
+++ b/src/cpu/x86_ops_string.h
@@ -804,7 +804,7 @@ opINSB_a16(uint32_t fetchdat)
     addr64 = 0x00000000;
 
     SEG_CHECK_WRITE(&cpu_state.seg_es);
-    check_io_perm(DX);
+    check_io_perm(DX, 1);
     CHECK_WRITE(&cpu_state.seg_es, DI, DI);
     high_page = 0;
     do_mmut_wb(es, DI, &addr64);
@@ -830,7 +830,7 @@ opINSB_a32(uint32_t fetchdat)
     addr64 = 0x00000000;
 
     SEG_CHECK_WRITE(&cpu_state.seg_es);
-    check_io_perm(DX);
+    check_io_perm(DX, 1);
     high_page = 0;
     CHECK_WRITE(&cpu_state.seg_es, EDI, EDI);
     do_mmut_wb(es, EDI, &addr64);
@@ -857,8 +857,7 @@ opINSW_a16(uint32_t fetchdat)
     addr64a[0] = addr64a[1] = 0x00000000;
 
     SEG_CHECK_WRITE(&cpu_state.seg_es);
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
+    check_io_perm(DX, 2);
     CHECK_WRITE(&cpu_state.seg_es, DI, DI + 1UL);
     high_page = 0;
     do_mmut_ww(es, DI, addr64a);
@@ -885,8 +884,7 @@ opINSW_a32(uint32_t fetchdat)
 
     SEG_CHECK_WRITE(&cpu_state.seg_es);
     high_page = 0;
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
+    check_io_perm(DX, 2);
     CHECK_WRITE(&cpu_state.seg_es, EDI, EDI + 1UL);
     do_mmut_ww(es, EDI, addr64a);
     if (cpu_state.abrt)
@@ -912,10 +910,7 @@ opINSL_a16(uint32_t fetchdat)
     addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000;
 
     SEG_CHECK_WRITE(&cpu_state.seg_es);
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
-    check_io_perm(DX + 2);
-    check_io_perm(DX + 3);
+    check_io_perm(DX, 4);
     CHECK_WRITE(&cpu_state.seg_es, DI, DI + 3UL);
     high_page = 0;
     do_mmut_wl(es, DI, addr64a);
@@ -941,10 +936,7 @@ opINSL_a32(uint32_t fetchdat)
     addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000;
 
     SEG_CHECK_WRITE(&cpu_state.seg_es);
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
-    check_io_perm(DX + 2);
-    check_io_perm(DX + 3);
+    check_io_perm(DX, 4);
     CHECK_WRITE(&cpu_state.seg_es, EDI, EDI + 3UL);
     high_page = 0;
     do_mmut_wl(es, DI, addr64a);
@@ -973,7 +965,7 @@ opOUTSB_a16(uint32_t fetchdat)
     temp = readmemb(cpu_state.ea_seg->base, SI);
     if (cpu_state.abrt)
         return 1;
-    check_io_perm(DX);
+    check_io_perm(DX, 1);
     if (cpu_state.flags & D_FLAG)
         SI--;
     else
@@ -993,7 +985,7 @@ opOUTSB_a32(uint32_t fetchdat)
     temp = readmemb(cpu_state.ea_seg->base, ESI);
     if (cpu_state.abrt)
         return 1;
-    check_io_perm(DX);
+    check_io_perm(DX, 1);
     if (cpu_state.flags & D_FLAG)
         ESI--;
     else
@@ -1014,8 +1006,7 @@ opOUTSW_a16(uint32_t fetchdat)
     temp = readmemw(cpu_state.ea_seg->base, SI);
     if (cpu_state.abrt)
         return 1;
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
+    check_io_perm(DX, 2);
     if (cpu_state.flags & D_FLAG)
         SI -= 2;
     else
@@ -1035,8 +1026,7 @@ opOUTSW_a32(uint32_t fetchdat)
     temp = readmemw(cpu_state.ea_seg->base, ESI);
     if (cpu_state.abrt)
         return 1;
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
+    check_io_perm(DX, 2);
     if (cpu_state.flags & D_FLAG)
         ESI -= 2;
     else
@@ -1057,10 +1047,7 @@ opOUTSL_a16(uint32_t fetchdat)
     temp = readmeml(cpu_state.ea_seg->base, SI);
     if (cpu_state.abrt)
         return 1;
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
-    check_io_perm(DX + 2);
-    check_io_perm(DX + 3);
+    check_io_perm(DX, 4);
     if (cpu_state.flags & D_FLAG)
         SI -= 4;
     else
@@ -1080,10 +1067,7 @@ opOUTSL_a32(uint32_t fetchdat)
     temp = readmeml(cpu_state.ea_seg->base, ESI);
     if (cpu_state.abrt)
         return 1;
-    check_io_perm(DX);
-    check_io_perm(DX + 1);
-    check_io_perm(DX + 2);
-    check_io_perm(DX + 3);
+    check_io_perm(DX, 4);
     if (cpu_state.flags & D_FLAG)
         ESI -= 4;
     else
diff --git a/src/cpu/x87.c b/src/cpu/x87.c
index 0b93af9da..98ceb105b 100644
--- a/src/cpu/x87.c
+++ b/src/cpu/x87.c
@@ -15,6 +15,7 @@
 #include "x86_ops.h"
 #include "x87.h"
 #include "386_common.h"
+#include "softfloat/softfloat-specialize.h"
 
 uint32_t x87_pc_off, x87_op_off;
 uint16_t x87_pc_seg, x87_op_seg;
@@ -37,11 +38,6 @@ fpu_log(const char *fmt, ...)
 #    define fpu_log(fmt, ...)
 #endif
 
-#define X87_TAG_VALID   0
-#define X87_TAG_ZERO    1
-#define X87_TAG_INVALID 2
-#define X87_TAG_EMPTY   3
-
 #ifdef USE_NEW_DYNAREC
 uint16_t
 x87_gettag(void)
@@ -110,6 +106,340 @@ x87_settag(uint16_t new_tag)
 }
 #endif
 
+/* Selects the NaN result for a floatx80/float32 operand pair; reached from FPU_handle_NaN32 when a or b32 is a NaN. */
+static floatx80
+FPU_handle_NaN32_Func(floatx80 a, int aIsNaN, float32 b32, int bIsNaN, struct float_status_t *status)
+{
+    int aIsSignalingNaN = floatx80_is_signaling_nan(a);
+    int bIsSignalingNaN = float32_is_signaling_nan(b32);
+
+    if (aIsSignalingNaN | bIsSignalingNaN) /* any signaling operand flags invalid-operation */
+        float_raise(status, float_flag_invalid);
+
+    // presumably quiets a (SNaN -> QNaN); the earlier wording had the direction reversed - confirm in softfloat-specialize.h
+    a = propagateFloatx80NaNOne(a, status);
+
+    if (aIsNaN & !bIsNaN) return a; /* only a is a NaN: b32 is never converted */
+
+    // float32 is NaN so conversion will propagate SNaN to QNaN and raise
+    // appropriate exception flags
+    floatx80 b = float32_to_floatx80(b32, status);
+
+    if (aIsSignalingNaN) {
+        if (bIsSignalingNaN) goto returnLargerSignificand; /* both signaling: compare significands */
+        return bIsNaN ? b : a; /* a signaling, b32 not signaling: b if b32 is a NaN, otherwise a */
+    }
+    else if (aIsNaN) { /* a is a NaN but not a signaling one */
+        if (bIsSignalingNaN) return a; /* a is kept in preference to a signaling b32 */
+ returnLargerSignificand:
+        if (a.fraction < b.fraction) return b;
+        if (b.fraction < a.fraction) return a;
+        return (a.exp < b.exp) ? a : b; /* equal fractions: tie broken on the raw exp field */
+    }
+    else { /* a is not a NaN */
+        return b;
+    }
+}
+/* NaN/unsupported pre-check for a floatx80/float32 pair: returns 1 with *r filled in, or 0 with *r untouched. */
+int
+FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status)
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    if (floatx80_is_unsupported(a)) { /* checked before any NaN test; b is not examined on this path */
+        float_raise(status, float_flag_invalid);
+        *r = floatx80_default_nan;
+        return 1;
+    }
+
+    int aIsNaN = floatx80_is_nan(a), bIsNaN = float32_is_nan(b);
+    if (aIsNaN | bIsNaN) { /* at least one operand is a NaN */
+        *r = FPU_handle_NaN32_Func(a, aIsNaN, b, bIsNaN, status);
+        return 1;
+    }
+    return 0; /* neither unsupported nor NaN: nothing written to *r */
+}
+/* Selects the NaN result for a floatx80/float64 operand pair; reached from FPU_handle_NaN64 when a or b64 is a NaN. */
+static floatx80
+FPU_handle_NaN64_Func(floatx80 a, int aIsNaN, float64 b64, int bIsNaN, struct float_status_t *status)
+{
+    int aIsSignalingNaN = floatx80_is_signaling_nan(a);
+    int bIsSignalingNaN = float64_is_signaling_nan(b64);
+
+    if (aIsSignalingNaN | bIsSignalingNaN) /* any signaling operand flags invalid-operation */
+        float_raise(status, float_flag_invalid);
+
+    // presumably quiets a (SNaN -> QNaN); the earlier wording had the direction reversed - confirm in softfloat-specialize.h
+    a = propagateFloatx80NaNOne(a, status);
+
+    if (aIsNaN & !bIsNaN) return a; /* only a is a NaN: b64 is never converted */
+
+    // float64 is NaN so conversion will propagate SNaN to QNaN and raise
+    // appropriate exception flags
+    floatx80 b = float64_to_floatx80(b64, status);
+
+    if (aIsSignalingNaN) {
+        if (bIsSignalingNaN) goto returnLargerSignificand; /* both signaling: compare significands */
+        return bIsNaN ? b : a; /* a signaling, b64 not signaling: b if b64 is a NaN, otherwise a */
+    }
+    else if (aIsNaN) { /* a is a NaN but not a signaling one */
+        if (bIsSignalingNaN) return a; /* a is kept in preference to a signaling b64 */
+ returnLargerSignificand:
+        if (a.fraction < b.fraction) return b;
+        if (b.fraction < a.fraction) return a;
+        return (a.exp < b.exp) ? a : b; /* equal fractions: tie broken on the raw exp field */
+    }
+    else { /* a is not a NaN */
+        return b;
+    }
+}
+/* NaN/unsupported pre-check for a floatx80/float64 pair: returns 1 with *r filled in, or 0 with *r untouched. */
+int
+FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status)
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    if (floatx80_is_unsupported(a)) { /* checked before any NaN test; b is not examined on this path */
+        float_raise(status, float_flag_invalid);
+        *r = floatx80_default_nan;
+        return 1;
+    }
+
+    int aIsNaN = floatx80_is_nan(a), bIsNaN = float64_is_nan(b);
+    if (aIsNaN | bIsNaN) { /* at least one operand is a NaN */
+        *r = FPU_handle_NaN64_Func(a, aIsNaN, b, bIsNaN, status);
+        return 1;
+    }
+    return 0; /* neither unsupported nor NaN: nothing written to *r */
+}
+/* Builds a float_status_t from an x87 control word: precision from the PC bits, rounding from the RC bits, masks from the low six bits. */
+struct float_status_t
+i387cw_to_softfloat_status_word(uint16_t control_word)
+{
+    struct float_status_t status; /* returned by value; only the fields assigned below are set here */
+    int precision = control_word & FPU_CW_PC;
+
+    switch (precision) {
+        case FPU_PR_32_BITS:
+            status.float_rounding_precision = 32;
+            break;
+        case FPU_PR_64_BITS:
+            status.float_rounding_precision = 64;
+            break;
+        case FPU_PR_80_BITS:
+            status.float_rounding_precision = 80;
+            break;
+        default:
+        /* With the precision control bits set to 01 "(reserved)", a
+           real CPU behaves as if the precision control bits were
+           set to 11 "80 bits" */
+            status.float_rounding_precision = 80;
+            break;
+    }
+
+    status.float_exception_flags = 0; // clear exceptions before execution
+    status.float_nan_handling_mode = float_first_operand_nan;
+    status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10; /* RC field (mask 0x0C00) shifted down to 0..3 */
+    status.flush_underflow_to_zero = 0;
+    status.float_suppress_exception = 0;
+    status.float_exception_masks = control_word & FPU_CW_Exceptions_Mask; /* the six mask bits, 0x003f */
+    status.denormals_are_zeros = 0;
+    return status;
+}
+
+/* Maps a softfloat comparison result to the C0/C2/C3 condition-code bits; returns -1 for an unrecognised relation. */
+int
+FPU_status_word_flags_fpu_compare(int float_relation)
+{
+    switch (float_relation) {
+        case float_relation_unordered:
+            return (C0 | C2 | C3);
+
+        case float_relation_greater:
+            return (0); /* no condition bits set */
+
+        case float_relation_less:
+            return (C0);
+
+        case float_relation_equal:
+            return (C3);
+    }
+
+    return (-1);        // should never get here
+}
+/* ORs the Z/P/C flags for a comparison result into cpu_state.flags; it only sets bits and never clears any. */
+void
+FPU_write_eflags_fpu_compare(int float_relation)
+{
+    switch (float_relation) {
+        case float_relation_unordered:
+            cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
+            break;
+
+        case float_relation_greater: /* nothing is set for "greater" */
+            break;
+
+        case float_relation_less:
+            cpu_state.flags |= (C_FLAG);
+            break;
+
+        case float_relation_equal:
+            cpu_state.flags |= (Z_FLAG);
+            break;
+
+        default: /* unrecognised relation: flags left as they were */
+            break;
+    }
+}
+/* Records exception bits in fpu_state.swd and returns those left unmasked by fpu_state.cwd after the filtering below. */
+uint16_t
+FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store)
+{
+    uint16_t status; /* copy of swd taken before this call modifies it */
+    uint16_t unmasked; /* return value; fetchdat is not referenced anywhere in this body */
+
+    /* Extract only the bits which we use to set the status word */
+    exceptions &= FPU_SW_Exceptions_Mask;
+    status = fpu_state.swd;
+
+    unmasked = (exceptions & ~fpu_state.cwd) & FPU_CW_Exceptions_Mask; /* raised and not masked in the control word */
+
+    // if IE or DZ exception happen nothing else will be reported
+    if (exceptions & (FPU_EX_Invalid | FPU_EX_Zero_Div)) {
+        unmasked &= (FPU_EX_Invalid | FPU_EX_Zero_Div);
+    }
+
+    /* Set summary bits if exception isn't masked */
+    if (unmasked) {
+        fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
+    }
+
+    if (exceptions & FPU_EX_Invalid) {
+        // FPU_EX_Invalid cannot come with any other exception but x87 stack fault
+        fpu_state.swd |= exceptions;
+        if (exceptions & FPU_SW_Stack_Fault) {
+            if (!(exceptions & C1)) {
+               /* This bit distinguishes over- from underflow for a stack fault,
+                  and roundup from round-down for precision loss. */
+                  fpu_state.swd &= ~C1;
+            }
+        }
+        return unmasked; /* early exit: the later exception classes are not processed */
+    }
+
+    if (exceptions & FPU_EX_Zero_Div) {
+        fpu_state.swd |= FPU_EX_Zero_Div;
+        if (!(fpu_state.cwd & FPU_EX_Zero_Div)) { /* divide-by-zero is unmasked */
+#ifdef FPU_8087
+            if (!(fpu_state.cwd & FPU_SW_Summary)) { /* NOTE(review): status-word bit name (0x0080) tested on cwd - confirm intent */
+                fpu_state.cwd |= FPU_SW_Summary;
+                nmi = 1;
+            }
+#else
+            picint(1 << 13); /* presumably IRQ 13 - confirm against the PIC wiring */
+#endif // FPU_8087
+        }
+        return unmasked;
+    }
+
+    if (exceptions & FPU_EX_Denormal) {
+        fpu_state.swd |= FPU_EX_Denormal;
+        if (unmasked & FPU_EX_Denormal) {
+            return (unmasked & FPU_EX_Denormal); /* unmasked denormal: only that bit is reported */
+        }
+    }
+
+    /* Set the corresponding exception bits */
+    fpu_state.swd |= exceptions;
+
+    if (exceptions & FPU_EX_Precision) {
+        if (!(exceptions & C1)) {
+          /* This bit distinguishes over- from underflow for a stack fault,
+               and roundup from round-down for precision loss. */
+            fpu_state.swd &= ~C1;
+        }
+    }
+
+    // If #P unmasked exception occurred the result still has to be
+    // written to the destination.
+    unmasked &= ~FPU_EX_Precision;
+
+    if (unmasked & (FPU_EX_Underflow | FPU_EX_Overflow)) {
+        // If unmasked over- or underflow occurs and dest is a memory location:
+        //   - the TOS and destination operands remain unchanged
+        //   - the inexact-result condition is not reported and C1 flag is cleared
+        //   - no result is stored in the memory
+        // If the destination is in the register stack, adjusted resulting value
+        // is stored in the destination operand.
+        if (!store) /* store == 0: underflow/overflow are dropped from the return value */
+            unmasked &= ~(FPU_EX_Underflow | FPU_EX_Overflow);
+        else {
+            fpu_state.swd &= ~C1;
+            if (!(status & FPU_EX_Precision)) /* keep the precision flag only if it was set before this call */
+                fpu_state.swd &= ~FPU_EX_Precision;
+        }
+    }
+    return unmasked;
+}
+/* Stack-overflow response: when invalid-operation is masked, pushes the default NaN; always reports FPU_EX_Stack_Overflow. */
+void
+FPU_stack_overflow(uint32_t fetchdat)
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    /* The masked response */
+    if (is_IA_masked()) {
+        FPU_push();
+        FPU_save_regi(floatx80_default_nan, 0); /* written to the new top of stack */
+    }
+    FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0); /* return value is ignored here */
+}
+/* Stack-underflow response: when invalid-operation is masked, stores the default NaN in ST(stnr) and optionally pops; always reports FPU_EX_Stack_Underflow. */
+void
+FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack)
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    /* The masked response */
+    if (is_IA_masked()) {
+        FPU_save_regi(floatx80_default_nan, stnr);
+        if (pop_stack) /* nonzero pop_stack pops after the store */
+            FPU_pop();
+    }
+    FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); /* return value is ignored here */
+}
+/* Classifies reg as X87_TAG_ZERO, X87_TAG_INVALID or X87_TAG_VALID; X87_TAG_EMPTY is never returned. */
+/* -----------------------------------------------------------
+ * Slimmed down version used to compile against a CPU simulator
+ * rather than a kernel (ported by Kevin Lawton)
+ * ------------------------------------------------------------ */
+int
+FPU_tagof(const floatx80 reg)
+{
+    int32_t exp = floatx80_exp(reg);
+    if (exp == 0) {
+        if (!floatx80_fraction(reg)) /* zero exponent and zero fraction */
+            return X87_TAG_ZERO;
+
+        /* The number is a de-normal or pseudodenormal. */
+        return X87_TAG_INVALID;
+    }
+
+    if (exp == 0x7fff) {
+        /* Is an Infinity, a NaN, or an unsupported data type. */
+        return X87_TAG_INVALID;
+    }
+
+    if (!(reg.fraction & BX_CONST64(0x8000000000000000))) { /* bit 63 of the fraction is clear */
+        /* Unsupported data type. */
+        /* Valid numbers have the ms bit set to 1. */
+        return X87_TAG_INVALID;
+    }
+
+    return X87_TAG_VALID;
+}
+
+
 #ifdef ENABLE_808X_LOG
 void
 x87_dumpregs(void)
diff --git a/src/cpu/x87.h b/src/cpu/x87.h
index 96ad835c8..5d460bc4b 100644
--- a/src/cpu/x87.h
+++ b/src/cpu/x87.h
@@ -1,7 +1,7 @@
-#define C0 (1 << 8)
-#define C1 (1 << 9)
-#define C2 (1 << 10)
-#define C3 (1 << 14)
+#define X87_TAG_VALID   0
+#define X87_TAG_ZERO    1
+#define X87_TAG_INVALID 2
+#define X87_TAG_EMPTY   3
 
 extern uint32_t x87_pc_off, x87_op_off;
 extern uint16_t x87_pc_seg, x87_op_seg;
@@ -46,3 +46,184 @@ void     x87_settag(uint16_t new_tag);
 #define X87_ROUNDING_CHOP    3
 
 void codegen_set_rounding_mode(int mode);
+
+/* Status Word */
+#define FPU_SW_Backward        (0x8000)  /* backward compatibility */
+#define FPU_SW_C3              (0x4000)  /* condition bit 3 */
+#define FPU_SW_Top             (0x3800)  /* top of stack */
+#define FPU_SW_C2              (0x0400)  /* condition bit 2 */
+#define FPU_SW_C1              (0x0200)  /* condition bit 1 */
+#define FPU_SW_C0              (0x0100)  /* condition bit 0 */
+#define FPU_SW_Summary         (0x0080)  /* exception summary */
+#define FPU_SW_Stack_Fault     (0x0040)  /* stack fault */
+#define FPU_SW_Precision       (0x0020)  /* loss of precision */
+#define FPU_SW_Underflow       (0x0010)  /* underflow */
+#define FPU_SW_Overflow        (0x0008)  /* overflow */
+#define FPU_SW_Zero_Div        (0x0004)  /* divide by zero */
+#define FPU_SW_Denormal_Op     (0x0002)  /* denormalized operand */
+#define FPU_SW_Invalid         (0x0001)  /* invalid operation */
+
+#define C0                (1 << 8)
+#define C1                (1 << 9)
+#define C2                (1 << 10)
+#define C3                (1 << 14)
+
+#define FPU_SW_CC (C0 | C1 | C2 | C3)
+
+#define FPU_SW_Exceptions_Mask (0x027f)  /* status word exceptions bit mask */
+
+/* Exception flags: */
+#define FPU_EX_Precision    (0x0020)  /* loss of precision */
+#define FPU_EX_Underflow    (0x0010)  /* underflow */
+#define FPU_EX_Overflow     (0x0008)  /* overflow */
+#define FPU_EX_Zero_Div     (0x0004)  /* divide by zero */
+#define FPU_EX_Denormal     (0x0002)  /* denormalized operand */
+#define FPU_EX_Invalid      (0x0001)  /* invalid operation */
+
+/* Special exceptions: */
+#define FPU_EX_Stack_Overflow    (0x0041| C1)     /* stack overflow */
+#define FPU_EX_Stack_Underflow   (0x0041)        /* stack underflow */
+
+/* precision exception; C1 set means rounded up, clear means rounded down */
+#define FPU_EX_Precision_Lost_Up    (FPU_EX_Precision | C1)
+#define FPU_EX_Precision_Lost_Dn    (FPU_EX_Precision)
+/* Condition-code helpers; each one reads and writes fpu_state.swd directly. */
+#define setcc(cc)  \
+  fpu_state.swd = (fpu_state.swd & ~(FPU_SW_CC)) | ((cc) & FPU_SW_CC)
+/* NOTE(review): brace-only bodies below; `if (x) clear_C1(); else ...` will not parse. */
+#define clear_C1() { fpu_state.swd &= ~C1; }
+#define clear_C2() { fpu_state.swd &= ~C2; }
+
+/* ************ */
+/* Control Word */
+/* ************ */
+
+#define FPU_CW_Inf		(0x1000)  /* infinity control, legacy */
+
+#define FPU_CW_RC		(0x0C00)  /* rounding control */
+#define FPU_CW_PC		(0x0300)  /* precision control */
+
+#define FPU_RC_RND		(0x0000)  /* rounding control */
+#define FPU_RC_DOWN		(0x0400)
+#define FPU_RC_UP		(0x0800)
+#define FPU_RC_CHOP		(0x0C00)
+
+#define FPU_CW_Precision	(0x0020)  /* loss of precision mask */
+#define FPU_CW_Underflow	(0x0010)  /* underflow mask */
+#define FPU_CW_Overflow		(0x0008)  /* overflow mask */
+#define FPU_CW_Zero_Div		(0x0004)  /* divide by zero mask */
+#define FPU_CW_Denormal		(0x0002)  /* denormalized operand mask */
+#define FPU_CW_Invalid		(0x0001)  /* invalid operation mask */
+
+#define FPU_CW_Exceptions_Mask 	(0x003f)  /* all masks */
+
+/* Precision control bits affect only the following:
+   ADD, SUB(R), MUL, DIV(R), and SQRT */
+#define FPU_PR_32_BITS          (0x000)
+#define FPU_PR_RESERVED_BITS    (0x100)
+#define FPU_PR_64_BITS          (0x200)
+#define FPU_PR_80_BITS          (0x300)
+
+#include "softfloat/softfloatx80.h"
+/* Nonzero when FPU_CW_Invalid is set in fpu_state.cwd, i.e. the invalid-operation exception is masked. */
+static __inline int
+is_IA_masked(void)
+{
+    return (fpu_state.cwd & FPU_CW_Invalid);
+}
+
+struct float_status_t i387cw_to_softfloat_status_word(uint16_t control_word);
+uint16_t FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store);
+int FPU_status_word_flags_fpu_compare(int float_relation);
+void FPU_write_eflags_fpu_compare(int float_relation);
+void FPU_stack_overflow(uint32_t fetchdat);
+void FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack);
+int FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status);
+int FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status);
+int FPU_tagof(const floatx80 reg);
+/* Returns fpu_state.cwd unchanged. */
+static __inline uint16_t
+i387_get_control_word(void)
+{
+    return (fpu_state.cwd);
+}
+/* Returns fpu_state.swd with its TOP field (bits 11-13) replaced by fpu_state.tos; swd itself is not modified. */
+static __inline uint16_t
+i387_get_status_word(void)
+{
+    return (fpu_state.swd & ~FPU_SW_Top & 0xFFFF) | ((fpu_state.tos << 11) & FPU_SW_Top);
+}
+/* True when ST(i) is tagged X87_TAG_EMPTY; FPU_gettagi is defined further down, which is fine for a macro. */
+#define IS_TAG_EMPTY(i) \
+    (FPU_gettagi(i) == X87_TAG_EMPTY)
+/* Returns the 2-bit tag of ST(stnr): the stack-relative index is turned into a register index with (stnr + tos) & 7. */
+static __inline int
+FPU_gettagi(int stnr)
+{
+    return (fpu_state.tag >> (((stnr + fpu_state.tos) & 7) * 2)) & 3;
+}
+/* Clears the 2-bit tag of ST(stnr), which leaves it at 0 (X87_TAG_VALID). */
+static __inline void
+FPU_settagi_valid(int stnr)
+{
+    int regnr = (stnr + fpu_state.tos) & 7; /* register index for the stack-relative slot */
+    fpu_state.tag &= ~(3 << (regnr * 2));     // FPU_Tag_Valid == '00
+}
+/* Sets the 2-bit tag of ST(stnr) to the low two bits of tag. */
+static __inline void
+FPU_settagi(int tag, int stnr)
+{
+    int regnr = (stnr + fpu_state.tos) & 7; /* register index for the stack-relative slot */
+    fpu_state.tag &= ~(3 << (regnr * 2)); /* clear the old tag first */
+    fpu_state.tag |= (tag & 3) << (regnr * 2);
+}
+/* Decrements fpu_state.tos modulo 8; the tag word and register contents are left alone. */
+static __inline void
+FPU_push(void)
+{
+    fpu_state.tos = (fpu_state.tos - 1) & 7;
+}
+/* Tags the current top-of-stack register as empty, then increments fpu_state.tos modulo 8. */
+static __inline void
+FPU_pop(void)
+{
+    fpu_state.tag |= 3 << (fpu_state.tos * 2); /* 3 == X87_TAG_EMPTY */
+    fpu_state.tos = (fpu_state.tos + 1) & 7;
+}
+/* Returns the value stored for ST(stnr); the tag word is not consulted. */
+static __inline floatx80
+FPU_read_regi(int stnr)
+{
+    return fpu_state.st_space[(stnr + fpu_state.tos) & 7];
+}
+/* Stores reg in ST(stnr) and tags the slot valid whatever the value is; the note below gives the rationale. */
+// it is only possible to read FPU tag word through certain
+// instructions like FNSAVE, and they update tag word to its
+// real value anyway
+static __inline void
+FPU_save_regi(floatx80 reg, int stnr)
+{
+    fpu_state.st_space[(stnr + fpu_state.tos) & 7] = reg;
+    FPU_settagi_valid(stnr);
+}
+/* Stores reg in ST(stnr) and sets that slot's tag to the caller-supplied tag (low two bits). */
+static __inline void
+FPU_save_regi_tag(floatx80 reg, int tag, int stnr)
+{
+    fpu_state.st_space[(stnr + fpu_state.tos) & 7] = reg;
+    FPU_settagi(tag, stnr);
+}
+
+/* If the summary bit is set in swd, signals the exception and executes `return 1` in the ENCLOSING function. */
+#define FPU_check_pending_exceptions() \
+do { \
+    if (fpu_state.swd & FPU_SW_Summary) { \
+        if (cr0 & 0x20) { /* presumably CR0.NE (native error reporting) - confirm */ \
+            x86_int(16); \
+            return 1; \
+        } else { /* otherwise signalled through the PIC; presumably IRQ 13 - confirm */ \
+            picint(1 << 13); \
+            return 1; \
+        } \
+    } \
+} while (0)
diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h
index c00cdff16..e321df959 100644
--- a/src/cpu/x87_ops.h
+++ b/src/cpu/x87_ops.h
@@ -26,6 +26,7 @@
 #ifdef _MSC_VER
 #    include <intrin.h>
 #endif
+#include "x87_ops_conv.h"
 
 #ifdef ENABLE_FPU_LOG
 extern void fpu_log(const char *fmt, ...);
@@ -44,8 +45,24 @@ static int rounding_modes[4] = { FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARD
 #define C2                (1 << 10)
 #define C3                (1 << 14)
 
+#define X87_TAG_VALID   0
+#define X87_TAG_ZERO    1
+#define X87_TAG_INVALID 2
+#define X87_TAG_EMPTY   3
+
 #define STATUS_ZERODIVIDE 4
 
+typedef union /* overlays a double with sign/exponent/mantissa bit-fields */
+{
+    double d;
+
+    struct { /* anonymous member: fields are reached directly, e.g. x.mantissa */
+        uint64_t mantissa:52; /* assumes low-bits-first bit-field allocation - TODO confirm for every target ABI */
+        uint64_t exponent:11;
+        uint64_t negative:1;
+    };
+} double_decompose_t;
+
 #if defined(_MSC_VER) && !defined(__clang__)
 #    if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86
 #        define X87_INLINE_ASM
@@ -239,8 +256,6 @@ x87_fround(double b)
     return 0LL;
 }
 
-#include "x87_ops_conv.h"
-
 static __inline double
 x87_ld80(void)
 {
@@ -466,6 +481,14 @@ typedef union {
 #    define FP_TAG_VALID_N cpu_state.tag[(cpu_state.TOP + 1) & 7] &= ~TAG_UINT64
 #endif
 
+#include "x87_ops_sf_arith.h"
+#include "x87_ops_sf_compare.h"
+#include "x87_ops_sf_const.h"
+#include "x87_ops_sf_load_store.h"
+#include "x87_ops_sf_misc.h"
+#include "x87_ops_sf_trans.h"
+#include "x87_ops_sf.h"
+
 #include "x87_ops_arith.h"
 #include "x87_ops_misc.h"
 #include "x87_ops_loadstore.h"
@@ -526,6 +549,264 @@ FPU_ILLEGAL_a32(uint32_t fetchdat)
 #define ILLEGAL_a16 FPU_ILLEGAL_a16
 
 #ifdef FPU_8087
+const OpFn OP_TABLE(sf_fpu_8087_d8)[32] = { /* rows 1-3 repeat the same eight `_a16` handlers; row 4 holds the st0/sti handlers */
+    // clang-format off
+        sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16,
+        sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16,
+        sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16,
+        sf_FADD_st0_stj,      sf_FMUL_st0_stj,      sf_FCOM_sti,      sf_FCOMP_sti,      sf_FSUB_st0_stj,      sf_FSUBR_st0_stj,      sf_FDIV_st0_stj,      sf_FDIVR_st0_stj,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_d9)[256] = { /* four groups of 64 entries; each row below is 8 entries */
+    // clang-format off
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+        /* entries 64..127: same handlers as entries 0..63 */
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+        /* entries 128..191: same handlers as entries 0..63 */
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+        /* entries 192..255: per-entry handlers with no `_a16` suffix, apart from the ILLEGAL slots */
+        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,
+        sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,
+        sf_FNOP,       ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /*Invalid*/
+        sf_FCHS,       sf_FABS,       ILLEGAL_a16,  ILLEGAL_a16,  sf_FTST,       sf_FXAM,       ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLD1,       sf_FLDL2T,     sf_FLDL2E,     sf_FLDPI,      sf_FLDEG2,     sf_FLDLN2,     sf_FLDZ,       ILLEGAL_a16,
+        sf_F2XM1,      sf_FYL2X,      sf_FPTAN,      sf_FPATAN,     sf_FXTRACT,  sf_FPREM1,     sf_FDECSTP,    sf_FINCSTP,
+        sf_FPREM,      sf_FYL2XP1,    sf_FSQRT,      ILLEGAL_a16,   sf_FRNDINT,    sf_FSCALE,     ILLEGAL_a16,  ILLEGAL_a16
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_da)[256] = { /* four groups of 64 entries; each row below is 8 entries */
+    // clang-format off
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+        /* entries 64..127: same handlers as entries 0..63 */
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+        /* entries 128..191: same handlers as entries 0..63 */
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+        /* entries 192..255: every slot is ILLEGAL_a16 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_db)[256] = { /* four groups of 64 entries; each row below is 8 entries */
+    // clang-format off
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,
+        /* entries 64..127: same handlers as entries 0..63 */
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,
+        /* entries 128..191: same handlers as entries 0..63 */
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,
+        /* entries 192..255: ILLEGAL_a16 except five slots in the fifth row (sf_FI x2, sf_FNCLEX, sf_FNINIT, sf_FNOP) */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FI,        sf_FI,         sf_FNCLEX,      sf_FNINIT,      ILLEGAL_a16,  sf_FNOP,       ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_dc)[32] = { /* rows 1-3 repeat the same eight `_a16` handlers; row 4 holds the sti_st0 handlers */
+    // clang-format off
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16,
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16,
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16,
+        sf_FADD_sti_st0,     sf_FMUL_sti_st0,     ILLEGAL_a16, ILLEGAL_a16,  sf_FSUBR_sti_st0,    sf_FSUB_sti_st0,      sf_FDIVR_sti_st0,    sf_FDIV_sti_st0, /* SUBR/DIVR come before SUB/DIV here, the reverse of rows 1-3 */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_dd)[256] = { /* 256-entry OpFn table, eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the DD escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16, /* entries 0x00-0x3F start here: _a16 handlers */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,
+
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,
+
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,
+
+        sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti, /* entries 0xC0-0xFF start here: *_sti handlers, everything else ILLEGAL */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_de)[256] = { /* 256-entry OpFn table, eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the DE escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16, /* entries 0x00-0x3F start here: *iw_a16 handlers */
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+
+        sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0, /* entries 0xC0-0xFF start here: *P_sti_st0 handlers */
+        sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  sf_FCOMPP,     ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* only entry 0xD9 is populated (sf_FCOMPP) */
+        sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0, /* SUBR row precedes SUB here, the reverse of the groups above */
+        sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,
+        sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0, /* DIVR row precedes DIV here, the reverse of the groups above */
+        sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_8087_df)[256] = { /* 256-entry OpFn table, eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the DF escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* entries 0x00-0x3F start here: _a16 handlers */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,
+
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,
+
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,
+
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti, /* entries 0xC0-0xFF start here: only 0xC0-0xC7 are populated (sf_FFREEP_sti); 0xC8-0xFF are ILLEGAL */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
 const OpFn OP_TABLE(fpu_8087_d8)[32] = {
     // clang-format off
         opFADDs_a16, opFMULs_a16, opFCOMs_a16, opFCOMPs_a16, opFSUBs_a16, opFSUBRs_a16, opFDIVs_a16, opFDIVRs_a16,
@@ -570,8 +851,8 @@ const OpFn OP_TABLE(fpu_8087_d9)[256] = {
         ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /*Invalid*/
         opFCHS,       opFABS,       ILLEGAL_a16,  ILLEGAL_a16,  opFTST,       opFXAM,       ILLEGAL_a16,  ILLEGAL_a16,
         opFLD1,       opFLDL2T,     opFLDL2E,     opFLDPI,      opFLDEG2,     opFLDLN2,     opFLDZ,       ILLEGAL_a16,
-        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     ILLEGAL_a16,  ILLEGAL_a16,  opFDECSTP,    opFINCSTP,
-        opFPREM,      opFYL2XP1,    opFSQRT,      ILLEGAL_a16,   opFRNDINT,   opFSCALE,     ILLEGAL_a16,  ILLEGAL_a16
+        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     opFXTRACT,    opFPREM1,     opFDECSTP,    opFINCSTP,
+        opFPREM,      opFYL2XP1,    opFSQRT,      ILLEGAL_a16,  opFRNDINT,    opFSCALE,     ILLEGAL_a16,  ILLEGAL_a16
     // clang-format on
 };
 
@@ -786,6 +1067,1260 @@ const OpFn OP_TABLE(fpu_8087_df)[256] = {
 #else
 #    define ILLEGAL_a32 FPU_ILLEGAL_a32
 
+
+const OpFn OP_TABLE(sf_fpu_d8_a16)[32] = { /* 32-entry OpFn table, eight handlers per row; _a16 counterpart of sf_fpu_d8_a32. NOTE(review): presumably indexed by ModRM bits 7..3 of the D8 escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, /* entries 0-7: _a16 handlers */
+        sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, /* entries 8-15: repeat of 0-7 */
+        sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, /* entries 16-23: repeat of 0-7 */
+        sf_FADD_st0_stj,      sf_FMUL_st0_stj,      sf_FCOM_sti,      sf_FCOMP_sti,      sf_FSUB_st0_stj,      sf_FSUBR_st0_stj,      sf_FDIV_st0_stj,      sf_FDIVR_st0_stj, /* entries 24-31: handlers without an address-size suffix, same operation order as the rows above */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_d8_a32)[32] = { /* 32-entry OpFn table, eight handlers per row; _a32 counterpart of sf_fpu_d8_a16. NOTE(review): presumably indexed by ModRM bits 7..3 of the D8 escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FADDs_a32, sf_FMULs_a32, sf_FCOMs_a32, sf_FCOMPs_a32, sf_FSUBs_a32, sf_FSUBRs_a32, sf_FDIVs_a32, sf_FDIVRs_a32, /* entries 0-7: _a32 handlers */
+        sf_FADDs_a32, sf_FMULs_a32, sf_FCOMs_a32, sf_FCOMPs_a32, sf_FSUBs_a32, sf_FSUBRs_a32, sf_FDIVs_a32, sf_FDIVRs_a32, /* entries 8-15: repeat of 0-7 */
+        sf_FADDs_a32, sf_FMULs_a32, sf_FCOMs_a32, sf_FCOMPs_a32, sf_FSUBs_a32, sf_FSUBRs_a32, sf_FDIVs_a32, sf_FDIVRs_a32, /* entries 16-23: repeat of 0-7 */
+        sf_FADD_st0_stj,      sf_FMUL_st0_stj,      sf_FCOM_sti,      sf_FCOMP_sti,      sf_FSUB_st0_stj,      sf_FSUBR_st0_stj,      sf_FDIV_st0_stj,      sf_FDIVR_st0_stj, /* entries 24-31: same suffix-less handlers as the final row of sf_fpu_d8_a16 */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_d9_a16)[256] = { /* 256-entry OpFn table using _a16 handlers; eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the D9 escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16, /* entries 0x00-0x3F start here */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+
+        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti, /* entries 0xC0-0xFF start here: handlers without an address-size suffix */
+        sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,
+        sf_FNOP,       ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /*Invalid (entries 0xD8-0xDF)*/
+        sf_FCHS,       sf_FABS,       ILLEGAL_a16,  ILLEGAL_a16,  sf_FTST,       sf_FXAM,       ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLD1,       sf_FLDL2T,     sf_FLDL2E,     sf_FLDPI,      sf_FLDEG2,     sf_FLDLN2,     sf_FLDZ,       ILLEGAL_a16,
+        sf_F2XM1,      sf_FYL2X,      sf_FPTAN,      sf_FPATAN,     sf_FXTRACT,  sf_FPREM1,     sf_FDECSTP,    sf_FINCSTP,
+        sf_FPREM,      sf_FYL2XP1,    sf_FSQRT,      sf_FSINCOS,    sf_FRNDINT,    sf_FSCALE,     sf_FSIN,       sf_FCOS,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_d9_a32)[256] = { /* 256-entry OpFn table using _a32 handlers; eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the D9 escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32, /* entries 0x00-0x3F start here */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,
+        sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,
+        sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32,
+        sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,
+        sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32,
+        sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,
+
+        sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,
+        sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,
+        sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32,
+        sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,
+        sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32,
+        sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,
+
+        sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,
+        sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,
+        sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32,
+        sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,
+        sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32,
+        sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,
+
+        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti, /* entries 0xC0-0xFF start here: handlers without an address-size suffix */
+        sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,
+        sf_FNOP,       ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /*Invalid (entries 0xD8-0xDF)*/
+        sf_FCHS,       sf_FABS,       ILLEGAL_a32,  ILLEGAL_a32,  sf_FTST,       sf_FXAM,       ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FLD1,       sf_FLDL2T,     sf_FLDL2E,     sf_FLDPI,      sf_FLDEG2,     sf_FLDLN2,     sf_FLDZ,       ILLEGAL_a32,
+        sf_F2XM1,      sf_FYL2X,      sf_FPTAN,      sf_FPATAN,     sf_FXTRACT,  sf_FPREM1,     sf_FDECSTP,    sf_FINCSTP,
+        sf_FPREM,      sf_FYL2XP1,    sf_FSQRT,      sf_FSINCOS,    sf_FRNDINT,    sf_FSCALE,     sf_FSIN,       sf_FCOS,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_d9_a16)[256] = { /* 256-entry OpFn table using _a16 handlers; eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the D9 escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16, /* entries 0x00-0x3F start here */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+
+        sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16,   sf_FLDs_a16, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,   sf_FSTs_a16,
+        sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,  sf_FSTPs_a16,
+        sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+        sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,  sf_FLDCW_a16,
+        sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+        sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,  sf_FNSTCW_a16,
+
+        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti, /* entries 0xC0-0xFF start here: handlers without an address-size suffix */
+        sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,
+        sf_FNOP,       ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,  /*Entries 0xD8-0xDF: previously marked "Invalid", but dispatched to sf_FSTP_sti here rather than ILLEGAL*/
+        sf_FCHS,       sf_FABS,       ILLEGAL_a16,  ILLEGAL_a16,  sf_FTST,       sf_FXAM,       ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLD1,       sf_FLDL2T,     sf_FLDL2E,     sf_FLDPI,      sf_FLDEG2,     sf_FLDLN2,     sf_FLDZ,       ILLEGAL_a16,
+        sf_F2XM1,      sf_FYL2X,      sf_FPTAN,      sf_FPATAN,     sf_FXTRACT,  sf_FPREM1,     sf_FDECSTP,    sf_FINCSTP,
+        sf_FPREM,      sf_FYL2XP1,    sf_FSQRT,      sf_FSINCOS,    sf_FRNDINT,    sf_FSCALE,     sf_FSIN,       sf_FCOS,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_d9_a32)[256] = { /* 256-entry OpFn table using _a32 handlers; eight identical handlers per row in the first three groups. NOTE(review): presumably indexed by the full ModRM byte of the D9 escape - confirm against the dispatcher. */
+    // clang-format off
+        sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32, /* entries 0x00-0x3F start here */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,
+        sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,
+        sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32,
+        sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,
+        sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32,
+        sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,
+
+        sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32, /* entries 0x40-0x7F start here: repeat of 0x00-0x3F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,
+        sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,
+        sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32,
+        sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,
+        sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32,
+        sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,
+
+        sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32,   sf_FLDs_a32, /* entries 0x80-0xBF start here: repeat of 0x00-0x3F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,   sf_FSTs_a32,
+        sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,  sf_FSTPs_a32,
+        sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32,
+        sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,  sf_FLDCW_a32,
+        sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32,
+        sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,  sf_FNSTCW_a32,
+
+        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti,        sf_FLD_sti, /* entries 0xC0-0xFF start here: handlers without an address-size suffix */
+        sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,       sf_FXCH_sti,
+        sf_FNOP,       ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,       sf_FSTP_sti,  /*Entries 0xD8-0xDF: previously marked "Invalid", but dispatched to sf_FSTP_sti here rather than ILLEGAL*/
+        sf_FCHS,       sf_FABS,       ILLEGAL_a32,  ILLEGAL_a32,  sf_FTST,       sf_FXAM,       ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FLD1,       sf_FLDL2T,     sf_FLDL2E,     sf_FLDPI,      sf_FLDEG2,     sf_FLDLN2,     sf_FLDZ,       ILLEGAL_a32,
+        sf_F2XM1,      sf_FYL2X,      sf_FPTAN,      sf_FPATAN,     sf_FXTRACT,  sf_FPREM1,     sf_FDECSTP,    sf_FINCSTP,
+        sf_FPREM,      sf_FYL2XP1,    sf_FSQRT,      sf_FSINCOS,    sf_FRNDINT,    sf_FSCALE,     sf_FSIN,       sf_FCOS,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_da_a16)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 identical entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 1, entries 0x00-0x3F: one handler per row of 8 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* group 4, entries 0xC0-0xFF: every slot is ILLEGAL_a16 in this table */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_da_a32)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 identical entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 1, entries 0x00-0x3F: one handler per row of 8 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* group 4, entries 0xC0-0xFF: every slot is ILLEGAL_a32 in this table */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_da_a16)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 1, entries 0x00-0x3F: one handler per row of 8 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* group 4, entries 0xC0-0xFF: ILLEGAL_a16 everywhere except one slot */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  sf_FUCOMPP,    ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* entries 0xE8-0xEF: only 0xE9 has a handler (sf_FUCOMPP) */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_da_a32)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 1, entries 0x00-0x3F: one handler per row of 8 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* group 4, entries 0xC0-0xFF: ILLEGAL_a32 everywhere except one slot */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  sf_FUCOMPP,    ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* entries 0xE8-0xEF: only 0xE9 has a handler (sf_FUCOMPP) */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_686_da_a16)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 1, entries 0x00-0x3F: one handler per row of 8 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16,  sf_FADDil_a16, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,  sf_FMULil_a16,
+        sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,  sf_FCOMil_a16,
+        sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16,
+        sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,  sf_FSUBil_a16,
+        sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16,
+        sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,  sf_FDIVil_a16,
+        sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16,
+
+        sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB, /* group 4, entries 0xC0-0xFF; this row is 0xC0-0xC7 */
+        sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE, /* entries 0xC8-0xCF */
+        sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE, /* entries 0xD0-0xD7 */
+        sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU, /* entries 0xD8-0xDF */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  sf_FUCOMPP,    ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* entries 0xE8-0xEF: only 0xE9 has a handler (sf_FUCOMPP) */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_686_da_a32)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 1, entries 0x00-0x3F: one handler per row of 8 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32,  sf_FADDil_a32, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,  sf_FMULil_a32,
+        sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,  sf_FCOMil_a32,
+        sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32,
+        sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,  sf_FSUBil_a32,
+        sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32,
+        sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,  sf_FDIVil_a32,
+        sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32,
+
+        sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB,      sf_FCMOVB, /* group 4, entries 0xC0-0xFF; this row is 0xC0-0xC7 */
+        sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE,      sf_FCMOVE, /* entries 0xC8-0xCF */
+        sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE,     sf_FCMOVBE, /* entries 0xD0-0xD7 */
+        sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU,      sf_FCMOVU, /* entries 0xD8-0xDF */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  sf_FUCOMPP,    ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* entries 0xE8-0xEF: only 0xE9 has a handler (sf_FUCOMPP) */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_db_a16)[256] = { /* 256 handlers laid out as 4 groups of 64 (8 rows x 8 entries); presumably indexed by the ModR/M byte - TODO confirm against the dispatcher */
+    // clang-format off
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, /* group 1, entries 0x00-0x3F: rows 2, 5 and 7 of each of the first three groups are ILLEGAL_a16 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,
+
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, /* group 2, entries 0x40-0x7F: same handlers as group 1 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,
+
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, /* group 3, entries 0x80-0xBF: same handlers as group 1 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,
+
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* group 4, entries 0xC0-0xFF: ILLEGAL_a16 except for six slots in row 0xE0-0xE7 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNOP,        sf_FNOP,        sf_FNCLEX,       sf_FNINIT,       sf_FNOP,        sf_FNOP,        ILLEGAL_a16,       ILLEGAL_a16, /* entries 0xE0-0xE7: 0xE2 = sf_FNCLEX, 0xE3 = sf_FNINIT; 0xE0/0xE1/0xE4/0xE5 map to sf_FNOP. NOTE(review): these look like legacy FENI/FDISI/FSETPM-style control opcodes handled as no-ops - confirm */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_db_a32)[256] = { // 256 handlers, 8 per row (index range noted per row); entries match sf_fpu_db_a32 one for one. NOTE(review): presumably indexed by the ModRM byte after an 0xDB escape, 32-bit addressing, 287-class FPU - confirm against the decoder
+    // clang-format off
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x00-0x07
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x08-0x0F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x10-0x17
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x18-0x1F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x20-0x27
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0x28-0x2F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x30-0x37
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0x38-0x3F
+
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x40-0x47
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x48-0x4F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x50-0x57
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x58-0x5F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x60-0x67
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0x68-0x6F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x70-0x77
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0x78-0x7F
+
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x80-0x87
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x88-0x8F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x90-0x97
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x98-0x9F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xA0-0xA7
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0xA8-0xAF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xB0-0xB7
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0xB8-0xBF
+
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xC0-0xC7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xC8-0xCF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xD0-0xD7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xD8-0xDF
+        sf_FNOP,        sf_FNOP,        sf_FNCLEX,       sf_FNINIT,       sf_FNOP,        sf_FNOP,        ILLEGAL_a32,       ILLEGAL_a32, // 0xE0-0xE7: the only row whose eight entries differ from one another
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xE8-0xEF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF0-0xF7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF8-0xFF
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_db_a16)[256] = { // 256 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the ModRM byte after an 0xDB escape, 16-bit addressing - confirm against the decoder
+    // clang-format off
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, // 0x00-0x07
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x08-0x0F
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16, // 0x10-0x17
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, // 0x18-0x1F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x20-0x27
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16, // 0x28-0x2F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x30-0x37
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16, // 0x38-0x3F
+
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, // 0x40-0x47
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x48-0x4F
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16, // 0x50-0x57
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, // 0x58-0x5F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x60-0x67
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16, // 0x68-0x6F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x70-0x77
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16, // 0x78-0x7F
+
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, // 0x80-0x87
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x88-0x8F
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16, // 0x90-0x97
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, // 0x98-0x9F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xA0-0xA7
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16, // 0xA8-0xAF
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xB0-0xB7
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16, // 0xB8-0xBF
+
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xC0-0xC7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xC8-0xCF
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xD0-0xD7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xD8-0xDF
+        sf_FNOP,        sf_FNOP,        sf_FNCLEX,       sf_FNINIT,       sf_FNOP,        sf_FNOP,        ILLEGAL_a16,       ILLEGAL_a16, // 0xE0-0xE7: the only row whose eight entries differ from one another
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xE8-0xEF
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xF0-0xF7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xF8-0xFF
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_db_a32)[256] = { // 256 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the ModRM byte after an 0xDB escape, 32-bit addressing - confirm against the decoder
+    // clang-format off
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x00-0x07
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x08-0x0F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x10-0x17
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x18-0x1F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x20-0x27
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0x28-0x2F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x30-0x37
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0x38-0x3F
+
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x40-0x47
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x48-0x4F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x50-0x57
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x58-0x5F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x60-0x67
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0x68-0x6F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x70-0x77
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0x78-0x7F
+
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x80-0x87
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x88-0x8F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x90-0x97
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x98-0x9F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xA0-0xA7
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0xA8-0xAF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xB0-0xB7
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0xB8-0xBF
+
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xC0-0xC7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xC8-0xCF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xD0-0xD7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xD8-0xDF
+        sf_FNOP,        sf_FNOP,        sf_FNCLEX,       sf_FNINIT,       sf_FNOP,        sf_FNOP,        ILLEGAL_a32,       ILLEGAL_a32, // 0xE0-0xE7: the only row whose eight entries differ from one another
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xE8-0xEF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF0-0xF7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF8-0xFF
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_686_db_a16)[256] = { // 256 handlers, 8 per row (index range noted per row); same as sf_fpu_db_a16 except the rows flagged below. NOTE(review): presumably indexed by the ModRM byte after an 0xDB escape, 16-bit addressing, 686-class CPU - confirm against the decoder
+    // clang-format off
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, // 0x00-0x07
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x08-0x0F
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16, // 0x10-0x17
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, // 0x18-0x1F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x20-0x27
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16, // 0x28-0x2F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x30-0x37
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16, // 0x38-0x3F
+
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, // 0x40-0x47
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x48-0x4F
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16, // 0x50-0x57
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, // 0x58-0x5F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x60-0x67
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16, // 0x68-0x6F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x70-0x77
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16, // 0x78-0x7F
+
+        sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16,  sf_FILDil_a16, // 0x80-0x87
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x88-0x8F
+        sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16,  sf_FISTil_a16, // 0x90-0x97
+        sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, // 0x98-0x9F
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xA0-0xA7
+        sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16,    sf_FLDe_a16, // 0xA8-0xAF
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xB0-0xB7
+        sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16,   sf_FSTPe_a16, // 0xB8-0xBF
+
+        sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB, // 0xC0-0xC7 (ILLEGAL_a16 in sf_fpu_db_a16)
+        sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE, // 0xC8-0xCF (ILLEGAL_a16 in sf_fpu_db_a16)
+        sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE, // 0xD0-0xD7 (ILLEGAL_a16 in sf_fpu_db_a16)
+        sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU, // 0xD8-0xDF (ILLEGAL_a16 in sf_fpu_db_a16)
+        sf_FNOP,        sf_FNOP,        sf_FNCLEX,       sf_FNINIT,       sf_FNOP,        sf_FNOP,        ILLEGAL_a16,       ILLEGAL_a16, // 0xE0-0xE7: the only row whose eight entries differ from one another
+        sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj, // 0xE8-0xEF (ILLEGAL_a16 in sf_fpu_db_a16)
+        sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj, // 0xF0-0xF7 (ILLEGAL_a16 in sf_fpu_db_a16)
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xF8-0xFF
+    // clang-format on
+};
+const OpFn OP_TABLE(sf_fpu_686_db_a32)[256] = { // 256 handlers, 8 per row (index range noted per row); same as sf_fpu_db_a32 except the rows flagged below. NOTE(review): presumably indexed by the ModRM byte after an 0xDB escape, 32-bit addressing, 686-class CPU - confirm against the decoder
+    // clang-format off
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x00-0x07
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x08-0x0F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x10-0x17
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x18-0x1F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x20-0x27
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0x28-0x2F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x30-0x37
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0x38-0x3F
+
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x40-0x47
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x48-0x4F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x50-0x57
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x58-0x5F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x60-0x67
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0x68-0x6F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x70-0x77
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0x78-0x7F
+
+        sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32,  sf_FILDil_a32, // 0x80-0x87
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x88-0x8F
+        sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32,  sf_FISTil_a32, // 0x90-0x97
+        sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, // 0x98-0x9F
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xA0-0xA7
+        sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32,    sf_FLDe_a32, // 0xA8-0xAF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xB0-0xB7
+        sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32,   sf_FSTPe_a32, // 0xB8-0xBF
+
+        sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB,     sf_FCMOVNB, // 0xC0-0xC7 (ILLEGAL_a32 in sf_fpu_db_a32)
+        sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE,     sf_FCMOVNE, // 0xC8-0xCF (ILLEGAL_a32 in sf_fpu_db_a32)
+        sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE,    sf_FCMOVNBE, // 0xD0-0xD7 (ILLEGAL_a32 in sf_fpu_db_a32)
+        sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU,     sf_FCMOVNU, // 0xD8-0xDF (ILLEGAL_a32 in sf_fpu_db_a32)
+        sf_FNOP,        sf_FNOP,        sf_FNCLEX,       sf_FNINIT,       sf_FNOP,        sf_FNOP,        ILLEGAL_a32,       ILLEGAL_a32, // 0xE0-0xE7: the only row whose eight entries differ from one another
+        sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj,      sf_FUCOMI_st0_stj, // 0xE8-0xEF (ILLEGAL_a32 in sf_fpu_db_a32)
+        sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj,       sf_FCOMI_st0_stj, // 0xF0-0xF7 (ILLEGAL_a32 in sf_fpu_db_a32)
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF8-0xFF
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_dc_a16)[32] = { // 32 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the mod and reg fields of the ModRM byte after an 0xDC escape, 16-bit addressing, 287-class FPU - confirm against the decoder
+    // clang-format off
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, // 0x00-0x07
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, // 0x08-0x0F
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, // 0x10-0x17
+        sf_FADD_sti_st0,     sf_FMUL_sti_st0,     ILLEGAL_a16, ILLEGAL_a16,  sf_FSUBR_sti_st0,    sf_FSUB_sti_st0,      sf_FDIVR_sti_st0,    sf_FDIV_sti_st0, // 0x18-0x1F; 0x1A/0x1B are ILLEGAL here but sf_FCOM_sti/sf_FCOMP_sti in sf_fpu_dc_a16
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_dc_a32)[32] = { // 32 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the mod and reg fields of the ModRM byte after an 0xDC escape, 32-bit addressing, 287-class FPU - confirm against the decoder
+    // clang-format off
+        sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, // 0x00-0x07
+        sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, // 0x08-0x0F
+        sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, // 0x10-0x17
+        sf_FADD_sti_st0,     sf_FMUL_sti_st0,     ILLEGAL_a32, ILLEGAL_a32,  sf_FSUBR_sti_st0,    sf_FSUB_sti_st0,      sf_FDIVR_sti_st0,    sf_FDIV_sti_st0, // 0x18-0x1F; 0x1A/0x1B are ILLEGAL here but sf_FCOM_sti/sf_FCOMP_sti in sf_fpu_dc_a32
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_dc_a16)[32] = { // 32 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the mod and reg fields of the ModRM byte after an 0xDC escape, 16-bit addressing - confirm against the decoder
+    // clang-format off
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, // 0x00-0x07
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, // 0x08-0x0F
+        sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, // 0x10-0x17
+        sf_FADD_sti_st0,     sf_FMUL_sti_st0,     sf_FCOM_sti,      sf_FCOMP_sti,      sf_FSUBR_sti_st0,    sf_FSUB_sti_st0,      sf_FDIVR_sti_st0,    sf_FDIV_sti_st0, // 0x18-0x1F; note the FSUBR/FDIVR handlers precede FSUB/FDIV, the reverse of the rows above
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_dc_a32)[32] = { // 32 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the mod and reg fields of the ModRM byte after an 0xDC escape, 32-bit addressing - confirm against the decoder
+    // clang-format off
+        sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, // 0x00-0x07
+        sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, // 0x08-0x0F
+        sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, // 0x10-0x17
+        sf_FADD_sti_st0,     sf_FMUL_sti_st0,     sf_FCOM_sti,      sf_FCOMP_sti,      sf_FSUBR_sti_st0,    sf_FSUB_sti_st0,      sf_FDIVR_sti_st0,    sf_FDIV_sti_st0, // 0x18-0x1F; note the FSUBR/FDIVR handlers precede FSUB/FDIV, the reverse of the rows above
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_dd_a16)[256] = { // 256 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the ModRM byte after an 0xDD escape, 16-bit addressing, 287-class FPU - confirm against the decoder
+    // clang-format off
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16, // 0x00-0x07
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x08-0x0F
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16, // 0x10-0x17
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16, // 0x18-0x1F
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16, // 0x20-0x27
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x28-0x2F
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16, // 0x30-0x37
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16, // 0x38-0x3F
+
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16, // 0x40-0x47
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x48-0x4F
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16, // 0x50-0x57
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16, // 0x58-0x5F
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16, // 0x60-0x67
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x68-0x6F
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16, // 0x70-0x77
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16, // 0x78-0x7F
+
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16, // 0x80-0x87
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0x88-0x8F
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16, // 0x90-0x97
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16, // 0x98-0x9F
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16, // 0xA0-0xA7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xA8-0xAF
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16, // 0xB0-0xB7
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16, // 0xB8-0xBF
+
+        sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti, // 0xC0-0xC7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xC8-0xCF
+        sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti, // 0xD0-0xD7
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, // 0xD8-0xDF
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xE0-0xE7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xE8-0xEF
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xF0-0xF7
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, // 0xF8-0xFF
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_dd_a32)[256] = { // 256 handlers, 8 per row (index range noted per row). NOTE(review): presumably indexed by the ModRM byte after an 0xDD escape, 32-bit addressing, 287-class FPU - confirm against the decoder
+    // clang-format off
+        sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32, // 0x00-0x07
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x08-0x0F
+        sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32, // 0x10-0x17
+        sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32, // 0x18-0x1F
+        sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32, // 0x20-0x27
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x28-0x2F
+        sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32, // 0x30-0x37
+        sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32, // 0x38-0x3F
+
+        sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32, // 0x40-0x47
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x48-0x4F
+        sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32, // 0x50-0x57
+        sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32, // 0x58-0x5F
+        sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32, // 0x60-0x67
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x68-0x6F
+        sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32, // 0x70-0x77
+        sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32, // 0x78-0x7F
+
+        sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32, // 0x80-0x87
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0x88-0x8F
+        sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32, // 0x90-0x97
+        sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32, // 0x98-0x9F
+        sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32, // 0xA0-0xA7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xA8-0xAF
+        sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32, // 0xB0-0xB7
+        sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32, // 0xB8-0xBF
+
+        sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti, // 0xC0-0xC7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xC8-0xCF
+        sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti, // 0xD0-0xD7
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, // 0xD8-0xDF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xE0-0xE7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xE8-0xEF
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF0-0xF7
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, // 0xF8-0xFF
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_dd_a16)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns, each row repeating one handler; presumably indexed by the ModRM byte of a DD escape opcode (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. First group (entries 0-63): _a16-suffixed handlers. */
+    // clang-format off
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,
+        /* Second group (entries 64-127): identical to the first group. */
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,    sf_FLDd_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,    sf_FSTd_a16,
+        sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,   sf_FSTPd_a16,
+        sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,   sf_FRSTOR_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,   sf_FNSAVE_a16,
+        sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,   sf_FNSTSW_a16,
+        /* Fourth group (entries 192-255): _sti-suffixed handlers (presumably the ST(i) register forms - TODO confirm); the last two rows are ILLEGAL_a16. */
+        sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,
+        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,
+        sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,
+        sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,
+        sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_dd_a32)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns, each row repeating one handler; presumably indexed by the ModRM byte of a DD escape opcode (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. First group (entries 0-63): _a32-suffixed handlers. */
+    // clang-format off
+        sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,
+        sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,
+        sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,
+        sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,
+        /* Second group (entries 64-127): identical to the first group. */
+        sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,
+        sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,
+        sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,
+        sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,    sf_FLDd_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,    sf_FSTd_a32,
+        sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,   sf_FSTPd_a32,
+        sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,   sf_FRSTOR_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,   sf_FNSAVE_a32,
+        sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,   sf_FNSTSW_a32,
+        /* Fourth group (entries 192-255): _sti-suffixed handlers (presumably the ST(i) register forms - TODO confirm); the last two rows are ILLEGAL_a32. */
+        sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,       sf_FFREE_sti,
+        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,
+        sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,         sf_FST_sti,
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,
+        sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,       sf_FUCOM_sti,
+        sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,      sf_FUCOMP_sti,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_de_a16)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns; presumably indexed by the ModRM byte of a DE escape opcode for 287-class FPUs (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. Same contents as sf_fpu_de_a16 except row 2 of the fourth group. First group (entries 0-63): the sf_F*iw_a16 handlers. */
+    // clang-format off
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+        /* Second group (entries 64-127): identical to the first group. */
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+        /* Fourth group (entries 192-255): the sf_F*P_sti_st0 handlers; row 2 is entirely ILLEGAL_a16 and row 3 is ILLEGAL_a16 except entry 217 (0xD9), which is sf_FCOMPP. */
+        sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,
+        sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  sf_FCOMPP,     ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,
+        sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,
+        sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,
+        sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_de_a32)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns; presumably indexed by the ModRM byte of a DE escape opcode for 287-class FPUs (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. Same contents as sf_fpu_de_a32 except row 2 of the fourth group. First group (entries 0-63): the sf_F*iw_a32 handlers. */
+    // clang-format off
+        sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,
+        sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,
+        sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,
+        sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32,
+        sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,
+        sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32,
+        sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,
+        sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32,
+        /* Second group (entries 64-127): identical to the first group. */
+        sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,
+        sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,
+        sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,
+        sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32,
+        sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,
+        sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32,
+        sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,
+        sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,
+        sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,
+        sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,
+        sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32,
+        sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,
+        sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32,
+        sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,
+        sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32,
+        /* Fourth group (entries 192-255): the sf_F*P_sti_st0 handlers; row 2 is entirely ILLEGAL_a32 and row 3 is ILLEGAL_a32 except entry 217 (0xD9), which is sf_FCOMPP. */
+        sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,
+        sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        ILLEGAL_a32,  sf_FCOMPP,     ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,
+        sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,
+        sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,
+        sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_de_a16)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns; presumably indexed by the ModRM byte of a DE escape opcode (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. Same contents as sf_fpu_287_de_a16 except row 2 of the fourth group. First group (entries 0-63): the sf_F*iw_a16 handlers. */
+    // clang-format off
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+        /* Second group (entries 64-127): identical to the first group. */
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,  sf_FADDiw_a16,
+        sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,  sf_FMULiw_a16,
+        sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,  sf_FCOMiw_a16,
+        sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16,
+        sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,  sf_FSUBiw_a16,
+        sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16,
+        sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,  sf_FDIViw_a16,
+        sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16,
+        /* Fourth group (entries 192-255): the sf_F*P_sti_st0 handlers; row 2 is sf_FCOMP_sti (ILLEGAL_a16 in sf_fpu_287_de_a16) and row 3 is ILLEGAL_a16 except entry 217 (0xD9), which is sf_FCOMPP. */
+        sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,
+        sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,
+        sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,
+        ILLEGAL_a16,  sf_FCOMPP,     ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,
+        sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,
+        sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,
+        sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_de_a32)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns; presumably indexed by the ModRM byte of a DE escape opcode (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. Same contents as sf_fpu_287_de_a32 except row 2 of the fourth group. First group (entries 0-63): the sf_F*iw_a32 handlers. */
+    // clang-format off
+        sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,
+        sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,
+        sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,
+        sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32,
+        sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,
+        sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32,
+        sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,
+        sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32,
+        /* Second group (entries 64-127): identical to the first group. */
+        sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,
+        sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,
+        sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,
+        sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32,
+        sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,
+        sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32,
+        sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,
+        sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,  sf_FADDiw_a32,
+        sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,  sf_FMULiw_a32,
+        sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,  sf_FCOMiw_a32,
+        sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32,
+        sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,  sf_FSUBiw_a32,
+        sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32,
+        sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,  sf_FDIViw_a32,
+        sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32,
+        /* Fourth group (entries 192-255): the sf_F*P_sti_st0 handlers; row 2 is sf_FCOMP_sti (ILLEGAL_a32 in sf_fpu_287_de_a32) and row 3 is ILLEGAL_a32 except entry 217 (0xD9), which is sf_FCOMPP. */
+        sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,       sf_FADDP_sti_st0,
+        sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,       sf_FMULP_sti_st0,
+        sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,       sf_FCOMP_sti,
+        ILLEGAL_a32,  sf_FCOMPP,     ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,
+        sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,      sf_FSUBRP_sti_st0,
+        sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,       sf_FSUBP_sti_st0,
+        sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,      sf_FDIVRP_sti_st0,
+        sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,       sf_FDIVP_sti_st0,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_df_a16)[256] = { /* 256 handlers laid out as 4 groups x 8 rows x 8 columns; presumably indexed by the ModRM byte of a DF escape opcode for 287-class FPUs (group=mod, row=reg, column=rm) - TODO confirm against the dispatcher. First group (entries 0-63): _a16-suffixed handlers, with rows 1 and 4 set to ILLEGAL_a16. */
+    // clang-format off
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,
+        /* Second group (entries 64-127): identical to the first group. NOTE(review): row 4 of the first three groups is ILLEGAL_a16, whereas the x87 instruction set defines DF /4 as FBLD - confirm the omission is intentional. */
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,
+        /* Third group (entries 128-191): identical to the first group. */
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,
+        /* Fourth group (entries 192-255): row 0 is sf_FFREEP_sti and entry 224 (0xE0) is sf_FNSTSW_AX; every other entry is ILLEGAL_a16. */
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        sf_FNSTSW_AX,   ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_df_a32)[256] = { /* 256 handlers, 8 per row. 0x00-0xBF: one 64-entry group of _a32 handlers repeated three times; 0xC0-0xFF: sf_FFREEP_sti, sf_FNSTSW_AX, rest ILLEGAL. Presumably indexed by the ModR/M byte after a 0xDF escape - confirm at the dispatch site. */
+    // clang-format off
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x00-0x07 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x08-0x0F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x10-0x17 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x18-0x1F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x20-0x27; NOTE(review): if this is ModR/M-indexed, DF /4 is FBLD in the x86 ISA - confirm ILLEGAL is intended */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0x28-0x2F */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0x30-0x37 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0x38-0x3F */
+
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x40-0x47 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x48-0x4F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x50-0x57 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x58-0x5F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x60-0x67 */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0x68-0x6F */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0x70-0x77 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0x78-0x7F */
+
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x80-0x87 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x88-0x8F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x90-0x97 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x98-0x9F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xA0-0xA7 */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0xA8-0xAF */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0xB0-0xB7 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0xB8-0xBF */
+
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti, /* 0xC0-0xC7 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xC8-0xCF */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xD0-0xD7 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xD8-0xDF */
+        sf_FNSTSW_AX,   ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xE0-0xE7: only 0xE0 is handled */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xE8-0xEF */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xF0-0xF7 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xF8-0xFF */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_df_a16)[256] = { /* 256 handlers, 8 per row. 0x00-0xBF: one 64-entry group of _a16 handlers repeated three times; 0xC0-0xFF: sf_FFREEP_sti, sf_FXCH_sti, sf_FSTP_sti (two rows), sf_FNSTSW_AX, rest ILLEGAL. Presumably indexed by the ModR/M byte after a 0xDF escape - confirm at the dispatch site. */
+    // clang-format off
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* 0x00-0x07 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x08-0x0F */
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16, /* 0x10-0x17 */
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, /* 0x18-0x1F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x20-0x27; NOTE(review): if this is ModR/M-indexed, DF /4 is FBLD in the x86 ISA - confirm ILLEGAL is intended */
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16, /* 0x28-0x2F */
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16, /* 0x30-0x37 */
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16, /* 0x38-0x3F */
+
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* 0x40-0x47 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x48-0x4F */
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16, /* 0x50-0x57 */
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, /* 0x58-0x5F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x60-0x67 */
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16, /* 0x68-0x6F */
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16, /* 0x70-0x77 */
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16, /* 0x78-0x7F */
+
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* 0x80-0x87 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x88-0x8F */
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16, /* 0x90-0x97 */
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, /* 0x98-0x9F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xA0-0xA7 */
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16, /* 0xA8-0xAF */
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16, /* 0xB0-0xB7 */
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16, /* 0xB8-0xBF */
+
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti, /* 0xC0-0xC7 */
+        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti, /* 0xC8-0xCF */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD0-0xD7 */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD8-0xDF */
+        sf_FNSTSW_AX,   ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xE0-0xE7: only 0xE0 is handled */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xE8-0xEF */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xF0-0xF7 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xF8-0xFF */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_df_a32)[256] = { /* 256 handlers, 8 per row. 0x00-0xBF: one 64-entry group of _a32 handlers repeated three times; 0xC0-0xFF: sf_FFREEP_sti, sf_FXCH_sti, sf_FSTP_sti (two rows), sf_FNSTSW_AX, rest ILLEGAL. Presumably indexed by the ModR/M byte after a 0xDF escape - confirm at the dispatch site. */
+    // clang-format off
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x00-0x07 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x08-0x0F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x10-0x17 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x18-0x1F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x20-0x27; NOTE(review): if this is ModR/M-indexed, DF /4 is FBLD in the x86 ISA - confirm ILLEGAL is intended */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0x28-0x2F */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0x30-0x37 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0x38-0x3F */
+
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x40-0x47 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x48-0x4F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x50-0x57 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x58-0x5F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x60-0x67 */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0x68-0x6F */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0x70-0x77 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0x78-0x7F */
+
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x80-0x87 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x88-0x8F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x90-0x97 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x98-0x9F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xA0-0xA7 */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0xA8-0xAF */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0xB0-0xB7 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0xB8-0xBF */
+
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti, /* 0xC0-0xC7 */
+        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti, /* 0xC8-0xCF */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD0-0xD7 */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD8-0xDF */
+        sf_FNSTSW_AX,   ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xE0-0xE7: only 0xE0 is handled */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xE8-0xEF */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xF0-0xF7 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xF8-0xFF */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_686_df_a16)[256] = { /* 256 handlers, 8 per row. 0x00-0xBF: one 64-entry group of _a16 handlers repeated three times; 0xC0-0xFF: as sf_fpu_df_a16 plus sf_FUCOMIP_st0_stj at 0xE8-0xEF and sf_FCOMIP_st0_stj at 0xF0-0xF7. Presumably indexed by the ModR/M byte after a 0xDF escape - confirm at the dispatch site. */
+    // clang-format off
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* 0x00-0x07 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x08-0x0F */
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16, /* 0x10-0x17 */
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, /* 0x18-0x1F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x20-0x27; NOTE(review): if this is ModR/M-indexed, DF /4 is FBLD in the x86 ISA - confirm ILLEGAL is intended */
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16, /* 0x28-0x2F */
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16, /* 0x30-0x37 */
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16, /* 0x38-0x3F */
+
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* 0x40-0x47 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x48-0x4F */
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16, /* 0x50-0x57 */
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, /* 0x58-0x5F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x60-0x67 */
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16, /* 0x68-0x6F */
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16, /* 0x70-0x77 */
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16, /* 0x78-0x7F */
+
+        sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16,  sf_FILDiw_a16, /* 0x80-0x87 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0x88-0x8F */
+        sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16,  sf_FISTiw_a16, /* 0x90-0x97 */
+        sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, /* 0x98-0x9F */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xA0-0xA7 */
+        sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16,  sf_FILDiq_a16, /* 0xA8-0xAF */
+        sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16,     sf_FBSTP_PACKED_BCD_a16, /* 0xB0-0xB7 */
+        sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16,   sf_FISTPiq_a16, /* 0xB8-0xBF */
+
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti, /* 0xC0-0xC7 */
+        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti, /* 0xC8-0xCF */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD0-0xD7 */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD8-0xDF */
+        sf_FNSTSW_AX,   ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xE0-0xE7: only 0xE0 is handled */
+        sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj, /* 0xE8-0xEF */
+        sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj, /* 0xF0-0xF7 */
+        ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /* 0xF8-0xFF */
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_686_df_a32)[256] = { /* 256 handlers, 8 per row. 0x00-0xBF: one 64-entry group of _a32 handlers repeated three times; 0xC0-0xFF: as sf_fpu_df_a32 plus sf_FUCOMIP_st0_stj at 0xE8-0xEF and sf_FCOMIP_st0_stj at 0xF0-0xF7. Presumably indexed by the ModR/M byte after a 0xDF escape - confirm at the dispatch site. */
+    // clang-format off
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x00-0x07 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x08-0x0F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x10-0x17 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x18-0x1F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x20-0x27; NOTE(review): if this is ModR/M-indexed, DF /4 is FBLD in the x86 ISA - confirm ILLEGAL is intended */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0x28-0x2F */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0x30-0x37 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0x38-0x3F */
+
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x40-0x47 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x48-0x4F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x50-0x57 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x58-0x5F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x60-0x67 */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0x68-0x6F */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0x70-0x77 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0x78-0x7F */
+
+        sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32,  sf_FILDiw_a32, /* 0x80-0x87 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0x88-0x8F */
+        sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32,  sf_FISTiw_a32, /* 0x90-0x97 */
+        sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, /* 0x98-0x9F */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xA0-0xA7 */
+        sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32,  sf_FILDiq_a32, /* 0xA8-0xAF */
+        sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32,     sf_FBSTP_PACKED_BCD_a32, /* 0xB0-0xB7 */
+        sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32,   sf_FISTPiq_a32, /* 0xB8-0xBF */
+
+        sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti,      sf_FFREEP_sti, /* 0xC0-0xC7 */
+        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti,        sf_FXCH_sti, /* 0xC8-0xCF */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD0-0xD7 */
+        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti,        sf_FSTP_sti, /* 0xD8-0xDF */
+        sf_FNSTSW_AX,   ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xE0-0xE7: only 0xE0 is handled */
+        sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj,     sf_FUCOMIP_st0_stj, /* 0xE8-0xEF */
+        sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj,      sf_FCOMIP_st0_stj, /* 0xF0-0xF7 */
+        ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /* 0xF8-0xFF */
+    // clang-format on
+};
+
 const OpFn OP_TABLE(fpu_d8_a16)[32] = {
     // clang-format off
         opFADDs_a16, opFMULs_a16, opFCOMs_a16, opFCOMPs_a16, opFSUBs_a16, opFSUBRs_a16, opFDIVs_a16, opFDIVRs_a16,
@@ -839,7 +2374,7 @@ const OpFn OP_TABLE(fpu_287_d9_a16)[256] = {
         ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16,  ILLEGAL_a16, /*Invalid*/
         opFCHS,       opFABS,       ILLEGAL_a16,  ILLEGAL_a16,  opFTST,       opFXAM,       ILLEGAL_a16,  ILLEGAL_a16,
         opFLD1,       opFLDL2T,     opFLDL2E,     opFLDPI,      opFLDEG2,     opFLDLN2,     opFLDZ,       ILLEGAL_a16,
-        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     ILLEGAL_a16,  opFPREM1,     opFDECSTP,    opFINCSTP,
+        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     opFXTRACT,  opFPREM1,     opFDECSTP,    opFINCSTP,
         opFPREM,      opFYL2XP1,    opFSQRT,      opFSINCOS,    opFRNDINT,    opFSCALE,     opFSIN,       opFCOS
     // clang-format on
 };
@@ -879,7 +2414,7 @@ const OpFn OP_TABLE(fpu_287_d9_a32)[256] = {
         ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32,  ILLEGAL_a32, /*Invalid*/
         opFCHS,       opFABS,       ILLEGAL_a32,  ILLEGAL_a32,  opFTST,       opFXAM,       ILLEGAL_a32,  ILLEGAL_a32,
         opFLD1,       opFLDL2T,     opFLDL2E,     opFLDPI,      opFLDEG2,     opFLDLN2,     opFLDZ,       ILLEGAL_a32,
-        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     ILLEGAL_a32,  opFPREM1,     opFDECSTP,    opFINCSTP,
+        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     opFXTRACT,    opFPREM1,     opFDECSTP,    opFINCSTP,
         opFPREM,      opFYL2XP1,    opFSQRT,      opFSINCOS,    opFRNDINT,    opFSCALE,     opFSIN,       opFCOS
     // clang-format on
 };
@@ -919,7 +2454,7 @@ const OpFn OP_TABLE(fpu_d9_a16)[256] = {
         opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,  /*Invalid*/
         opFCHS,       opFABS,       ILLEGAL_a16,  ILLEGAL_a16,  opFTST,       opFXAM,       ILLEGAL_a16,  ILLEGAL_a16,
         opFLD1,       opFLDL2T,     opFLDL2E,     opFLDPI,      opFLDEG2,     opFLDLN2,     opFLDZ,       ILLEGAL_a16,
-        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     ILLEGAL_a16,  opFPREM1,     opFDECSTP,    opFINCSTP,
+        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     opFXTRACT,  opFPREM1,     opFDECSTP,    opFINCSTP,
         opFPREM,      opFYL2XP1,    opFSQRT,      opFSINCOS,    opFRNDINT,    opFSCALE,     opFSIN,       opFCOS
     // clang-format on
 };
@@ -959,7 +2494,7 @@ const OpFn OP_TABLE(fpu_d9_a32)[256] = {
         opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,       opFSTP,  /*Invalid*/
         opFCHS,       opFABS,       ILLEGAL_a32,  ILLEGAL_a32,  opFTST,       opFXAM,       ILLEGAL_a32,  ILLEGAL_a32,
         opFLD1,       opFLDL2T,     opFLDL2E,     opFLDPI,      opFLDEG2,     opFLDLN2,     opFLDZ,       ILLEGAL_a32,
-        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     ILLEGAL_a32,  opFPREM1,     opFDECSTP,    opFINCSTP,
+        opF2XM1,      opFYL2X,      opFPTAN,      opFPATAN,     opFXTRACT,    opFPREM1,     opFDECSTP,    opFINCSTP,
         opFPREM,      opFYL2XP1,    opFSQRT,      opFSINCOS,    opFRNDINT,    opFSCALE,     opFSIN,       opFCOS
     // clang-format on
 };
diff --git a/src/cpu/x87_ops_misc.h b/src/cpu/x87_ops_misc.h
index 091d7cc31..bd2b05c52 100644
--- a/src/cpu/x87_ops_misc.h
+++ b/src/cpu/x87_ops_misc.h
@@ -33,6 +33,27 @@ opFNOP(uint32_t fetchdat)
     return 0;
 }
 
+/* FXTRACT: ST(0) is replaced by its unbiased exponent, then its significand (magnitude in [1, 2)) is pushed.
+ * Masked-exception results for special operands: +-0 -> (-inf, +-0), +-inf -> (+inf, +-inf), NaN -> (NaN, NaN).
+ * NOTE(review): no zero-divide status flag is raised for a zero operand here - confirm whether callers need it. */
+static int
+opFXTRACT(uint32_t fetchdat)
+{
+    double src, mant;
+    int    exp2 = 0;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    src  = ST(0);
+    mant = frexp(src, &exp2) * 2.0; /* frexp yields [0.5, 1) and normalises subnormals; 0, inf and NaN pass through */
+    ST(0) = (src == 0.0) ? -INFINITY : (isinf(src) ? INFINITY : (isnan(src) ? src : (double) (exp2 - 1)));
+    FP_TAG_VALID;
+    x87_push(mant);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxtract) : (x87_concurrency.fxtract * cpu_multi));
+    return 0;
+}
+
 static int
 opFCLEX(uint32_t fetchdat)
 {
@@ -741,7 +762,7 @@ opFPREM(uint32_t fetchdat)
     CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem) : (x87_concurrency.fprem * cpu_multi));
     return 0;
 }
-#ifndef FPU_8087
+
 static int
 opFPREM1(uint32_t fetchdat)
 {
@@ -762,7 +783,6 @@ opFPREM1(uint32_t fetchdat)
     CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem1) : (x87_concurrency.fprem1 * cpu_multi));
     return 0;
 }
-#endif
 
 static int
 opFSQRT(uint32_t fetchdat)
diff --git a/src/cpu/x87_ops_sf.h b/src/cpu/x87_ops_sf.h
new file mode 100644
index 000000000..e70556fea
--- /dev/null
+++ b/src/cpu/x87_ops_sf.h
@@ -0,0 +1,593 @@
+static uint32_t
+fpu_save_environment(void)
+{
+    int tag;
+    unsigned offset = 0;
+    /* Store the FPU environment image at easeg:cpu_state.eaaddr; returns the address just past the image. */
+    /* refresh the tag of every non-empty register from its current contents */
+    for (int n = 0; n < 8; n++) {
+        // update tag only if it is not empty
+        if (!IS_TAG_EMPTY(n)) {
+            tag = FPU_tagof(FPU_read_regi(n));
+            FPU_settagi(tag, n);
+        }
+    }
+
+    fpu_state.swd = (fpu_state.swd & ~(7 << 11)) | ((fpu_state.tos & 7) << 11); /* copy tos into status bits 11-13 */
+
+    switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { /* bit 0: CR0 bit 0, bit 8: cpu_state.op32 bit 8 */
+        case 0x000: { /*16-bit real mode*/
+            uint16_t tmp;
+            uint32_t fp_ip, fp_dp;
+            /* linear address = segment * 16 + offset; an OR would lose bits whenever the two overlap */
+            fp_ip = ((uint32_t)(fpu_state.fcs << 4)) + fpu_state.fip;
+            fp_dp = ((uint32_t)(fpu_state.fds << 4)) + fpu_state.fdp;
+
+            tmp = i387_get_control_word();
+            writememw(easeg, cpu_state.eaaddr + 0x00, tmp);
+            tmp = i387_get_status_word();
+            writememw(easeg, cpu_state.eaaddr + 0x02, tmp);
+            tmp = fpu_state.tag;
+            writememw(easeg, cpu_state.eaaddr + 0x04, tmp);
+            tmp = fp_ip & 0xffff;
+            writememw(easeg, cpu_state.eaaddr + 0x06, tmp);
+            tmp = (uint16_t)((fp_ip & 0xf0000) >> 4) | fpu_state.foo; /* pointer bits 16-19 in bits 12-15, foo below */
+            writememw(easeg, cpu_state.eaaddr + 0x08, tmp);
+            tmp = fp_dp & 0xffff;
+            writememw(easeg, cpu_state.eaaddr + 0x0a, tmp);
+            tmp = (uint16_t)((fp_dp & 0xf0000) >> 4);
+            writememw(easeg, cpu_state.eaaddr + 0x0c, tmp);
+            offset = 0x0e;
+        }
+        break;
+        case 0x001: { /*16-bit protected mode*/
+            uint16_t tmp;
+            tmp = i387_get_control_word();
+            writememw(easeg, cpu_state.eaaddr + 0x00, tmp);
+            tmp = i387_get_status_word();
+            writememw(easeg, cpu_state.eaaddr + 0x02, tmp);
+            tmp = fpu_state.tag;
+            writememw(easeg, cpu_state.eaaddr + 0x04, tmp);
+            tmp = (uint16_t)(fpu_state.fip) & 0xffff;
+            writememw(easeg, cpu_state.eaaddr + 0x06, tmp);
+            tmp = fpu_state.fcs;
+            writememw(easeg, cpu_state.eaaddr + 0x08, tmp);
+            tmp = (uint16_t)(fpu_state.fdp) & 0xffff;
+            writememw(easeg, cpu_state.eaaddr + 0x0a, tmp);
+            tmp = fpu_state.fds;
+            writememw(easeg, cpu_state.eaaddr + 0x0c, tmp);
+            offset = 0x0e;
+        }
+        break;
+        case 0x100: { /*32-bit real mode*/
+            uint32_t tmp, fp_ip, fp_dp;
+            /* linear address = segment * 16 + offset; an OR would lose bits whenever the two overlap */
+            fp_ip = ((uint32_t)(fpu_state.fcs << 4)) + fpu_state.fip;
+            fp_dp = ((uint32_t)(fpu_state.fds << 4)) + fpu_state.fdp;
+
+            tmp = 0xffff0000 | i387_get_control_word();
+            writememl(easeg, cpu_state.eaaddr + 0x00, tmp);
+            tmp = 0xffff0000 | i387_get_status_word();
+            writememl(easeg, cpu_state.eaaddr + 0x04, tmp);
+            tmp = 0xffff0000 | fpu_state.tag;
+            writememl(easeg, cpu_state.eaaddr + 0x08, tmp);
+            tmp = 0xffff0000 | (fp_ip & 0xffff);
+            writememl(easeg, cpu_state.eaaddr + 0x0c, tmp);
+            tmp = ((fp_ip & 0xffff0000) >> 4) | fpu_state.foo; /* pointer bits 16-31 in bits 12-27, foo below */
+            writememl(easeg, cpu_state.eaaddr + 0x10, tmp);
+            tmp = 0xffff0000 | (fp_dp & 0xffff);
+            writememl(easeg, cpu_state.eaaddr + 0x14, tmp);
+            tmp = (fp_dp & 0xffff0000) >> 4;
+            writememl(easeg, cpu_state.eaaddr + 0x18, tmp);
+            offset = 0x1c;
+        }
+        break;
+        case 0x101: { /*32-bit protected mode*/
+            uint32_t tmp;
+            tmp = 0xffff0000 | i387_get_control_word();
+            writememl(easeg, cpu_state.eaaddr + 0x00, tmp);
+            tmp = 0xffff0000 | i387_get_status_word();
+            writememl(easeg, cpu_state.eaaddr + 0x04, tmp);
+            tmp = 0xffff0000 | fpu_state.tag;
+            writememl(easeg, cpu_state.eaaddr + 0x08, tmp);
+            tmp = (uint32_t)(fpu_state.fip);
+            writememl(easeg, cpu_state.eaaddr + 0x0c, tmp);
+            tmp = fpu_state.fcs | (((uint32_t)(fpu_state.foo)) << 16); /* selector in the low half, foo in the high half */
+            writememl(easeg, cpu_state.eaaddr + 0x10, tmp);
+            tmp = (uint32_t)(fpu_state.fdp);
+            writememl(easeg, cpu_state.eaaddr + 0x14, tmp);
+            tmp = 0xffff0000 | fpu_state.fds;
+            writememl(easeg, cpu_state.eaaddr + 0x18, tmp);
+            offset = 0x1c;
+        }
+        break;
+    }
+
+    return (cpu_state.eaaddr + offset);
+}
+
+static uint32_t
+fpu_load_environment(void)
+{
+    unsigned offset = 0;
+    /* Load the FPU environment image from easeg:cpu_state.eaaddr; returns the address just past the image. */
+    switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { /* bit 0: CR0 bit 0, bit 8: cpu_state.op32 bit 8 */
+        case 0x000: { /*16-bit real mode*/
+            uint16_t tmp;
+            uint32_t fp_ip, fp_dp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c);
+            fp_dp = (tmp & 0xf000) << 4;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a);
+            fpu_state.fdp = fp_dp | tmp;
+            fpu_state.fds = 0;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x08);
+            fp_ip = (tmp & 0xf000) << 4;
+            fpu_state.foo = tmp & 0x07ff; /* restore foo: the save path stores it in this word, as in 32-bit real mode */
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x06);
+            fpu_state.fip = fp_ip | tmp;
+            fpu_state.fcs = 0;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x04);
+            fpu_state.tag = tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x02);
+            fpu_state.swd = tmp;
+            fpu_state.tos = (tmp >> 11) & 7;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x00);
+            fpu_state.cwd = tmp;
+            offset = 0x0e;
+        }
+        break;
+        case 0x001: { /*16-bit protected mode*/
+            uint16_t tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c);
+            fpu_state.fds = tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a);
+            fpu_state.fdp = tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x08);
+            fpu_state.fcs = tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x06);
+            fpu_state.fip = tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x04);
+            fpu_state.tag = tmp;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x02);
+            fpu_state.swd = tmp;
+            fpu_state.tos = (tmp >> 11) & 7;
+            tmp = readmemw(easeg, cpu_state.eaaddr + 0x00);
+            fpu_state.cwd = tmp;
+            offset = 0x0e;
+        }
+        break;
+        case 0x100: { /*32-bit real mode*/
+            uint32_t tmp, fp_ip, fp_dp;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x18);
+            fp_dp = (tmp & 0x0ffff000) << 4;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x14);
+            fp_dp |= (tmp & 0xffff);
+            fpu_state.fdp = fp_dp;
+            fpu_state.fds = 0;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x10);
+            fpu_state.foo = tmp & 0x07ff;
+            fp_ip = (tmp & 0x0ffff000) << 4;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c);
+            fp_ip |= (tmp & 0xffff);
+            fpu_state.fip = fp_ip;
+            fpu_state.fcs = 0;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x08);
+            fpu_state.tag = tmp & 0xffff;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x04);
+            fpu_state.swd = tmp & 0xffff;
+            fpu_state.tos = (tmp >> 11) & 7;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x00);
+            fpu_state.cwd = tmp & 0xffff;
+            offset = 0x1c;
+        }
+        break;
+        case 0x101: { /*32-bit protected mode*/
+            uint32_t tmp;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x18);
+            fpu_state.fds = tmp & 0xffff;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x14);
+            fpu_state.fdp = tmp;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x10);
+            fpu_state.fcs = tmp & 0xffff;
+            fpu_state.foo = (tmp >> 16) & 0x07ff;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c);
+            fpu_state.fip = tmp;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x08);
+            fpu_state.tag = tmp & 0xffff;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x04);
+            fpu_state.swd = tmp & 0xffff;
+            fpu_state.tos = (tmp >> 11) & 7;
+            tmp = readmeml(easeg, cpu_state.eaaddr + 0x00);
+            fpu_state.cwd = tmp & 0xffff;
+            offset = 0x1c;
+        }
+        break;
+    }
+
+    fpu_state.cwd = (fpu_state.cwd & ~FPU_CW_Reserved_Bits) | 0x0040; /* reserved bits dropped, bit 6 always set */
+
+    /* check for unmasked exceptions */
+    if (fpu_state.swd & ~fpu_state.cwd & FPU_CW_Exceptions_Mask) {
+        /* set the B and ES bits in the status-word */
+        fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
+    } else {
+        /* clear the B and ES bits in the status-word */
+        fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
+    }
+
+    return (cpu_state.eaaddr + offset);
+}
+
+static int
+sf_FLDCW_a16(uint32_t fetchdat)
+{
+    uint16_t new_cw;
+    /* FLDCW with 16-bit addressing: load the control word from memory, then refresh the B/ES status bits. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    new_cw = geteaw();
+    if (cpu_state.abrt)
+        return 1;
+    /* Reserved bits are stripped and bit 6 is forced to 1. */
+    fpu_state.cwd = (new_cw & ~FPU_CW_Reserved_Bits) | 0x0040;
+    if ((fpu_state.swd & (~fpu_state.cwd & FPU_CW_Exceptions_Mask)) == 0) {
+        /* No flagged exception is unmasked: drop B and ES. */
+        fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
+    } else {
+        /* A flagged exception is now unmasked: raise B and ES. */
+        fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldcw) : (x87_timings.fldcw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldcw) : (x87_concurrency.fldcw * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+static int
+sf_FLDCW_a32(uint32_t fetchdat)
+{
+    uint16_t new_cw;
+    /* FLDCW with 32-bit addressing: load the control word from memory, then refresh the B/ES status bits. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    new_cw = geteaw();
+    if (cpu_state.abrt)
+        return 1;
+    /* Reserved bits are stripped and bit 6 is forced to 1. */
+    fpu_state.cwd = (new_cw & ~FPU_CW_Reserved_Bits) | 0x0040;
+    if ((fpu_state.swd & (~fpu_state.cwd & FPU_CW_Exceptions_Mask)) == 0) {
+        /* No flagged exception is unmasked: drop B and ES. */
+        fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
+    } else {
+        /* A flagged exception is now unmasked: raise B and ES. */
+        fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldcw) : (x87_timings.fldcw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldcw) : (x87_concurrency.fldcw * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FNSTCW_a16(uint32_t fetchdat)
+{
+    uint16_t cwd = i387_get_control_word();
+    /* FNSTCW with 16-bit addressing: write the control word to the effective address; returns cpu_state.abrt. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    seteaw(cwd);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi)); /* was .fstenv; now matches the timing entry above */
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+static int
+sf_FNSTCW_a32(uint32_t fetchdat)
+{
+    uint16_t cwd = i387_get_control_word();
+    /* FNSTCW with 32-bit addressing: write the control word to the effective address; returns cpu_state.abrt. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    seteaw(cwd);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+static int
+sf_FNSTSW_a16(uint32_t fetchdat)
+{
+    uint16_t swd = i387_get_status_word();
+    /* FNSTSW with 16-bit addressing: write the status word to the effective address; returns cpu_state.abrt. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    seteaw(swd);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+static int
+sf_FNSTSW_a32(uint32_t fetchdat)
+{
+    uint16_t swd = i387_get_status_word();
+    /* FNSTSW with 32-bit addressing: write the status word to the effective address; returns cpu_state.abrt. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    seteaw(swd);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+#ifdef FPU_8087
+static int
+sf_FI(uint32_t fetchdat)
+{ /* FPU_8087 builds only: clears one control-word bit, then sets it again when rmdat is 0xe1. */
+    FP_ENTER();
+    cpu_state.pc++;
+    fpu_state.cwd &= ~FPU_SW_Summary; /* NOTE(review): status-word constant applied to the control word - presumably the same bit position as the intended mask bit; confirm */
+    if (rmdat == 0xe1)
+        fpu_state.cwd |= FPU_SW_Summary;
+    wait(3, 0);
+    return 0;
+}
+#else
+static int
+sf_FNSTSW_AX(uint32_t fetchdat)
+{ /* FNSTSW AX: copy the status word returned by i387_get_status_word() into AX. */
+    FP_ENTER();
+    cpu_state.pc++;
+    AX = i387_get_status_word();
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FRSTOR_a16(uint32_t fetchdat)
+{
+    floatx80 tmp;
+    int offset;
+    /* FRSTOR with 16-bit addressing: load the environment, then the eight register images that follow it. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    offset = fpu_load_environment(); /* address just past the environment image */
+    for (int n = 0; n < 8; n++) { /* each register image is 10 bytes: 64-bit fraction, then 16-bit exponent word */
+        tmp.fraction = readmemq(easeg, offset + (n * 10));
+        tmp.exp = readmemw(easeg, offset + (n * 10) + 8);
+        FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n); /* empty slots stay empty, others are re-tagged from the value */
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frstor) : (x87_concurrency.frstor * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+static int
+sf_FRSTOR_a32(uint32_t fetchdat)
+{
+    floatx80 tmp;
+    int offset;
+    /* FRSTOR with 32-bit addressing: load the environment, then the eight register images that follow it. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    offset = fpu_load_environment(); /* address just past the environment image */
+    for (int n = 0; n < 8; n++) { /* each register image is 10 bytes: 64-bit fraction, then 16-bit exponent word */
+        tmp.fraction = readmemq(easeg, offset + (n * 10));
+        tmp.exp = readmemw(easeg, offset + (n * 10) + 8);
+        FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n); /* empty slots stay empty, others are re-tagged from the value */
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frstor) : (x87_concurrency.frstor * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+static int
+sf_FNSAVE_a16(uint32_t fetchdat)
+{
+    floatx80 stn;
+    int offset;
+    /* FNSAVE with 16-bit addressing: store environment and all eight registers, then reinitialise the FPU. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    offset = fpu_save_environment(); /* address just past the environment image */
+    /* save all registers in stack order. */
+    for (int m = 0; m < 8; m++) {
+        stn = FPU_read_regi(m);
+        writememq(easeg, offset + (m * 10), stn.fraction);
+        writememw(easeg, offset + (m * 10) + 8, stn.exp);
+    }
+    /* Reset to the same values sf_FNINIT uses; the control word (not the status word) gets the 8087 default. */
+#ifdef FPU_8087
+    fpu_state.cwd = 0x3FF;
+#else
+    fpu_state.cwd = 0x37F;
+#endif
+    fpu_state.swd = 0;
+    fpu_state.tos = 0;
+    fpu_state.tag = 0xffff;
+    cpu_state.ismmx = 0;
+    fpu_state.foo = 0;
+    fpu_state.fds = 0;
+    fpu_state.fdp = 0;
+    fpu_state.fcs = 0;
+    fpu_state.fip = 0;
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+static int
+sf_FNSAVE_a32(uint32_t fetchdat)
+{
+    floatx80 stn;
+    int offset;
+    /* FNSAVE with 32-bit addressing: store environment and all eight registers, then reinitialise the FPU. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    offset = fpu_save_environment(); /* address just past the environment image */
+    /* save all registers in stack order. */
+    for (int m = 0; m < 8; m++) {
+        stn = FPU_read_regi(m);
+        writememq(easeg, offset + (m * 10), stn.fraction);
+        writememw(easeg, offset + (m * 10) + 8, stn.exp);
+    }
+    /* Reset to the same values sf_FNINIT uses; the control word (not the status word) gets the 8087 default. */
+#ifdef FPU_8087
+    fpu_state.cwd = 0x3FF;
+#else
+    fpu_state.cwd = 0x37F;
+#endif
+    fpu_state.swd = 0;
+    fpu_state.tos = 0;
+    fpu_state.tag = 0xffff;
+    cpu_state.ismmx = 0;
+    fpu_state.foo = 0;
+    fpu_state.fds = 0;
+    fpu_state.fdp = 0;
+    fpu_state.fcs = 0;
+    fpu_state.fip = 0;
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+static int
+sf_FNCLEX(uint32_t fetchdat)
+{
+    FP_ENTER();
+    cpu_state.pc++;
+    /* FNCLEX: clear the exception flags together with the stack-fault, summary and backward bits. */
+    fpu_state.swd &= ~(FPU_SW_Invalid | FPU_SW_Denormal_Op | FPU_SW_Zero_Div | FPU_SW_Overflow | FPU_SW_Underflow |
+               FPU_SW_Precision | FPU_SW_Stack_Fault | FPU_SW_Summary | FPU_SW_Backward);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FNINIT(uint32_t fetchdat)
+{ /* FNINIT: put the FPU state back to its initial values and end the current CPU block. */
+    FP_ENTER();
+    cpu_state.pc++;
+    cpu_state.ismmx = 0;
+    fpu_state.tag = 0xffff; /* all tag bits set */
+    fpu_state.tos = 0;
+    fpu_state.swd = 0;
+#ifndef FPU_8087
+    fpu_state.cwd = 0x37F;
+#else
+    fpu_state.cwd = 0x3FF;
+#endif
+    fpu_state.fip = 0; /* instruction and operand pointers, selectors and foo are zeroed */
+    fpu_state.fcs = 0;
+    fpu_state.fdp = 0;
+    fpu_state.fds = 0;
+    fpu_state.foo = 0;
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.finit) : (x87_timings.finit * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.finit) : (x87_concurrency.finit * cpu_multi));
+    CPU_BLOCK_END();
+    return 0;
+}
+
+static int
+sf_FLDENV_a16(uint32_t fetchdat)
+{
+    int tag;
+    /* FLDENV with 16-bit addressing: load the environment image, then re-derive the tags of occupied registers. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    fpu_load_environment();
+    /* read all registers in stack order and update the x87 tag word */
+    for (int n = 0; n < 8; n++) {
+        /* a slot tagged empty keeps that tag; any other tag is recomputed from the register value */
+        if (!IS_TAG_EMPTY(n)) {
+            tag = FPU_tagof(FPU_read_regi(n));
+            FPU_settagi(tag, n);
+        }
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldenv) : (x87_timings.fldenv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldenv) : (x87_concurrency.fldenv * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+static int
+sf_FLDENV_a32(uint32_t fetchdat)
+{
+    int tag;
+    /* FLDENV with 32-bit addressing: load the environment image, then re-derive the tags of occupied registers. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    fpu_load_environment();
+    /* read all registers in stack order and update the x87 tag word */
+    for (int n = 0; n < 8; n++) {
+        /* a slot tagged empty keeps that tag; any other tag is recomputed from the register value */
+        if (!IS_TAG_EMPTY(n)) {
+            tag = FPU_tagof(FPU_read_regi(n));
+            FPU_settagi(tag, n);
+        }
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldenv) : (x87_timings.fldenv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldenv) : (x87_concurrency.fldenv * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+static int
+sf_FNSTENV_a16(uint32_t fetchdat)
+{ /* FNSTENV with 16-bit addressing: store the environment image, then mask all exceptions. */
+    FP_ENTER();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    fpu_save_environment();
+    /* mask all floating point exceptions (done after the image has been written) */
+    fpu_state.cwd |= FPU_CW_Exceptions_Mask;
+    /* clear the B and ES bits in the status word */
+    fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+static int
+sf_FNSTENV_a32(uint32_t fetchdat)
+{ /* FNSTENV with 32-bit addressing: store the environment image, then mask all exceptions. */
+    FP_ENTER();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    fpu_save_environment();
+    /* mask all floating point exceptions (done after the image has been written) */
+    fpu_state.cwd |= FPU_CW_Exceptions_Mask;
+    /* clear the B and ES bits in the status word */
+    fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+static int
+sf_FNOP(uint32_t fetchdat)
+{ /* FNOP: advances pc and charges the fnop timing; no FPU state is modified here. */
+    FP_ENTER();
+    cpu_state.pc++;
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi));
+    return 0;
+}
diff --git a/src/cpu/x87_ops_sf_arith.h b/src/cpu/x87_ops_sf_arith.h
new file mode 100644
index 000000000..10b100b2a
--- /dev/null
+++ b/src/cpu/x87_ops_sf_arith.h
@@ -0,0 +1,750 @@
+#define sf_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix)                                                                         \
+    /* Generates the memory-operand handlers sf_F{ADD,DIV,DIVR,MUL,SUB,SUBR}<name>_a<a_size>. optype is the type of local temp; load_var = rw reads the operand; cycle_postfix picks the x87_timings/x87_concurrency fields. */ \
+    /* use_var and is_nan are expanded inside each body and may refer to the locals a, temp, result and status by name; when is_nan evaluates nonzero the arithmetic call is skipped. */ \
+    static int sf_FADD##name##_a##a_size(uint32_t fetchdat)                                                                                         \
+    {                                                                                                                                              \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp; \
+        FP_ENTER();                                                                                                                                \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; /* memory read; may set cpu_state.abrt, checked on the next line */ \
+        if (cpu_state.abrt) \
+            return 1;\
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { /* ST(0) empty: report underflow and skip the addition */ \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) /* FADD m: ST(0) + memory operand */ \
+            result = floatx80_add(a, use_var, &status); \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */ \
+            FPU_save_regi(result, 0); \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+    /* FDIV m: ST(0) = ST(0) / memory operand. Returns 1 if the operand read aborted, otherwise 0; both cycle counters use the fdiv table entries. */ \
+    static int sf_FDIV##name##_a##a_size(uint32_t fetchdat)                                                                                         \
+    {                                                                                                                                              \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp;                                                                                                                                  \
+        FP_ENTER();                                                                                                                                \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1;\
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        /* is_nan receives &result in the float instantiations; the division runs only when it evaluates to 0. */ \
+        if (!is_nan) \
+            result = floatx80_div(a, use_var, &status); \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
+            FPU_save_regi(result, 0); \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+    /* FDIVR m: ST(0) = memory operand / ST(0) (operands reversed relative to FDIV). Returns 1 if the operand read aborted, otherwise 0. */ \
+    static int sf_FDIVR##name##_a##a_size(uint32_t fetchdat)                                                                                        \
+    {                                                                                                                                              \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp; \
+        FP_ENTER();                                                                                                                                \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1;\
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { /* ST(0) empty: report underflow and skip the division */ \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) { /* skipped when the is_nan expression evaluates nonzero */ \
+            result = floatx80_div(use_var, a, &status);        \
+        } \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */ \
+            FPU_save_regi(result, 0); \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+    /* FMUL m: ST(0) = ST(0) * memory operand. Returns 1 if the operand read aborted, otherwise 0. */ \
+    static int sf_FMUL##name##_a##a_size(uint32_t fetchdat)                                                                                         \
+    {                                                                                                                                              \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp;                                                                                                                                  \
+        FP_ENTER();                                                                                                                                \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1;\
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { /* ST(0) empty: report underflow and skip the multiplication */ \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) { /* skipped when the is_nan expression evaluates nonzero */ \
+            result = floatx80_mul(a, use_var, &status); \
+        } \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */ \
+            FPU_save_regi(result, 0); \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul##cycle_postfix) : ((x87_timings.fmul##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul##cycle_postfix) : ((x87_concurrency.fmul##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+    /* FSUB m: ST(0) = ST(0) - memory operand. Returns 1 if the operand read aborted, otherwise 0. */ \
+    static int sf_FSUB##name##_a##a_size(uint32_t fetchdat)                                                                                         \
+    {                                                                                                                                              \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp;                                                                                                                                  \
+        FP_ENTER();                                                                                                                                \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1;\
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { /* ST(0) empty: report underflow and skip the subtraction */ \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) /* skipped when the is_nan expression evaluates nonzero */ \
+            result = floatx80_sub(a, use_var, &status); \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */ \
+            FPU_save_regi(result, 0); \
+        /* Subtraction is charged with the fadd entries of the timing tables. */ \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+    /* FSUBR m: ST(0) = memory operand - ST(0) (operands reversed relative to FSUB). Returns 1 if the operand read aborted, otherwise 0. */ \
+    static int sf_FSUBR##name##_a##a_size(uint32_t fetchdat)                                                                                        \
+    {                                                                                                                                              \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp;                                                                                                                                  \
+        FP_ENTER();                                                                                                                                \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1;\
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { /* ST(0) empty: report underflow and skip the subtraction */ \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) /* skipped when the is_nan expression evaluates nonzero */ \
+            result = floatx80_sub(use_var, a, &status); \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */ \
+            FPU_save_regi(result, 0); \
+        /* Reverse subtraction is charged with the fadd entries of the timing tables. */ \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }
+
+// clang-format off
+sf_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), FPU_handle_NaN32(a, temp, &result, &status), _32) /* float32 operand, 16-bit addressing */
+#ifndef FPU_8087 /* every _a32 instantiation is omitted when FPU_8087 is defined */
+sf_FPU(s, float32, 32, temp, geteal(), float32_to_floatx80(temp, &status), FPU_handle_NaN32(a, temp, &result, &status), _32)
+#endif
+sf_FPU(d, float64, 16, temp, geteaq(), float64_to_floatx80(temp, &status), FPU_handle_NaN64(a, temp, &result, &status), _64) /* float64 operand, read with geteaq() */
+#ifndef FPU_8087
+sf_FPU(d, float64, 32, temp, geteaq(), float64_to_floatx80(temp, &status), FPU_handle_NaN64(a, temp, &result, &status), _64)
+#endif
+/* Integer operands: is_nan is the constant 0, so the arithmetic call always runs; the raw value is cast to a signed type before conversion to floatx80. */
+sf_FPU(iw, uint16_t, 16, temp, geteaw(), int32_to_floatx80((int16_t)temp), 0, _i16)
+#ifndef FPU_8087
+sf_FPU(iw, uint16_t, 32, temp, geteaw(), int32_to_floatx80((int16_t)temp), 0, _i16)
+#endif
+sf_FPU(il, uint32_t, 16, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32)
+#ifndef FPU_8087
+sf_FPU(il, uint32_t, 32, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32)
+#endif
+// clang-format on
+
+/* FADD ST(0), ST(j): ST(0) = ST(0) + ST(j), with j taken from the low three bits of fetchdat. */
+static int
+sf_FADD_st0_stj(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, stj, sum;
+    const int             src = fetchdat & 7;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(src)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* an operand slot is empty: no addition is attempted */
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        stj       = FPU_read_regi(src);
+        sum       = floatx80_add(st0, stj, &sf_status);
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0))
+            FPU_save_regi(sum, 0); /* written back only when FPU_exception() returns 0 */
+    }
+    /* Timing is charged on both paths; table values are scaled by cpu_multi below FPU_487SX. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+/* FADD ST(i), ST(0): ST(i) = ST(i) + ST(0), with i taken from the low three bits of fetchdat. */
+static int
+sf_FADD_sti_st0(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              sti, st0, sum;
+    const int             dst = fetchdat & 7;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(dst)) {
+        FPU_stack_underflow(fetchdat, dst, 0); /* an operand slot is empty: no addition is attempted */
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        sti       = FPU_read_regi(dst);
+        st0       = FPU_read_regi(0);
+        sum       = floatx80_add(sti, st0, &sf_status);
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0))
+            FPU_save_regi(sum, dst); /* written back only when FPU_exception() returns 0 */
+    }
+    /* Timing is charged on both paths; table values are scaled by cpu_multi below FPU_487SX. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+
+/* FADDP ST(i), ST(0): ST(i) = ST(i) + ST(0), then FPU_pop(); the pop happens only together with the store. */
+static int
+sf_FADDP_sti_st0(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              sti, st0, sum;
+    const int             dst = fetchdat & 7;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(dst)) {
+        FPU_stack_underflow(fetchdat, dst, 1); /* an operand slot is empty: no addition is attempted */
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        sti       = FPU_read_regi(dst);
+        st0       = FPU_read_regi(0);
+        sum       = floatx80_add(sti, st0, &sf_status);
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_save_regi(sum, dst);
+            FPU_pop();
+        }
+    }
+    /* Timing is charged on both paths; table values are scaled by cpu_multi below FPU_487SX. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+
+/* FDIV ST(0), ST(j): ST(0) = ST(0) / ST(j), j = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FDIV_st0_stj(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_st0_stj and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* an operand slot is empty: no division is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0);
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_div(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
+    return 0;
+}
+
+/* FDIV ST(i), ST(0): ST(i) = ST(i) / ST(0), i = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FDIV_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 0); /* an operand slot is empty: no division is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7);
+    b = FPU_read_regi(0);
+    result = floatx80_div(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
+    return 0;
+}
+/* FDIVP ST(i), ST(0): ST(i) = ST(i) / ST(0), then FPU_pop(); store and pop are skipped together. */
+static int
+sf_FDIVP_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADDP_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 1); /* an operand slot is empty: no division is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7);
+    b = FPU_read_regi(0);
+    result = floatx80_div(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store and pop only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+        FPU_pop();
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
+    return 0;
+}
+
+/* FDIVR ST(0), ST(j): ST(0) = ST(j) / ST(0), j = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FDIVR_st0_stj(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_st0_stj and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* an operand slot is empty: no division is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7); /* reversed form: ST(j) is the dividend */
+    b = FPU_read_regi(0);
+    result = floatx80_div(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
+    return 0;
+}
+/* FDIVR ST(i), ST(0): ST(i) = ST(0) / ST(i), i = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FDIVR_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 0); /* an operand slot is empty: no division is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0); /* reversed form: ST(0) is the dividend */
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_div(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
+    return 0;
+}
+/* FDIVRP ST(i), ST(0): ST(i) = ST(0) / ST(i), then FPU_pop(); store and pop are skipped together. */
+static int
+sf_FDIVRP_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADDP_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 1); /* an operand slot is empty: no division is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0); /* reversed form: ST(0) is the dividend */
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_div(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store and pop only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+        FPU_pop();
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
+    return 0;
+}
+
+/* FMUL ST(0), ST(j): ST(0) = ST(0) * ST(j), j = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FMUL_st0_stj(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_st0_stj and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* an operand slot is empty: no multiplication is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0);
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_mul(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi));
+    return 0;
+}
+/* FMUL ST(i), ST(0): ST(i) = ST(0) * ST(i), i = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FMUL_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 0); /* an operand slot is empty: no multiplication is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0);
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_mul(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi));
+    return 0;
+}
+/* FMULP ST(i), ST(0): ST(i) = ST(i) * ST(0), then FPU_pop(); store and pop are skipped together. */
+static int
+sf_FMULP_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADDP_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 1); /* an operand slot is empty: no multiplication is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7);
+    b = FPU_read_regi(0);
+    result = floatx80_mul(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store and pop only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+        FPU_pop();
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi));
+    return 0;
+}
+
+/* FSUB ST(0), ST(j): ST(0) = ST(0) - ST(j), j = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FSUB_st0_stj(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_st0_stj and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* an operand slot is empty: no subtraction is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0);
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_sub(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+/* FSUB ST(i), ST(0): ST(i) = ST(i) - ST(0), i = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FSUB_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 0); /* an operand slot is empty: no subtraction is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7);
+    b = FPU_read_regi(0);
+    result = floatx80_sub(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+/* FSUBP ST(i), ST(0): ST(i) = ST(i) - ST(0), then FPU_pop(); store and pop are skipped together. */
+static int
+sf_FSUBP_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADDP_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 1); /* an operand slot is empty: no subtraction is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7);
+    b = FPU_read_regi(0);
+    result = floatx80_sub(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store and pop only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+        FPU_pop();
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+
+/* FSUBR ST(0), ST(j): ST(0) = ST(j) - ST(0), j = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FSUBR_st0_stj(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_st0_stj and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* an operand slot is empty: no subtraction is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(fetchdat & 7); /* reversed form: ST(j) is the minuend */
+    b = FPU_read_regi(0);
+    result = floatx80_sub(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+/* FSUBR ST(i), ST(0): ST(i) = ST(0) - ST(i), i = fetchdat & 7. Every path visible here ends in "return 0". */
+static int
+sf_FSUBR_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADD_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 0); /* an operand slot is empty: no subtraction is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0); /* reversed form: ST(0) is the minuend */
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_sub(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+/* FSUBRP ST(i), ST(0): ST(i) = ST(0) - ST(i), then FPU_pop(); store and pop are skipped together. */
+static int
+sf_FSUBRP_sti_st0(uint32_t fetchdat)
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as sf_FADDP_sti_st0 and the memory-operand forms */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_stack_underflow(fetchdat, fetchdat & 7, 1); /* an operand slot is empty: no subtraction is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    a = FPU_read_regi(0); /* reversed form: ST(0) is the minuend */
+    b = FPU_read_regi(fetchdat & 7);
+    result = floatx80_sub(a, b, &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store and pop only when this returns 0 */
+        FPU_save_regi(result, fetchdat & 7);
+        FPU_pop();
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
+    return 0;
+}
+
+/* FSQRT: ST(0) = floatx80_sqrt(ST(0)). Every path visible here ends in "return 0". */
+static int
+sf_FSQRT(uint32_t fetchdat)
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as the sf_FADD_* handlers */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* ST(0) is empty: no square root is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    result = floatx80_sqrt(FPU_read_regi(0), &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsqrt) : (x87_timings.fsqrt * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsqrt) : (x87_concurrency.fsqrt * cpu_multi));
+    return 0;
+}
+
+/* FRNDINT: ST(0) = floatx80_round_to_int(ST(0)) under the current control word. Every path visible here ends in "return 0". */
+static int
+sf_FRNDINT(uint32_t fetchdat)
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions(); /* same pre-execution check as the sf_FADD_* handlers */
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_stack_underflow(fetchdat, 0, 0); /* ST(0) is empty: no rounding is attempted */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    result = floatx80_round_to_int(FPU_read_regi(0), &status);
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store only when this returns 0 */
+        FPU_save_regi(result, 0);
+    }
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frndint) : (x87_timings.frndint * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frndint) : (x87_concurrency.frndint * cpu_multi));
+    return 0;
+}
diff --git a/src/cpu/x87_ops_sf_compare.h b/src/cpu/x87_ops_sf_compare.h
new file mode 100644
index 000000000..59135c05b
--- /dev/null
+++ b/src/cpu/x87_ops_sf_compare.h
@@ -0,0 +1,489 @@
+#define cmp_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix)                                                                         \
+    static int sf_FCOM##name##_a##a_size(uint32_t fetchdat) /* FCOM with a memory operand: compare against ST(0), stack left as is */                \
+    { /* name and a_size form the function name; a_size also selects fetch_ea_##a_size */                                                          \
+        floatx80 a; /* value read from ST(0) */ \
+        int rc; \
+        struct float_status_t status; \
+        optype temp; /* optype is the C type of this local */ \
+        FP_ENTER();                                                                                                                                \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; /* rw is the memory read expression supplied by the instantiation */ \
+        if (cpu_state.abrt) \
+            return 1; /* abort flag set after the read: return 1 without touching FPU state */ \
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { \
+            FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \
+            setcc(C0 | C2 | C3); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (is_nan) { /* instantiation-supplied test: force an unordered result and raise invalid */ \
+            rc = float_relation_unordered; \
+            float_raise(&status, float_flag_invalid); \
+        } else { \
+            rc = floatx80_compare_two(a, use_var, &status); /* use_var: second operand expression */ \
+        } \
+        setcc(FPU_status_word_flags_fpu_compare(rc)); \
+        FPU_exception(fetchdat, status.float_exception_flags, 0); \
+        \
+next_ins: /* cycle_postfix is pasted onto the fcom timing field name */ \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+    static int sf_FCOMP##name##_a##a_size(uint32_t fetchdat) /* FCOMP with a memory operand: same compare, followed by a pop */                     \
+    {                                                                                                                                              \
+        floatx80 a; /* value read from ST(0) */ \
+        int rc; \
+        struct float_status_t status; \
+        optype temp;                                                                                                                                  \
+        FP_ENTER();                                                                                                                                \
+        fetch_ea_##a_size(fetchdat);                                                                                                               \
+        SEG_CHECK_READ(cpu_state.ea_seg);                                                                                                          \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1; /* abort flag set after the read: return 1 without touching FPU state */ \
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { \
+            FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \
+            setcc(C0 | C2 | C3); \
+            if (is_IA_masked()) /* on underflow the pop happens only when is_IA_masked() is true */ \
+                FPU_pop(); \
+            \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (is_nan) { /* instantiation-supplied test: force an unordered result and raise invalid */ \
+            rc = float_relation_unordered; \
+            float_raise(&status, float_flag_invalid); \
+        } else { \
+            rc = floatx80_compare_two(a, use_var, &status); \
+        } \
+        setcc(FPU_status_word_flags_fpu_compare(rc)); \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) /* pop only when FPU_exception() returns 0 */ \
+            FPU_pop(); \
+        \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi));           \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \
+        return 0;                                                                                                                                  \
+    }                                                                                                                                              \
+
+// clang-format off
+cmp_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32)
+#ifndef FPU_8087 /* the _a32 forms are only generated when FPU_8087 is not defined */
+cmp_FPU(s, float32, 32, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32)
+#endif /* !FPU_8087 */
+cmp_FPU(d, float64, 16, temp, geteaq(), float64_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float64_is_nan(temp), _64)
+#ifndef FPU_8087
+cmp_FPU(d, float64, 32, temp, geteaq(), float64_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float64_is_nan(temp), _64)
+#endif /* !FPU_8087 */
+/* Integer operands pass is_nan as 0, so they always go through floatx80_compare_two(). */
+cmp_FPU(iw, int16_t, 16, temp, (int16_t)geteaw(), int32_to_floatx80((int32_t)temp), 0, _i16)
+#ifndef FPU_8087
+cmp_FPU(iw, int16_t, 32, temp, (int16_t)geteaw(), int32_to_floatx80((int32_t)temp), 0, _i16)
+#endif /* !FPU_8087 */
+cmp_FPU(il, int32_t, 16, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32)
+#ifndef FPU_8087
+cmp_FPU(il, int32_t, 32, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32)
+#endif /* !FPU_8087 */
+// clang-format on
+
+static int
+sf_FCOM_sti(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, sti;
+    int                   relation;
+    /* FCOM ST(i): compare ST(0) with the register picked by fetchdat & 7. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        sti       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_two(st0, sti, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FCOMP_sti(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, sti;
+    int                   relation;
+    /* FCOMP ST(i): compare ST(0) with ST(fetchdat & 7), then pop once. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+        if (is_IA_masked()) {
+            FPU_pop();
+        }
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        sti       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_two(st0, sti, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_pop();
+        }
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FCOMPP(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, st1;
+    int                   relation;
+    /* FCOMPP: compare ST(0) with ST(1), then pop both. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+        if (is_IA_masked()) {
+            FPU_pop();
+            FPU_pop();
+        }
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        st1       = FPU_read_regi(1);
+        relation  = floatx80_compare_two(st0, st1, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_pop();
+            FPU_pop();
+        }
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
+    return 0;
+}
+
+#ifndef FPU_8087
+static int
+sf_FUCOMPP(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, st1;
+    int                   relation;
+    /* FUCOMPP: floatx80_compare_quiet() of ST(0) and ST(1), then pop both. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+        if (is_IA_masked()) {
+            FPU_pop();
+            FPU_pop();
+        }
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        st1       = FPU_read_regi(1);
+        relation  = floatx80_compare_quiet(st0, st1, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_pop();
+            FPU_pop();
+        }
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FCOMI_st0_stj(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, stj;
+    int                   relation;
+    /* FCOMI: compare ST(0) with ST(fetchdat & 7); the result goes to cpu_state.flags. */
+    FP_ENTER();
+    cpu_state.pc++;
+    flags_rebuild();
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        stj       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_two(st0, stj, &sf_status);
+        FPU_write_eflags_fpu_compare(relation);
+        FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
+    return 0;
+}
+static int
+sf_FCOMIP_st0_stj(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, stj;
+    int                   relation;
+    /* FCOMIP: as FCOMI (result in cpu_state.flags), followed by one pop. */
+    FP_ENTER();
+    cpu_state.pc++;
+    flags_rebuild();
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
+        if (is_IA_masked()) {
+            FPU_pop();
+        }
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        stj       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_two(st0, stj, &sf_status);
+        FPU_write_eflags_fpu_compare(relation);
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_pop();
+        }
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FUCOM_sti(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, sti;
+    int                   relation;
+    /* FUCOM ST(i): floatx80_compare_quiet() of ST(0) and ST(fetchdat & 7). */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        sti       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_quiet(st0, sti, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FUCOMP_sti(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, sti;
+    int                   relation;
+    /* FUCOMP ST(i): quiet compare of ST(0) with ST(fetchdat & 7), then pop once. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+        if (is_IA_masked()) {
+            FPU_pop();
+        }
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        sti       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_quiet(st0, sti, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_pop();
+        }
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FUCOMI_st0_stj(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, stj;
+    int                   relation;
+    /* FUCOMI: quiet compare of ST(0) with ST(fetchdat & 7); result goes to cpu_state.flags. */
+    FP_ENTER();
+    cpu_state.pc++;
+    flags_rebuild();
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        stj       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_quiet(st0, stj, &sf_status);
+        FPU_write_eflags_fpu_compare(relation);
+        FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+    }
+
+    /* Cycle accounting runs for both outcomes. */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
+    return 0;
+}
+static int
+sf_FUCOMIP_st0_stj(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    floatx80              st0, stj;
+    int                   relation;
+    /* FUCOMIP: as FUCOMI (result in cpu_state.flags), followed by one pop. */
+    FP_ENTER();
+    cpu_state.pc++;
+    flags_rebuild();
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
+        if (is_IA_masked()) {
+            FPU_pop();
+        }
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        st0       = FPU_read_regi(0);
+        stj       = FPU_read_regi(fetchdat & 7);
+        relation  = floatx80_compare_quiet(st0, stj, &sf_status);
+        FPU_write_eflags_fpu_compare(relation);
+        if (!FPU_exception(fetchdat, sf_status.float_exception_flags, 0)) {
+            FPU_pop();
+        }
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FTST(uint32_t fetchdat)
+{
+    struct float_status_t sf_status;
+    int                   relation;
+    /* FTST: compare ST(0) against Const_Z and report via the condition codes. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (!IS_TAG_EMPTY(0)) {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        relation  = floatx80_compare_two(FPU_read_regi(0), Const_Z, &sf_status);
+        setcc(FPU_status_word_flags_fpu_compare(relation));
+        FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+    } else {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        setcc(C0 | C2 | C3);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ftst) : (x87_timings.ftst * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ftst) : (x87_concurrency.ftst * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FXAM(uint32_t fetchdat)
+{
+    floatx80 reg;
+    int sign;
+    float_class_t aClass;
+    /* FXAM: classify ST(0) into the condition codes; no FPU_exception() call is made here. */
+    FP_ENTER();
+    cpu_state.pc++;
+    reg = FPU_read_regi(0); /* read before the empty check: the sign is used on both paths */
+    sign = floatx80_sign(reg);
+  /*
+   * Examines the contents of the ST(0) register and sets the condition
+   * code flags C0, C2 and C3 in the FPU status word to indicate the
+   * class of value or number in the register.
+   */
+    if (IS_TAG_EMPTY(0)) {
+        setcc(C3 | C1 | C0);
+    } else {
+        aClass = floatx80_class(reg);
+        switch (aClass) {
+            case float_zero:
+                setcc(C3 | C1);
+                break;
+            case float_SNaN:
+            case float_QNaN:
+                // unsupported handled as NaNs
+                if (floatx80_is_unsupported(reg)) {
+                    setcc(C1); /* unsupported encoding: neither C0, C2 nor C3 */
+                } else {
+                    setcc(C1 | C0);
+                }
+                break;
+            case float_negative_inf:
+            case float_positive_inf:
+                setcc(C2 | C1 | C0);
+                break;
+            case float_denormal:
+                setcc(C3 | C2 | C1);
+                break;
+            case float_normalized:
+                setcc(C2 | C1);
+                break;
+        }
+    }
+  /*
+   * The C1 flag is set to the sign of the value in ST(0), regardless
+   * of whether the register is empty or full.
+   */
+    if (!sign) /* every setcc() call above passes C1; drop it again for a clear sign bit */
+        clear_C1();
+
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxam) : (x87_timings.fxam * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxam) : (x87_concurrency.fxam * cpu_multi));
+    return 0;
+}
diff --git a/src/cpu/x87_ops_sf_const.h b/src/cpu/x87_ops_sf_const.h
new file mode 100644
index 000000000..708c6ff7a
--- /dev/null
+++ b/src/cpu/x87_ops_sf_const.h
@@ -0,0 +1,131 @@
+/* Non-zero when the FPU_CW_RC field of fpu_state.cwd selects RC_DOWN or RC_CHOP
+   (and not RC_RND or RC_UP). One mask test covers both modes; this presumably
+   relies on the FPU_RC_DOWN bit also being set in the chop encoding - TODO confirm. */
+#define DOWN_OR_CHOP()  (fpu_state.cwd & FPU_CW_RC & FPU_RC_DOWN)
+
+static __inline floatx80
+FPU_round_const(const floatx80 a, int adj)
+{
+    floatx80 adjusted = a;    /* work on a copy of the const argument */
+    adjusted.fraction += adj; /* adj is added to the raw fraction field as-is */
+    return adjusted;
+}
+
+static int
+sf_FLDL2T(uint32_t fetchdat)
+{
+    /* FLDL2T: push Const_L2T; its fraction is bumped by one when RC equals FPU_RC_UP. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(FPU_round_const(Const_L2T, (fpu_state.cwd & FPU_CW_RC) == FPU_RC_UP), 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FLDL2E(uint32_t fetchdat)
+{
+    /* FLDL2E: push Const_L2E; its fraction is lowered by one when DOWN_OR_CHOP() holds. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(FPU_round_const(Const_L2E, DOWN_OR_CHOP() ? -1 : 0), 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FLDPI(uint32_t fetchdat)
+{
+    /* FLDPI: push Const_PI; its fraction is lowered by one when DOWN_OR_CHOP() holds. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(FPU_round_const(Const_PI, DOWN_OR_CHOP() ? -1 : 0), 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FLDEG2(uint32_t fetchdat)
+{
+    /* Push Const_LG2; its fraction is lowered by one when DOWN_OR_CHOP() holds. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(FPU_round_const(Const_LG2, DOWN_OR_CHOP() ? -1 : 0), 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FLDLN2(uint32_t fetchdat)
+{
+    /* FLDLN2: push Const_LN2; its fraction is lowered by one when DOWN_OR_CHOP() holds. */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(FPU_round_const(Const_LN2, DOWN_OR_CHOP() ? -1 : 0), 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FLD1(uint32_t fetchdat)
+{
+    /* FLD1: push Const_1 unmodified (no rounding-mode adjustment). */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(Const_1, 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_z1) : (x87_timings.fld_z1 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_z1) : (x87_concurrency.fld_z1 * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FLDZ(uint32_t fetchdat)
+{
+    /* FLDZ: push Const_Z unmodified (no rounding-mode adjustment). */
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(Const_Z, 0);
+    } else
+        FPU_stack_overflow(fetchdat);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_z1) : (x87_timings.fld_z1 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_z1) : (x87_concurrency.fld_z1 * cpu_multi));
+    return 0;
+}
diff --git a/src/cpu/x87_ops_sf_load_store.h b/src/cpu/x87_ops_sf_load_store.h
new file mode 100644
index 000000000..69bc5598c
--- /dev/null
+++ b/src/cpu/x87_ops_sf_load_store.h
@@ -0,0 +1,1312 @@
+/*
+ * 86Box    A hypervisor and IBM PC system emulator that specializes in
+ *          running old operating systems and software designed for IBM
+ *          PC systems and compatibles from 1981 through fairly recent
+ *          system designs based on the PCI bus.
+ *
+ *          This file is part of the 86Box distribution.
+ *
+ *          x87 FPU instructions core.
+ *
+ *
+ *
+ * Authors: Sarah Walker, <https://pcem-emulator.co.uk/>
+ *          Miran Grca, <mgrca8@gmail.com>
+ *
+ *          Copyright 2008-2019 Sarah Walker.
+ *          Copyright 2016-2019 Miran Grca.
+ */
+
+#define swap_values16u(a, b) { uint16_t tmp = a; a = b; b = tmp; } /* swaps two lvalues through a uint16_t temporary; expands to a bare { } block and evaluates each argument twice */
+
+static int
+sf_FILDiw_a16(uint32_t fetchdat)
+{
+    int16_t  operand;
+    floatx80 converted;
+    /* FILD, int16_t operand, 16-bit addressing: convert the integer and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteaw();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        converted = int32_to_floatx80(operand);
+        FPU_push();
+        FPU_save_regi(converted, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_16) : (x87_timings.fild_16 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_16) : (x87_concurrency.fild_16 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+static int
+sf_FILDiw_a32(uint32_t fetchdat)
+{
+    int16_t  operand;
+    floatx80 converted;
+    /* FILD, int16_t operand, 32-bit addressing: convert the integer and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteaw();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        converted = int32_to_floatx80(operand);
+        FPU_push();
+        FPU_save_regi(converted, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_16) : (x87_timings.fild_16 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_16) : (x87_concurrency.fild_16 * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FILDil_a16(uint32_t fetchdat)
+{
+    int32_t  operand;
+    floatx80 converted;
+    /* FILD, int32_t operand, 16-bit addressing: convert the integer and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteal();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        converted = int32_to_floatx80(operand);
+        FPU_push();
+        FPU_save_regi(converted, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_32) : (x87_timings.fild_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_32) : (x87_concurrency.fild_32 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+static int
+sf_FILDil_a32(uint32_t fetchdat)
+{
+    int32_t  operand;
+    floatx80 converted;
+    /* FILD, int32_t operand, 32-bit addressing: convert the integer and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteal();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        converted = int32_to_floatx80(operand);
+        FPU_push();
+        FPU_save_regi(converted, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_32) : (x87_timings.fild_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_32) : (x87_concurrency.fild_32 * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FILDiq_a16(uint32_t fetchdat)
+{
+    int64_t  operand;
+    floatx80 converted;
+    /* FILD, int64_t operand, 16-bit addressing: convert the integer and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteaq();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        converted = int64_to_floatx80(operand);
+        FPU_push();
+        FPU_save_regi(converted, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_64) : (x87_timings.fild_64 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_64) : (x87_concurrency.fild_64 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+static int
+sf_FILDiq_a32(uint32_t fetchdat)
+{
+    int64_t  operand;
+    floatx80 converted;
+    /* FILD, int64_t operand, 32-bit addressing: convert the integer and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteaq();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        converted = int64_to_floatx80(operand);
+        FPU_push();
+        FPU_save_regi(converted, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_64) : (x87_timings.fild_64 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_64) : (x87_concurrency.fild_64 * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FLDs_a16(uint32_t fetchdat)
+{
+    struct float_status_t status;
+    floatx80 result;
+    float32 load_reg;
+    unsigned unmasked;
+    /* FLD, float32 operand, 16-bit addressing: widen the value to floatx80 and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    load_reg = geteal();
+    if (cpu_state.abrt)
+        return 1; /* abort flag set after the operand read */
+    clear_C1();
+    if (!IS_TAG_EMPTY(-1)) {
+        FPU_stack_overflow(fetchdat);
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    result = float32_to_floatx80(load_reg, &status);
+    unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0);
+    if (!(unmasked & FPU_CW_Invalid)) { /* no push when FPU_exception() reports Invalid */
+        FPU_push();
+        FPU_save_regi(result, 0);
+    }
+    /* A load is charged the fld_32 cost (previously fst_32, the store timing), in line with FLDd/FLDe. */
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_32) : (x87_timings.fld_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_32) : (x87_concurrency.fld_32 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+static int
+sf_FLDs_a32(uint32_t fetchdat)
+{
+    struct float_status_t status;
+    floatx80 result;
+    float32 load_reg;
+    unsigned unmasked;
+    /* FLD, float32 operand, 32-bit addressing: widen the value to floatx80 and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    load_reg = geteal();
+    if (cpu_state.abrt)
+        return 1; /* abort flag set after the operand read */
+    clear_C1();
+    if (!IS_TAG_EMPTY(-1)) {
+        FPU_stack_overflow(fetchdat);
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word());
+    result = float32_to_floatx80(load_reg, &status);
+    unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0);
+    if (!(unmasked & FPU_CW_Invalid)) { /* no push when FPU_exception() reports Invalid */
+        FPU_push();
+        FPU_save_regi(result, 0);
+    }
+    /* A load is charged the fld_32 cost (previously fst_32, the store timing), in line with FLDd/FLDe. */
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_32) : (x87_timings.fld_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_32) : (x87_concurrency.fld_32 * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FLDd_a16(uint32_t fetchdat)
+{
+    float64               operand;
+    floatx80              widened;
+    struct float_status_t sf_status;
+    unsigned              raised;
+    /* FLD, float64 operand, 16-bit addressing: widen the value to floatx80 and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteaq();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (!IS_TAG_EMPTY(-1)) {
+        FPU_stack_overflow(fetchdat);
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        widened   = float64_to_floatx80(operand, &sf_status);
+        raised    = FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+        /* The push is skipped when FPU_exception() reports FPU_CW_Invalid. */
+        if (!(raised & FPU_CW_Invalid)) {
+            FPU_push();
+            FPU_save_regi(widened, 0);
+        }
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_64) : (x87_timings.fld_64 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_64) : (x87_concurrency.fld_64 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+static int
+sf_FLDd_a32(uint32_t fetchdat)
+{
+    float64               operand;
+    floatx80              widened;
+    struct float_status_t sf_status;
+    unsigned              raised;
+    /* FLD, float64 operand, 32-bit addressing: widen the value to floatx80 and push it. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    operand = geteaq();
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (!IS_TAG_EMPTY(-1)) {
+        FPU_stack_overflow(fetchdat);
+    } else {
+        sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        widened   = float64_to_floatx80(operand, &sf_status);
+        raised    = FPU_exception(fetchdat, sf_status.float_exception_flags, 0);
+        /* The push is skipped when FPU_exception() reports FPU_CW_Invalid. */
+        if (!(raised & FPU_CW_Invalid)) {
+            FPU_push();
+            FPU_save_regi(widened, 0);
+        }
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_64) : (x87_timings.fld_64 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_64) : (x87_concurrency.fld_64 * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FLDe_a16(uint32_t fetchdat)
+{
+    floatx80 ext80;
+    /* FLD, floatx80 operand, 16-bit addressing: fraction via readmemq, exp via readmemw at +8. */
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    ext80.fraction = readmemq(easeg, cpu_state.eaaddr);
+    ext80.exp      = readmemw(easeg, cpu_state.eaaddr + 8);
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(ext80, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_80) : (x87_timings.fld_80 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_80) : (x87_concurrency.fld_80 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+/* FLD m80real (fetch_ea_32): push the ten-byte extended real read from memory. */
+static int
+sf_FLDe_a32(uint32_t fetchdat)
+{
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_READ(cpu_state.ea_seg);
+    floatx80 ext_val;
+    ext_val.fraction = readmemq(easeg, cpu_state.eaaddr);
+    ext_val.exp = readmemw(easeg, cpu_state.eaaddr + 8);
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(-1)) {
+        FPU_push();
+        FPU_save_regi(ext_val, 0);
+    } else {
+        FPU_stack_overflow(fetchdat);
+    }
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fld_80 * cpu_multi) : (x87_timings.fld_80));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fld_80 * cpu_multi) : (x87_concurrency.fld_80));
+    return 0;
+}
+#endif
+
+/* FLD ST(i): push a copy of the stack register selected by the low three bits
+ * of fetchdat. Always returns 0. */
+static int
+sf_FLD_sti(uint32_t fetchdat)
+{
+    floatx80 pushed = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+    int do_push = 1;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    if (!IS_TAG_EMPTY(-1)) {
+        FPU_stack_overflow(fetchdat);
+        do_push = 0;
+    } else if (IS_TAG_EMPTY(fetchdat & 7)) {
+        /* Empty source: the default NaN is pushed, but only if is_IA_masked() holds. */
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_push = is_IA_masked() ? 1 : 0;
+    } else {
+        pushed = FPU_read_regi(fetchdat & 7);
+    }
+    if (do_push) {
+        FPU_push();
+        FPU_save_regi(pushed, 0);
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fld * cpu_multi) : (x87_timings.fld));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fld * cpu_multi) : (x87_concurrency.fld));
+    return 0;
+}
+
+/* FIST m16int (effective address decoded with fetch_ea_16): convert ST(0) to a
+ * 16-bit integer and store it without popping. Returns cpu_state.abrt. */
+static int
+sf_FISTiw_a16(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    int16_t save_reg = int16_indefinite; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_int16(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    /* The store can abort, so it runs with the entry status word; sw holds the new one. */
+    swap_values16u(sw, fpu_state.swd);
+    seteaw(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_16) : (x87_timings.fist_16 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_16) : (x87_concurrency.fist_16 * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+/* FIST m16int (fetch_ea_32): store ST(0) as a 16-bit integer, no pop. Returns cpu_state.abrt. */
+static int
+sf_FISTiw_a32(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    int16_t save_reg = int16_indefinite; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_int16(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    swap_values16u(sw, fpu_state.swd); /* entry status word stays live while the store may abort */
+    seteaw(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_16) : (x87_timings.fist_16 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_16) : (x87_concurrency.fist_16 * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+/* FISTP m16int (effective address decoded with fetch_ea_16): store ST(0) as a
+ * 16-bit integer, then pop. Returns 1 if the store aborted, otherwise 0. */
+static int
+sf_FISTPiw_a16(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    int16_t out_val = int16_indefinite;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_int16(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteaw(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fist_16 * cpu_multi) : (x87_timings.fist_16));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fist_16 * cpu_multi) : (x87_concurrency.fist_16));
+    return 0;
+}
+#ifndef FPU_8087
+/* FISTP m16int (fetch_ea_32): store ST(0) as a 16-bit integer, then pop. */
+static int
+sf_FISTPiw_a32(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    int16_t out_val = int16_indefinite;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_int16(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteaw(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fist_16 * cpu_multi) : (x87_timings.fist_16));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fist_16 * cpu_multi) : (x87_concurrency.fist_16));
+    return 0;
+}
+#endif
+
+/* FIST m32int (fetch_ea_16): store ST(0) as a 32-bit integer, no pop. Returns cpu_state.abrt. */
+static int
+sf_FISTil_a16(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    int32_t save_reg = int32_indefinite; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_int32(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    /* The store can abort, so it runs with the entry status word; sw holds the new one. */
+    swap_values16u(sw, fpu_state.swd);
+    seteal(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_32) : (x87_timings.fist_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_32) : (x87_concurrency.fist_32 * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+/* FIST m32int (fetch_ea_32): store ST(0) as a 32-bit integer, no pop. Returns cpu_state.abrt. */
+static int
+sf_FISTil_a32(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    int32_t save_reg = int32_indefinite; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_int32(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    swap_values16u(sw, fpu_state.swd); /* entry status word stays live while the store may abort */
+    seteal(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_32) : (x87_timings.fist_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_32) : (x87_concurrency.fist_32 * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+/* FISTP m32int (effective address decoded with fetch_ea_16): store ST(0) as a
+ * 32-bit integer, then pop. Returns 1 if the store aborted, otherwise 0. */
+static int
+sf_FISTPil_a16(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    int32_t out_val = int32_indefinite;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_int32(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteal(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fist_32 * cpu_multi) : (x87_timings.fist_32));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fist_32 * cpu_multi) : (x87_concurrency.fist_32));
+    return 0;
+}
+#ifndef FPU_8087
+/* FISTP m32int (fetch_ea_32): store ST(0) as a 32-bit integer, then pop. */
+static int
+sf_FISTPil_a32(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    int32_t out_val = int32_indefinite;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_int32(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteal(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fist_32 * cpu_multi) : (x87_timings.fist_32));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fist_32 * cpu_multi) : (x87_concurrency.fist_32));
+    return 0;
+}
+#endif
+
+/* FISTP m64int (effective address decoded with fetch_ea_16): store ST(0) as a
+ * 64-bit integer, then pop. Returns 1 if the store aborted, otherwise 0. */
+static int
+sf_FISTPiq_a16(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    int64_t out_val = int64_indefinite;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_int64(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteaq(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fist_64 * cpu_multi) : (x87_timings.fist_64));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fist_64 * cpu_multi) : (x87_concurrency.fist_64));
+    return 0;
+}
+#ifndef FPU_8087
+/* FISTP m64int (fetch_ea_32): store ST(0) as a 64-bit integer, then pop. */
+static int
+sf_FISTPiq_a32(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    int64_t out_val = int64_indefinite;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_int64(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteaq(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fist_64 * cpu_multi) : (x87_timings.fist_64));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fist_64 * cpu_multi) : (x87_concurrency.fist_64));
+    return 0;
+}
+#endif
+
+/* FBSTP m80bcd (effective address decoded with fetch_ea_16): convert ST(0) to an
+ * 18-digit packed BCD integer, store the ten bytes and pop. Returns 1 on abort. */
+static int
+sf_FBSTP_PACKED_BCD_a16(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    /* Stored as-is when a masked stack underflow or invalid operation reaches the store. */
+    uint16_t save_reg_hi = 0xffff;
+    uint64_t save_reg_lo = BX_CONST64(0xC000000000000000);
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        floatx80 reg = FPU_read_regi(0);
+        int64_t save_val = floatx80_to_int64(reg, &status);
+        int sign = (reg.exp & 0x8000) != 0;
+
+        if (!(status.float_exception_flags & float_flag_invalid)) {
+            /* Take the magnitude in unsigned arithmetic: -INT64_MIN overflows int64_t. */
+            uint64_t magnitude = sign ? (0 - (uint64_t) save_val) : (uint64_t) save_val;
+
+            if (magnitude > BX_CONST64(999999999999999999)) {
+                status.float_exception_flags = float_flag_invalid; // throw away other flags
+            } else {
+                save_reg_hi = sign ? 0x8000 : 0;
+                save_reg_lo = 0;
+                for (int i = 0; i < 16; i++) {
+                    save_reg_lo += (magnitude % 10) << (4 * i);
+                    magnitude /= 10;
+                }
+                save_reg_hi += (uint16_t) (magnitude % 10);
+                magnitude /= 10;
+                save_reg_hi += (uint16_t) (magnitude % 10) << 4;
+            }
+        }
+        /* check for fpu arithmetic exceptions */
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    // store to the memory might generate an exception, in this case original FPU_SW must be kept
+    swap_values16u(sw, fpu_state.swd);
+
+    // write packed bcd to memory
+    writememq(easeg, cpu_state.eaaddr, save_reg_lo);
+    writememw(easeg, cpu_state.eaaddr + 8, save_reg_hi);
+    if (cpu_state.abrt)
+        return 1;
+    fpu_state.swd = sw;
+    FPU_pop();
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fbstp) : (x87_timings.fbstp * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fbstp) : (x87_concurrency.fbstp * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+/* FBSTP m80bcd (effective address decoded with fetch_ea_32): convert ST(0) to an
+ * 18-digit packed BCD integer, store the ten bytes and pop. Returns 1 on abort. */
+static int
+sf_FBSTP_PACKED_BCD_a32(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    /* Stored as-is when a masked stack underflow or invalid operation reaches the store. */
+    uint16_t save_reg_hi = 0xffff;
+    uint64_t save_reg_lo = BX_CONST64(0xC000000000000000);
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        floatx80 reg = FPU_read_regi(0);
+        int64_t save_val = floatx80_to_int64(reg, &status);
+        int sign = (reg.exp & 0x8000) != 0;
+
+        if (!(status.float_exception_flags & float_flag_invalid)) {
+            /* Take the magnitude in unsigned arithmetic: -INT64_MIN overflows int64_t. */
+            uint64_t magnitude = sign ? (0 - (uint64_t) save_val) : (uint64_t) save_val;
+
+            if (magnitude > BX_CONST64(999999999999999999)) {
+                status.float_exception_flags = float_flag_invalid; // throw away other flags
+            } else {
+                save_reg_hi = sign ? 0x8000 : 0;
+                save_reg_lo = 0;
+                for (int i = 0; i < 16; i++) {
+                    save_reg_lo += (magnitude % 10) << (4 * i);
+                    magnitude /= 10;
+                }
+                save_reg_hi += (uint16_t) (magnitude % 10);
+                magnitude /= 10;
+                save_reg_hi += (uint16_t) (magnitude % 10) << 4;
+            }
+        }
+        /* check for fpu arithmetic exceptions */
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    // store to the memory might generate an exception, in this case original FPU_SW must be kept
+    swap_values16u(sw, fpu_state.swd);
+
+    // write packed bcd to memory
+    writememq(easeg, cpu_state.eaaddr, save_reg_lo);
+    writememw(easeg, cpu_state.eaaddr + 8, save_reg_hi);
+    if (cpu_state.abrt)
+        return 1;
+    fpu_state.swd = sw;
+    FPU_pop();
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fbstp) : (x87_timings.fbstp * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fbstp) : (x87_concurrency.fbstp * cpu_multi));
+    return 0;
+}
+#endif
+
+/* FST m32real (fetch_ea_16): store ST(0) as a float32, no pop. Returns cpu_state.abrt. */
+static int
+sf_FSTs_a16(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    float32 save_reg = float32_default_nan; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_float32(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    /* The store can abort, so it runs with the entry status word; sw holds the new one. */
+    swap_values16u(sw, fpu_state.swd);
+    seteal(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+/* FST m32real (fetch_ea_32): store ST(0) as a float32, no pop. Returns cpu_state.abrt. */
+static int
+sf_FSTs_a32(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    float32 save_reg = float32_default_nan; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_float32(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    swap_values16u(sw, fpu_state.swd); /* entry status word stays live while the store may abort */
+    seteal(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+/* FSTP m32real (effective address decoded with fetch_ea_16): store ST(0) as a
+ * float32, then pop. Returns 1 if the store aborted, otherwise 0. */
+static int
+sf_FSTPs_a16(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    float32 out_val = float32_default_nan;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_float32(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the
+         * new one and is written back only after the store went through. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteal(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fst_32 * cpu_multi) : (x87_timings.fst_32));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fst_32 * cpu_multi) : (x87_concurrency.fst_32));
+    return 0;
+}
+#ifndef FPU_8087
+/* FSTP m32real (fetch_ea_32): store ST(0) as a float32, then pop. */
+static int
+sf_FSTPs_a32(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    float32 out_val = float32_default_nan;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_float32(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteal(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fst_32 * cpu_multi) : (x87_timings.fst_32));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fst_32 * cpu_multi) : (x87_concurrency.fst_32));
+    return 0;
+}
+#endif
+
+/* FST m64real (fetch_ea_16): store ST(0) as a float64, no pop. Returns cpu_state.abrt. */
+static int
+sf_FSTd_a16(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    float64 save_reg = float64_default_nan; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_float64(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    /* The store can abort, so it runs with the entry status word; sw holds the new one. */
+    swap_values16u(sw, fpu_state.swd);
+    seteaq(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_64) : (x87_timings.fst_64 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_64) : (x87_concurrency.fst_64 * cpu_multi));
+    return cpu_state.abrt;
+}
+#ifndef FPU_8087
+/* FST m64real (fetch_ea_32): store ST(0) as a float64, no pop. Returns cpu_state.abrt. */
+static int
+sf_FSTd_a32(uint32_t fetchdat)
+{
+    uint16_t sw = fpu_state.swd;
+    float64 save_reg = float64_default_nan; /* stored unchanged on a masked stack underflow */
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked())
+            goto next_ins;
+    } else {
+        struct float_status_t status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        save_reg = floatx80_to_float64(FPU_read_regi(0), &status);
+        if (FPU_exception(fetchdat, status.float_exception_flags, 1))
+            goto next_ins;
+    }
+    swap_values16u(sw, fpu_state.swd); /* entry status word stays live while the store may abort */
+    seteaq(save_reg);
+    if (!cpu_state.abrt) /* commit the new status word only if the store completed */
+        fpu_state.swd = sw;
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_64) : (x87_timings.fst_64 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_64) : (x87_concurrency.fst_64 * cpu_multi));
+    return cpu_state.abrt;
+}
+#endif
+
+/* FSTP m64real (effective address decoded with fetch_ea_16): store ST(0) as a
+ * float64, then pop. Returns 1 if the store aborted, otherwise 0. */
+static int
+sf_FSTPd_a16(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    float64 out_val = float64_default_nan;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_float64(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the
+         * new one and is written back only after the store went through. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteaq(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fst_64 * cpu_multi) : (x87_timings.fst_64));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fst_64 * cpu_multi) : (x87_concurrency.fst_64));
+    return 0;
+}
+#ifndef FPU_8087
+/* FSTP m64real (fetch_ea_32): store ST(0) as a float64, then pop. */
+static int
+sf_FSTPd_a32(uint32_t fetchdat)
+{
+    uint16_t held_swd = fpu_state.swd;
+    float64 out_val = float64_default_nan;
+    int do_store;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        do_store = is_IA_masked() ? 1 : 0;
+    } else {
+        struct float_status_t sf_status = i387cw_to_softfloat_status_word(i387_get_control_word());
+        out_val = floatx80_to_float64(FPU_read_regi(0), &sf_status);
+        do_store = !FPU_exception(fetchdat, sf_status.float_exception_flags, 1);
+    }
+    if (do_store) {
+        /* The entry status word stays live during the store; held_swd carries the new one. */
+        swap_values16u(held_swd, fpu_state.swd);
+        seteaq(out_val);
+        if (cpu_state.abrt)
+            return 1;
+        fpu_state.swd = held_swd;
+        FPU_pop();
+    }
+
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fst_64 * cpu_multi) : (x87_timings.fst_64));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fst_64 * cpu_multi) : (x87_concurrency.fst_64));
+    return 0;
+}
+#endif
+
+/* FSTP m80real (fetch_ea_16): store ST(0) unconverted as ten bytes, then pop. */
+static int
+sf_FSTPe_a16(uint32_t fetchdat)
+{
+    floatx80 save_reg = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_16(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked()) /* masked: the default NaN in save_reg is stored */
+            goto next_ins;
+    } else {
+        save_reg = FPU_read_regi(0);
+    }
+    writememq(easeg, cpu_state.eaaddr, save_reg.fraction);
+    writememw(easeg, cpu_state.eaaddr + 8, save_reg.exp);
+    if (cpu_state.abrt) /* an aborted store must not pop the stack or report success */
+        return 1;
+    FPU_pop();
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_80) : (x87_timings.fst_80 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_80) : (x87_concurrency.fst_80 * cpu_multi));
+    return 0;
+}
+#ifndef FPU_8087
+/* FSTP m80real (fetch_ea_32): store ST(0) unconverted as ten bytes, then pop. */
+static int
+sf_FSTPe_a32(uint32_t fetchdat)
+{
+    floatx80 save_reg = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    fetch_ea_32(fetchdat);
+    SEG_CHECK_WRITE(cpu_state.ea_seg);
+    if (cpu_state.abrt)
+        return 1;
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (!is_IA_masked()) /* masked: the default NaN in save_reg is stored */
+            goto next_ins;
+    } else {
+        save_reg = FPU_read_regi(0);
+    }
+    writememq(easeg, cpu_state.eaaddr, save_reg.fraction);
+    writememw(easeg, cpu_state.eaaddr + 8, save_reg.exp);
+    if (cpu_state.abrt) /* an aborted store must not pop the stack or report success */
+        return 1;
+    FPU_pop();
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_80) : (x87_timings.fst_80 * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_80) : (x87_concurrency.fst_80 * cpu_multi));
+    return 0;
+}
+#endif
+
+/* FST ST(i): copy ST(0) into the register picked by fetchdat's low three bits; no pop. */
+static int
+sf_FST_sti(uint32_t fetchdat)
+{
+    const uint32_t dst = fetchdat & 7;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    if (!IS_TAG_EMPTY(0)) {
+        FPU_save_regi(FPU_read_regi(0), dst);
+    } else {
+        FPU_stack_underflow(fetchdat, dst, 0);
+    }
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fst * cpu_multi) : (x87_timings.fst));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fst * cpu_multi) : (x87_concurrency.fst));
+    return 0;
+}
+
+/* FSTP ST(i): copy ST(0) into the register picked by fetchdat's low three bits, then pop. */
+static int
+sf_FSTP_sti(uint32_t fetchdat)
+{
+    const uint32_t dst = fetchdat & 7;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    if (!IS_TAG_EMPTY(0)) {
+        FPU_save_regi(FPU_read_regi(0), dst);
+        FPU_pop();
+    } else {
+        FPU_stack_underflow(fetchdat, dst, 1);
+    }
+    CLOCK_CYCLES_FPU((fpu_type < FPU_487SX) ? (x87_timings.fst * cpu_multi) : (x87_timings.fst));
+    CONCURRENCY_CYCLES((fpu_type < FPU_487SX) ? (x87_concurrency.fst * cpu_multi) : (x87_concurrency.fst));
+    return 0;
+}
+
+#ifndef FPU_8087
+/* Expands to sf_FCMOV<cc>(): copies ST(i) into ST(0) when cond_<cc> is true. */
+#    define sf_FCMOV(condition)                                     \
+        static int sf_FCMOV##condition(uint32_t fetchdat)           \
+        {                                                           \
+            FP_ENTER();                                             \
+            FPU_check_pending_exceptions();                         \
+            cpu_state.pc++;                                         \
+            if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7))      \
+                FPU_stack_underflow(fetchdat, 0, 0);                \
+            else if (cond_##condition) {                            \
+                FPU_save_regi(FPU_read_regi(fetchdat & 7), 0);      \
+            }                                                       \
+            CLOCK_CYCLES_FPU(4);                                    \
+            return 0;                                               \
+        }
+
+/* Only the U/NU conditions are defined here, on top of PF_SET(). */
+#    define cond_U  (PF_SET())
+#    define cond_NU (!PF_SET())
+
+// clang-format off
+sf_FCMOV(B)
+sf_FCMOV(E)
+sf_FCMOV(BE)
+sf_FCMOV(U)
+sf_FCMOV(NB)
+sf_FCMOV(NE)
+sf_FCMOV(NBE)
+sf_FCMOV(NU)
+// clang-format on
+#endif
diff --git a/src/cpu/x87_ops_sf_misc.h b/src/cpu/x87_ops_sf_misc.h
new file mode 100644
index 000000000..d8a3d7368
--- /dev/null
+++ b/src/cpu/x87_ops_sf_misc.h
@@ -0,0 +1,134 @@
+static int
+sf_FXCH_sti(uint32_t fetchdat) /* Exchange register 0 with register (fetchdat & 7); always returns 0. */
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+    floatx80 st0_reg, sti_reg;
+    int st0_tag, sti_tag;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    st0_tag = FPU_gettagi(0);
+    sti_tag = FPU_gettagi(fetchdat & 7);
+    st0_reg = FPU_read_regi(0);             /* both values are read before the tags are examined */
+    sti_reg = FPU_read_regi(fetchdat & 7);
+
+    clear_C1();
+    if ((st0_tag == X87_TAG_EMPTY) || (sti_tag == X87_TAG_EMPTY)) {
+        FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        if (is_IA_masked()) {
+            /* Masked response: each empty operand takes part in the swap as the default NaN. */
+            if (st0_tag == X87_TAG_EMPTY)
+                st0_reg = floatx80_default_nan;
+            if (sti_tag == X87_TAG_EMPTY)
+                sti_reg = floatx80_default_nan;
+        } else
+            goto next_ins; /* unmasked: skip the exchange entirely */
+    }
+    FPU_save_regi(st0_reg, fetchdat & 7);
+    FPU_save_regi(sti_reg, 0);
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxch) : (x87_timings.fxch * cpu_multi)); /* table value is scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxch) : (x87_concurrency.fxch * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FCHS(uint32_t fetchdat) /* Replace register 0 with floatx80_chs() of its value; always returns 0. */
+{
+    floatx80 st0_reg;
+    floatx80 result;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    if (IS_TAG_EMPTY(0))
+        FPU_stack_underflow(fetchdat, 0, 0); /* empty operand: report underflow instead of computing */
+    else {
+        clear_C1(); /* C1 is only cleared on the non-empty path */
+        st0_reg = FPU_read_regi(0);
+        result = floatx80_chs(st0_reg);
+        FPU_save_regi(result, 0);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fchs) : (x87_timings.fchs * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fchs) : (x87_concurrency.fchs * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FABS(uint32_t fetchdat) /* Replace register 0 with floatx80_abs() of its value; always returns 0. */
+{
+    floatx80 st0_reg;
+    floatx80 result;
+
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    if (IS_TAG_EMPTY(0))
+        FPU_stack_underflow(fetchdat, 0, 0); /* empty operand: report underflow instead of computing */
+    else {
+        clear_C1(); /* C1 is only cleared on the non-empty path */
+        st0_reg = FPU_read_regi(0);
+        result = floatx80_abs(st0_reg);
+        FPU_save_regi(result, 0);
+    }
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fabs) : (x87_timings.fabs * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fabs) : (x87_concurrency.fabs * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FDECSTP(uint32_t fetchdat) /* Step the top-of-stack index down by one; always returns 0. */
+{
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    fpu_state.tos = (fpu_state.tos - 1) & 7; /* & 7 wraps 0 around to 7; only the index changes here */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fincdecstp) : (x87_timings.fincdecstp * cpu_multi)); /* timing entry shared with sf_FINCSTP */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fincdecstp) : (x87_concurrency.fincdecstp * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FINCSTP(uint32_t fetchdat) /* Step the top-of-stack index up by one; always returns 0. */
+{
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    fpu_state.tos = (fpu_state.tos + 1) & 7; /* & 7 wraps 7 around to 0; only the index changes here */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fincdecstp) : (x87_timings.fincdecstp * cpu_multi)); /* timing entry shared with sf_FDECSTP */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fincdecstp) : (x87_concurrency.fincdecstp * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FFREE_sti(uint32_t fetchdat) /* Tag register (fetchdat & 7) as empty; always returns 0. */
+{
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    FPU_settagi(X87_TAG_EMPTY, fetchdat & 7); /* only the tag is written in this function */
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ffree) : (x87_timings.ffree * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ffree) : (x87_concurrency.ffree * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FFREEP_sti(uint32_t fetchdat) /* Tag register (fetchdat & 7) as empty, then pop; returns 1 if cpu_state.abrt is set, else 0. */
+{
+    FP_ENTER();
+    FPU_check_pending_exceptions();
+    cpu_state.pc++;
+    clear_C1();
+    FPU_settagi(X87_TAG_EMPTY, fetchdat & 7);
+    if (cpu_state.abrt)
+        return 1; /* abort pending: leave before the pop and before any cycles are charged */
+    FPU_pop();
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ffree) : (x87_timings.ffree * cpu_multi)); /* reuses the ffree timing entry */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ffree) : (x87_concurrency.ffree * cpu_multi));
+    return 0;
+}
diff --git a/src/cpu/x87_ops_sf_trans.h b/src/cpu/x87_ops_sf_trans.h
new file mode 100644
index 000000000..5289b2bbf
--- /dev/null
+++ b/src/cpu/x87_ops_sf_trans.h
@@ -0,0 +1,417 @@
+static int
+sf_F2XM1(uint32_t fetchdat) /* Replace register 0 with f2xm1() of its value; always returns 0. */
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_stack_underflow(fetchdat, 0, 0);
+        goto next_ins; /* nothing is computed for an empty operand */
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    result = f2xm1(FPU_read_regi(0), &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when FPU_exception() returns 0 */
+        FPU_save_regi(result, 0);
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.f2xm1) : (x87_timings.f2xm1 * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.f2xm1) : (x87_concurrency.f2xm1 * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FYL2X(uint32_t fetchdat) /* fyl2x() of registers 0 and 1; pops, then stores the result in the new register 0. Always returns 0. */
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_stack_underflow(fetchdat, 1, 1); /* NOTE(review): (1, 1) presumably means "target register 1, then pop" - confirm */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    result = fyl2x(FPU_read_regi(0), FPU_read_regi(1), &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* pop/store only when FPU_exception() returns 0 */
+        FPU_pop();
+        FPU_save_regi(result, 0);
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fyl2x) : (x87_timings.fyl2x * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fyl2x) : (x87_concurrency.fyl2x * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FPTAN(uint32_t fetchdat) /* ftan() on register 0, then push Const_1 (or the NaN again); always returns 0. */
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+    floatx80 y;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    clear_C2();
+    if (IS_TAG_EMPTY(0) || !IS_TAG_EMPTY(-1)) { /* register -1 is presumably the slot FPU_push() will claim - confirm */
+        if (IS_TAG_EMPTY(0))
+            FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        else
+            FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0);
+
+        /* The masked response: both result slots receive the default NaN. */
+        if (is_IA_masked()) {
+            FPU_save_regi(floatx80_default_nan, 0);
+            FPU_push();
+            FPU_save_regi(floatx80_default_nan, 0);
+        }
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    y = FPU_read_regi(0);
+    if (ftan(&y, &status) == -1) { /* ftan() updates y in place; -1 is reported through C2 with no store */
+        fpu_state.swd |= C2;
+        goto next_ins;
+    }
+
+    if (floatx80_is_nan(y)) { /* NaN result: the same value goes to both slots instead of Const_1 */
+        if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
+            FPU_save_regi(y, 0);
+            FPU_push();
+            FPU_save_regi(y, 0);
+        }
+        goto next_ins;
+    }
+
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store/push only when FPU_exception() returns 0 */
+        FPU_save_regi(y, 0);
+        FPU_push();
+        FPU_save_regi(Const_1, 0);
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fptan) : (x87_timings.fptan * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fptan) : (x87_concurrency.fptan * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FPATAN(uint32_t fetchdat) /* fpatan() of registers 0 and 1; pops, then stores the result in the new register 0. Always returns 0. */
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    /* Start from a clean C1, as every other op in this file does; otherwise a stale C1 from an earlier instruction survives. */
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_stack_underflow(fetchdat, 1, 1); /* NOTE(review): (1, 1) presumably means "target register 1, then pop" - confirm */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    result = fpatan(FPU_read_regi(0), FPU_read_regi(1), &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* pop/store only when FPU_exception() returns 0 */
+        FPU_pop();
+        FPU_save_regi(result, 0);
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fpatan) : (x87_timings.fpatan * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fpatan) : (x87_concurrency.fpatan * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FXTRACT(uint32_t fetchdat) /* Split register 0 via floatx80_extract() into two stack entries; always returns 0. */
+{
+    struct float_status_t status;
+    floatx80 a, b;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+
+#if 0 // TODO: stack checks compiled out. NOTE(review): condition lacks the '!' that FPTAN/FSINCOS use on IS_TAG_EMPTY(-1), and floatx80_default_nan is not declared in this function.
+    if ((IS_TAG_EMPTY(0) || IS_TAG_EMPTY(-1))) {
+        if (IS_TAG_EMPTY(0))
+            FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        else
+            FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0);
+
+        /* The masked response */
+        if (is_IA_masked()) {
+            FPU_save_regi(floatx80_default_nan, 0);
+            FPU_push();
+            FPU_save_regi(floatx80_default_nan, 0);
+        }
+        goto next_ins;
+    }
+#endif
+
+    status = i387cw_to_softfloat_status_word(i387_get_control_word()); /* control word used as is (no FPU_PR_80_BITS) */
+    a = FPU_read_regi(0);
+    b = floatx80_extract(&a, &status); /* a is updated in place; b is the returned part */
+    if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
+        FPU_save_regi(b, 0); // exponent
+        FPU_push();
+        FPU_save_regi(a, 0); // fraction
+    }
+
+#if 0 // TODO: the label is only needed once the disabled checks above are restored.
+next_ins:
+#endif
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxtract) : (x87_concurrency.fxtract * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FPREM1(uint32_t fetchdat) /* floatx80_ieee754_remainder() of registers 0 and 1, stored in register 0; always returns 0. */
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+    uint64_t quotient = 0;
+    int flags, cc;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    clear_C2();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_stack_underflow(fetchdat, 0, 0);
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word()); /* control word used as is (no FPU_PR_80_BITS) */
+    a = FPU_read_regi(0);
+    b = FPU_read_regi(1);
+    flags = floatx80_ieee754_remainder(a, b, &result, &quotient, &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
+        if (flags >= 0) { /* a negative return leaves the condition codes alone */
+            cc = 0;
+            if (flags)
+                cc = C2; /* positive return: only C2 is reported */
+            else {
+                if (quotient & 1) /* quotient bits 0, 1, 2 map to C1, C3, C0 respectively */
+                    cc |= C1;
+                if (quotient & 2)
+                    cc |= C3;
+                if (quotient & 4)
+                    cc |= C0;
+            }
+            setcc(cc);
+        }
+        FPU_save_regi(result, 0);
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fprem1) : (x87_timings.fprem1 * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem1) : (x87_concurrency.fprem1 * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FPREM(uint32_t fetchdat) /* floatx80_remainder() of registers 0 and 1, stored in register 0; always returns 0. */
+{
+    floatx80 a, b, result;
+    struct float_status_t status;
+    uint64_t quotient = 0;
+    int flags, cc;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    clear_C2();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_stack_underflow(fetchdat, 0, 0);
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word()); /* control word used as is (no FPU_PR_80_BITS) */
+    a = FPU_read_regi(0);
+    b = FPU_read_regi(1);
+    // NOTE(review): no explicit handling of unsupported extended-precision encodings is visible here; presumably floatx80_remainder() covers it - confirm.
+    flags = floatx80_remainder(a, b, &result, &quotient, &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
+        if (flags >= 0) { /* a negative return leaves the condition codes alone */
+            cc = 0;
+            if (flags)
+                cc = C2; /* positive return: only C2 is reported */
+            else {
+                if (quotient & 1) /* quotient bits 0, 1, 2 map to C1, C3, C0 respectively */
+                    cc |= C1;
+                if (quotient & 2)
+                    cc |= C3;
+                if (quotient & 4)
+                    cc |= C0;
+            }
+            setcc(cc);
+        }
+        FPU_save_regi(result, 0);
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fprem) : (x87_timings.fprem * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem) : (x87_concurrency.fprem * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FYL2XP1(uint32_t fetchdat) /* fyl2xp1() of registers 0 and 1; result goes to register 1, then the stack is popped. Always returns 0. */
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_stack_underflow(fetchdat, 1, 1); /* NOTE(review): (1, 1) presumably means "target register 1, then pop" - confirm */
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    result = fyl2xp1(FPU_read_regi(0), FPU_read_regi(1), &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
+        FPU_save_regi(result, 1); /* written before the pop, unlike sf_FYL2X which pops first and writes register 0 */
+        FPU_pop();
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fyl2xp1) : (x87_timings.fyl2xp1 * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fyl2xp1) : (x87_concurrency.fyl2xp1 * cpu_multi));
+    return 0;
+}
+
+#ifndef FPU_8087
+static int
+sf_FSINCOS(uint32_t fetchdat) /* fsincos() on register 0: stores the sine, pushes, stores the cosine; always returns 0. */
+{
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+    struct float_status_t status;
+    floatx80 y, sin_y, cos_y;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    clear_C2();
+    if (IS_TAG_EMPTY(0) || !IS_TAG_EMPTY(-1)) { /* register -1 is presumably the slot FPU_push() will claim - confirm */
+        if (IS_TAG_EMPTY(0))
+            FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
+        else
+            FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0);
+
+        /* The masked response: both result slots receive the default NaN. */
+        if (is_IA_masked()) {
+            FPU_save_regi(floatx80_default_nan, 0);
+            FPU_push();
+            FPU_save_regi(floatx80_default_nan, 0);
+        }
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    y = FPU_read_regi(0);
+    if (fsincos(y, &sin_y, &cos_y, &status) == -1) { /* -1 is reported through C2 with no store */
+        fpu_state.swd |= C2;
+        goto next_ins;
+    }
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { /* store/push only when FPU_exception() returns 0 */
+        FPU_save_regi(sin_y, 0);
+        FPU_push();
+        FPU_save_regi(cos_y, 0);
+    }
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsincos) : (x87_timings.fsincos * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsincos) : (x87_concurrency.fsincos * cpu_multi));
+    return 0;
+}
+#endif
+
+static int
+sf_FSCALE(uint32_t fetchdat) /* floatx80_scale() of registers 0 and 1, stored in register 0 (no pop); always returns 0. */
+{
+    floatx80 result;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
+        FPU_stack_underflow(fetchdat, 0, 0);
+        goto next_ins; /* nothing is computed when either operand is empty */
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word()); /* control word used as is (no FPU_PR_80_BITS) */
+    result = floatx80_scale(FPU_read_regi(0), FPU_read_regi(1), &status);
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when FPU_exception() returns 0 */
+        FPU_save_regi(result, 0);
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fscale) : (x87_timings.fscale * cpu_multi)); /* scaled by cpu_multi when fpu_type < FPU_487SX */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fscale) : (x87_concurrency.fscale * cpu_multi));
+    return 0;
+}
+
+#ifndef FPU_8087
+static int
+sf_FSIN(uint32_t fetchdat) /* Replace register 0 with the fsin() result; always returns 0. */
+{
+    floatx80 y;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    clear_C2();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_stack_underflow(fetchdat, 0, 0);
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    y = FPU_read_regi(0);
+    if (fsin(&y, &status) == -1) { /* fsin() updates y in place; -1 is reported through C2 with no store */
+        fpu_state.swd |= C2;
+        goto next_ins;
+    }
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when FPU_exception() returns 0 */
+        FPU_save_regi(y, 0);
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsin_cos) : (x87_timings.fsin_cos * cpu_multi)); /* timing entry shared with sf_FCOS */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsin_cos) : (x87_concurrency.fsin_cos * cpu_multi));
+    return 0;
+}
+
+static int
+sf_FCOS(uint32_t fetchdat) /* Replace register 0 with the fcos() result; always returns 0. */
+{
+    floatx80 y;
+    struct float_status_t status;
+
+    FP_ENTER();
+    cpu_state.pc++;
+    clear_C1();
+    clear_C2();
+    if (IS_TAG_EMPTY(0)) {
+        FPU_stack_underflow(fetchdat, 0, 0);
+        goto next_ins;
+    }
+    status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); /* control word with FPU_PR_80_BITS ORed in */
+    y = FPU_read_regi(0);
+    if (fcos(&y, &status) == -1) { /* fcos() updates y in place; -1 is reported through C2 with no store */
+        fpu_state.swd |= C2;
+        goto next_ins;
+    }
+    if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) /* store only when FPU_exception() returns 0 */
+        FPU_save_regi(y, 0);
+
+next_ins:
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsin_cos) : (x87_timings.fsin_cos * cpu_multi)); /* timing entry shared with sf_FSIN */
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsin_cos) : (x87_concurrency.fsin_cos * cpu_multi));
+    return 0;
+}
+#endif
diff --git a/src/device.c b/src/device.c
index 5d739b13c..d0b502ca1 100644
--- a/src/device.c
+++ b/src/device.c
@@ -315,27 +315,13 @@ device_close_all(void)
 }
 
 void
-device_reset_all(void)
+device_reset_all(uint32_t match_flags)
 {
     int c;
 
     for (c = 0; c < DEVICE_MAX; c++) {
         if (devices[c] != NULL) {
-            if (devices[c]->reset != NULL)
-                devices[c]->reset(device_priv[c]);
-        }
-    }
-}
-
-/* Reset all attached PCI devices - needed for PCI turbo reset control. */
-void
-device_reset_all_pci(void)
-{
-    int c;
-
-    for (c = 0; c < DEVICE_MAX; c++) {
-        if (devices[c] != NULL) {
-            if ((devices[c]->reset != NULL) && (devices[c]->flags & DEVICE_PCI))
+            if ((devices[c]->reset != NULL) && (devices[c]->flags & match_flags))
                 devices[c]->reset(device_priv[c]);
         }
     }
diff --git a/src/device/CMakeLists.txt b/src/device/CMakeLists.txt
index e60856293..ef3a392ee 100644
--- a/src/device/CMakeLists.txt
+++ b/src/device/CMakeLists.txt
@@ -18,7 +18,9 @@
 add_library(dev OBJECT bugger.c cassette.c cartridge.c hasp.c hwm.c hwm_lm75.c hwm_lm78.c hwm_gl518sm.c
     hwm_vt82c686.c ibm_5161.c isamem.c isartc.c ../lpt.c pci_bridge.c
     postcard.c serial.c clock_ics9xxx.c isapnp.c i2c.c i2c_gpio.c
-    smbus_piix4.c smbus_ali7101.c keyboard.c keyboard_xt.c keyboard_at.c
+    smbus_piix4.c smbus_ali7101.c keyboard.c keyboard_xt.c
+    kbc_at.c kbc_at_dev.c
+    keyboard_at.c
     mouse.c mouse_bus.c mouse_serial.c mouse_ps2.c phoenix_486_jumper.c
     mouse_wacom_tablet.c serial_passthrough.c)
 
diff --git a/src/device/kbc_at.c b/src/device/kbc_at.c
new file mode 100644
index 000000000..aed771b9e
--- /dev/null
+++ b/src/device/kbc_at.c
@@ -0,0 +1,2218 @@
+/*
+ * 86Box    A hypervisor and IBM PC system emulator that specializes in
+ *          running old operating systems and software designed for IBM
+ *          PC systems and compatibles from 1981 through fairly recent
+ *          system designs based on the PCI bus.
+ *
+ *          This file is part of the 86Box distribution.
+ *
+ *          Intel 8042 (AT keyboard controller) emulation.
+ *
+ *
+ *
+ * Authors: Miran Grca, <mgrca8@gmail.com>
+ *          EngiNerd, <webmaster.crrc@yahoo.it>
+ *
+ *          Copyright 2023 Miran Grca.
+ *          Copyright 2023 EngiNerd.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#define HAVE_STDARG_H
+#include <wchar.h>
+#include <86box/86box.h>
+#include "cpu.h"
+#include <86box/timer.h>
+#include <86box/io.h>
+#include <86box/pic.h>
+#include <86box/pit.h>
+#include <86box/ppi.h>
+#include <86box/mem.h>
+#include <86box/device.h>
+#include <86box/machine.h>
+#include <86box/m_at_t3100e.h>
+#include <86box/fdd.h>
+#include <86box/fdc.h>
+#include <86box/sound.h>
+#include <86box/snd_speaker.h>
+#include <86box/video.h>
+#include <86box/keyboard.h>
+
+#define STAT_PARITY        0x80
+#define STAT_RTIMEOUT      0x40
+#define STAT_TTIMEOUT      0x20 /* shares the value 0x20 with STAT_MFULL */
+#define STAT_MFULL         0x20 /* ORed into status by kbc_send_to_ob() for channel >= 2 when FLAG_PS2 is set */
+#define STAT_UNLOCKED      0x10
+#define STAT_CD            0x08 /* when set, kbc_ibf_process() handles the input byte as a command */
+#define STAT_SYSFLAG       0x04
+#define STAT_IFULL         0x02 /* input buffer full */
+#define STAT_OFULL         0x01 /* output buffer full; set by kbc_send_to_ob() */
+
+#define CCB_UNUSED         0x80 /* NOTE(review): the code in this file tests mem[0x20] with literal masks, not these CCB_* names */
+#define CCB_TRANSLATE      0x40
+#define CCB_PCMODE         0x20
+#define CCB_ENABLEKBD      0x10
+#define CCB_IGNORELOCK     0x08
+#define CCB_SYSTEM         0x04
+#define CCB_ENABLEMINT     0x02
+#define CCB_ENABLEKINT     0x01
+
+#define CCB_MASK           0x68
+#define MODE_MASK          0x6c
+
+#define KBC_TYPE_ISA       0x00 /* AT ISA-based chips */
+#define KBC_TYPE_PS2_1     0x01 /* PS2 on PS/2, type 1 */
+#define KBC_TYPE_PS2_2     0x02 /* PS2 on PS/2, type 2 */
+#define KBC_TYPE_GREEN     0x03 /* PS2 green controller */
+#define KBC_TYPE_MASK      0x03 /* applied to atkbc_t.flags to extract the KBC_TYPE_* value */
+
+#define KBC_VEN_GENERIC    0x00
+#define KBC_VEN_IBM_PS1    0x04
+#define KBC_VEN_TOSHIBA    0x08
+#define KBC_VEN_OLIVETTI   0x0c
+#define KBC_VEN_AMI        0x10
+#define KBC_VEN_TRIGEM_AMI 0x14
+#define KBC_VEN_QUADTEL    0x18
+#define KBC_VEN_PHOENIX    0x1c
+#define KBC_VEN_ACER       0x20
+#define KBC_VEN_NCR        0x24
+#define KBC_VEN_ALI        0x28
+#define KBC_VEN_MASK       0x3c /* applied to atkbc_t.flags to extract the KBC_VEN_* value */
+
+#define FLAG_CLOCK         0x01
+#define FLAG_CACHE         0x02
+#define FLAG_PS2           0x04 /* tested against atkbc_t.misc_flags */
+#define FLAG_PCI           0x08
+
+enum { /* Values stored in atkbc_t.state. */
+    STATE_RESET = 0,       /* KBC reset state, only accepts command AA. */
+    STATE_KBC_DELAY_OUT,   /* KBC is sending one single byte. */
+    STATE_KBC_AMI_OUT,     /* KBC waiting for OBF - needed for AMIKey commands that require clearing of the output byte. */
+    STATE_MAIN_IBF,        /* KBC checking if the input buffer is full. */
+    STATE_MAIN_KBD,        /* KBC checking if the keyboard has anything to send. */
+    STATE_MAIN_AUX,        /* KBC checking if the auxiliary device has anything to send. */
+    STATE_MAIN_BOTH,       /* KBC checking if either device has anything to send. */
+    STATE_KBC_OUT,         /* KBC is sending multiple bytes. */
+    STATE_KBC_PARAM,       /* KBC wants a parameter. */
+    STATE_SEND_KBD,        /* KBC is sending command to the keyboard. */
+    STATE_SCAN_KBD,        /* KBC is waiting for the keyboard command response. */
+    STATE_SEND_AUX,        /* KBC is sending command to the auxiliary device. */
+    STATE_SCAN_AUX         /* KBC is waiting for the auxiliary command response. */
+};
+
+typedef struct {
+    uint8_t state, command, command_phase, status, /* state holds one of the STATE_* values */
+            wantdata, ib, ob, sc_or,               /* sc_or: 0x80 while kbc_translate() has an F0 prefix pending */
+            mem_addr, p1, p2, old_p2,
+            misc_flags, ami_flags, key_ctrl_queue_start, key_ctrl_queue_end,
+            val, channel, stat_hi, pending;        /* byte and routing info stashed by kbc_delay_to_ob() */
+
+    uint8_t mem[0x100]; /* mem[0x20] is tested with the same bit values as the CCB_* masks */
+
+    /* Internal FIFO for the purpose of commands with multi-byte output. */
+    uint8_t key_ctrl_queue[64];
+
+    uint32_t flags; /* masked with KBC_TYPE_MASK / KBC_VEN_MASK */
+
+    /* Main timer. */
+    pc_timer_t send_delay_timer;
+
+    /* P2 pulse callback timer. */
+    pc_timer_t pulse_cb;
+
+    /* Local copies of the pointers to both ports for easier swapping (AMI '5' MegaKey). */
+    kbc_at_port_t     *ports[2];
+
+    uint8_t (*write60_ven)(void *p, uint8_t val); /* NOTE(review): presumably vendor hooks for port 60h/64h writes - confirm */
+    uint8_t (*write64_ven)(void *p, uint8_t val);
+} atkbc_t;
+
+/* Keyboard controller ports (non-static, so visible to other translation units); both start out NULL. */
+kbc_at_port_t  *kbc_at_ports[2] = { NULL, NULL };
+
+static uint8_t kbc_ami_revision   = '8';  /* ASCII '8' (0x38) */
+static uint8_t kbc_award_revision = 0x42; /* 0x42 is ASCII 'B' */
+
+static void (*kbc_at_do_poll)(atkbc_t *dev); /* poll routine pointer; assigned outside this part of the file */
+
+/* Non-translated to translated scan codes: indexed by the raw byte; kbc_translate() ORs sc_or into the looked-up value. */
+static const uint8_t nont_to_t[256] = {
+    0xff, 0x43, 0x41, 0x3f, 0x3d, 0x3b, 0x3c, 0x58,
+    0x64, 0x44, 0x42, 0x40, 0x3e, 0x0f, 0x29, 0x59,
+    0x65, 0x38, 0x2a, 0x70, 0x1d, 0x10, 0x02, 0x5a,
+    0x66, 0x71, 0x2c, 0x1f, 0x1e, 0x11, 0x03, 0x5b,
+    0x67, 0x2e, 0x2d, 0x20, 0x12, 0x05, 0x04, 0x5c,
+    0x68, 0x39, 0x2f, 0x21, 0x14, 0x13, 0x06, 0x5d,
+    0x69, 0x31, 0x30, 0x23, 0x22, 0x15, 0x07, 0x5e,
+    0x6a, 0x72, 0x32, 0x24, 0x16, 0x08, 0x09, 0x5f,
+    0x6b, 0x33, 0x25, 0x17, 0x18, 0x0b, 0x0a, 0x60,
+    0x6c, 0x34, 0x35, 0x26, 0x27, 0x19, 0x0c, 0x61,
+    0x6d, 0x73, 0x28, 0x74, 0x1a, 0x0d, 0x62, 0x6e,
+    0x3a, 0x36, 0x1c, 0x1b, 0x75, 0x2b, 0x63, 0x76,
+    0x55, 0x56, 0x77, 0x78, 0x79, 0x7a, 0x0e, 0x7b,
+    0x7c, 0x4f, 0x7d, 0x4b, 0x47, 0x7e, 0x7f, 0x6f,
+    0x52, 0x53, 0x50, 0x4c, 0x4d, 0x48, 0x01, 0x45,
+    0x57, 0x4e, 0x51, 0x4a, 0x37, 0x49, 0x46, 0x54,
+    0x80, 0x81, 0x82, 0x41, 0x54, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+#ifdef ENABLE_KBC_AT_LOG
+int kbc_at_do_log = ENABLE_KBC_AT_LOG; /* run-time switch checked by kbc_at_log() */
+
+static void
+kbc_at_log(const char *fmt, ...) /* printf-style logger: forwards to pclog_ex() when kbc_at_do_log is non-zero */
+{
+    va_list ap;
+
+    if (kbc_at_do_log) {
+        va_start(ap, fmt);
+        pclog_ex(fmt, ap);
+        va_end(ap);
+    }
+}
+#else
+#    define kbc_at_log(fmt, ...) /* logging compiled out: calls expand to nothing */
+#endif
+
+static void
+kbc_at_queue_reset(atkbc_t *dev)
+{
+    memset(dev->key_ctrl_queue, 0x00, sizeof(dev->key_ctrl_queue)); /* wipe the FIFO storage */
+    dev->key_ctrl_queue_end = dev->key_ctrl_queue_start = 0;        /* and rewind both cursors */
+}
+
+static void
+kbc_at_queue_add(atkbc_t *dev, uint8_t val)
+{
+    kbc_at_log("ATkbc: dev->key_ctrl_queue[%02X] = %02X;\n", dev->key_ctrl_queue_end, val);
+    dev->state = STATE_KBC_OUT;                            /* switch to the multi-byte output state */
+    dev->key_ctrl_queue[dev->key_ctrl_queue_end++] = val;  /* append at the write cursor */
+    dev->key_ctrl_queue_end &= 0x3f;                       /* wrap inside the 64-entry ring */
+}
+
+/* Translate one keyboard byte for the host: returns the byte to expose, or -1 when it is swallowed (F0 prefix or skipped break code). */
+static int
+kbc_translate(atkbc_t *dev, uint8_t val)
+{
+    int      xt_mode   = (dev->mem[0x20] & 0x20) && !(dev->misc_flags & FLAG_PS2);
+    /* The IBM AT keyboard controller firmware does not apply translation in XT mode. */
+    int      translate = !xt_mode && ((dev->mem[0x20] & 0x40) || ((dev->flags & KBC_TYPE_MASK) == KBC_TYPE_PS2_2));
+    uint8_t  kbc_ven   = dev->flags & KBC_VEN_MASK;
+    int      ret       = - 1;
+
+    /* Allow for scan code translation: F0 is remembered in sc_or and not forwarded by itself. */
+    if (translate && (val == 0xf0)) {
+        kbc_at_log("ATkbc: translate is on, F0 prefix detected\n");
+        dev->sc_or = 0x80;
+        return ret;
+    }
+
+    /* Skip break code if translated make code has bit 7 set. */
+    if (translate && (dev->sc_or == 0x80) && (nont_to_t[val] & 0x80)) {
+        kbc_at_log("ATkbc: translate is on, skipping scan code: %02X (original: F0 %02X)\n", nont_to_t[val], val);
+        dev->sc_or = 0;
+        return ret;
+    }
+
+    /* Test for T3100E 'Fn' key (Right Alt / Right Ctrl); dev was already dereferenced above, so no NULL test is needed. */
+    if ((kbc_ven == KBC_VEN_TOSHIBA) && (keyboard_recv(0x138) || keyboard_recv(0x11d)))  switch (val) {
+        case 0x4f:
+            t3100e_notify_set(0x01);
+            break; /* End */
+        case 0x50:
+            t3100e_notify_set(0x02);
+            break; /* Down */
+        case 0x51:
+            t3100e_notify_set(0x03);
+            break; /* PgDn */
+        case 0x52:
+            t3100e_notify_set(0x04);
+            break; /* Ins */
+        case 0x53:
+            t3100e_notify_set(0x05);
+            break; /* Del */
+        case 0x54:
+            t3100e_notify_set(0x06);
+            break; /* SysRQ */
+        case 0x45:
+            t3100e_notify_set(0x07);
+            break; /* NumLock */
+        case 0x46:
+            t3100e_notify_set(0x08);
+            break; /* ScrLock */
+        case 0x47:
+            t3100e_notify_set(0x09);
+            break; /* Home */
+        case 0x48:
+            t3100e_notify_set(0x0a);
+            break; /* Up */
+        case 0x49:
+            t3100e_notify_set(0x0b);
+            break; /* PgUp */
+        case 0x4a:
+            t3100e_notify_set(0x0c);
+            break; /* Keypad - */
+        case 0x4b:
+            t3100e_notify_set(0x0d);
+            break; /* Left */
+        case 0x4c:
+            t3100e_notify_set(0x0e);
+            break; /* KP 5 */
+        case 0x4d:
+            t3100e_notify_set(0x0f);
+            break; /* Right */
+    }
+
+    kbc_at_log("ATkbc: translate is %s, ", translate ? "on" : "off");
+#ifdef ENABLE_KBC_AT_LOG /* same switch as kbc_at_log(), so the line started above is always completed */
+    kbc_at_log("scan code: ");
+    if (translate) {
+        kbc_at_log("%02X (original: ", (nont_to_t[val] | dev->sc_or));
+        if (dev->sc_or == 0x80)
+            kbc_at_log("F0 ");
+        kbc_at_log("%02X)\n", val);
+    } else
+        kbc_at_log("%02X\n", val);
+#endif
+
+    ret = translate ? (nont_to_t[val] | dev->sc_or) : val; /* a pending F0 prefix becomes bit 7 of the translated code */
+
+    if (dev->sc_or == 0x80)
+        dev->sc_or = 0;
+
+    return ret;
+}
+
+static void
+kbc_send_to_ob(atkbc_t *dev, uint8_t val, uint8_t channel, uint8_t stat_hi) /* Place a byte in the output buffer, update status, signal the IRQ. */
+{
+    uint8_t kbc_ven = dev->flags & KBC_VEN_MASK;
+    int temp = (channel == 1) ? kbc_translate(dev, val) : val; /* only channel 1 bytes go through translation */
+
+    if (temp == -1)
+        return; /* kbc_translate() swallowed the byte: nothing to output */
+
+    if ((kbc_ven == KBC_VEN_AMI) || (kbc_ven == KBC_VEN_TRIGEM_AMI) ||
+        (dev->misc_flags & FLAG_PS2))
+        stat_hi |= ((dev->p1 & 0x80) ? 0x10 : 0x00); /* status bit 4 mirrors bit 7 of p1 */
+    else
+        stat_hi |= 0x10; /* other controllers: status bit 4 is always set */
+
+    kbc_at_log("ATkbc: Sending %02X to the output buffer on channel %i...\n", temp, channel);
+    dev->status = (dev->status & ~0xf0) | STAT_OFULL | stat_hi;
+
+    /* WARNING: On PS/2, all IRQ's are level-triggered, but the IBM PS/2 KBC firmware is explicitly
+                written to pulse its P2 IRQ bits, so they should be kept as edge-triggered here. */
+    if (dev->misc_flags & FLAG_PS2) {
+        if (channel >= 2) {
+            dev->status |= STAT_MFULL;
+
+            if (dev->mem[0x20] & 0x02)
+                picint_common(1 << 12, 0, 1);
+            picint_common(1 << 1, 0, 0);
+        } else {
+            if (dev->mem[0x20] & 0x01)
+                picint_common(1 << 1, 0, 1);
+            picint_common(1 << 12, 0, 0);
+        }
+    } else if (dev->mem[0x20] & 0x01)
+        picintlevel(1 << 1); /* AT KBC: IRQ 1 is level-triggered because it is tied to OBF. */
+
+    dev->ob = temp; /* the byte itself is stored last, after status and IRQ handling */
+}
+
+static void
+kbc_delay_to_ob(atkbc_t *dev, uint8_t val, uint8_t channel, uint8_t stat_hi)
+{
+    dev->pending = 1;                   /* mark that a byte is waiting */
+    dev->state   = STATE_KBC_DELAY_OUT; /* single-byte delayed-output state */
+    dev->stat_hi = stat_hi;             /* the three values below are only stashed here, */
+    dev->channel = channel;             /* nothing is written to the output buffer yet   */
+    dev->val     = val;
+}
+
+static void kbc_at_process_cmd(void *priv);
+
+static void
+set_enable_kbd(atkbc_t *dev, uint8_t enable)
+{
+    /* Bit 4 (0x10) of mem[0x20] is a disable flag: cleared when enabling, set when disabling. */
+    dev->mem[0x20] = (dev->mem[0x20] & 0xef) | (enable ? 0x00 : 0x10);
+}
+
+static void
+set_enable_aux(atkbc_t *dev, uint8_t enable)
+{
+    /* Bit 5 (0x20) of mem[0x20] is a disable flag: cleared when enabling, set when disabling. */
+    dev->mem[0x20] = (dev->mem[0x20] & 0xdf) | (enable ? 0x00 : 0x20);
+}
+
+static void
+kbc_ibf_process(atkbc_t *dev) /* Consume the pending input byte: run it as a command or forward it to port 0. */
+{
+    /* IBF set, process both commands and data. */
+    dev->status &= ~STAT_IFULL;
+    dev->state   = STATE_MAIN_IBF;
+    if (dev->status & STAT_CD)
+        kbc_at_process_cmd(dev); /* STAT_CD set: the byte is a controller command */
+    else {
+        set_enable_kbd(dev, 1); /* a data write re-enables the keyboard (clears bit 4 of mem[0x20]) */
+        if ((dev->ports[0] != NULL) && (dev->ports[0]->priv != NULL)) {
+            dev->ports[0]->wantcmd = 1;
+            dev->ports[0]->dat = dev->ib;
+            dev->state         = STATE_SEND_KBD;
+        } else
+            kbc_delay_to_ob(dev, 0xfe, 1, 0x40); /* no device on port 0: queue 0xFE with status bit 0x40 (STAT_RTIMEOUT's value) */
+    }
+}
+
+static void
+kbc_scan_kbd_at(atkbc_t *dev) /* If the keyboard is enabled, move a waiting byte from port 0 to the output buffer. */
+{
+    if (!(dev->mem[0x20] & 0x10)) {
+        /* Both OBF and IBF clear and keyboard is enabled. */
+        /* XT mode. */
+        if (dev->mem[0x20] & 0x20) {
+            if ((dev->ports[0] != NULL) && (dev->ports[0]->out_new != -1)) { /* out_new == -1 means nothing is waiting */
+                kbc_send_to_ob(dev, dev->ports[0]->out_new, 1, 0x00);
+                dev->ports[0]->out_new = -1;
+                dev->state             = STATE_MAIN_IBF;
+            } else if (dev->status & STAT_IFULL)
+                kbc_ibf_process(dev); /* nothing from the keyboard: service a pending host write instead */
+        /* AT mode. */
+        } else {
+            // dev->t = dev->mem[0x28];
+            if (dev->mem[0x2e] != 0x00) {
+                // if (!(dev->t & 0x02))
+                    // return;
+                dev->mem[0x2e] = 0x00;
+            }
+            dev->p2 &= 0xbf; /* clear bit 6 of p2 */
+            if ((dev->ports[0] != NULL) && (dev->ports[0]->out_new != -1)) {
+                /* In our case, we never have noise on the line, so we can simplify this. */
+                /* Read data from the keyboard. */
+                if (dev->mem[0x20] & 0x40) {
+                    if ((dev->mem[0x20] & 0x08) || (dev->p1 & 0x80)) /* otherwise the byte is dropped without being sent */
+                        kbc_send_to_ob(dev, dev->ports[0]->out_new, 1, 0x00);
+                    dev->mem[0x2d] = (dev->ports[0]->out_new == 0xf0) ? 0x80 : 0x00; /* records whether the byte was an F0 prefix */
+                } else
+                    kbc_send_to_ob(dev, dev->ports[0]->out_new, 1, 0x00);
+                dev->ports[0]->out_new = -1;
+                dev->state             = STATE_MAIN_IBF;
+            }
+        }
+    }
+}
+
+static void    write_p2(atkbc_t *dev, uint8_t val);
+
+/* One step of the controller state machine for AT-class (non-PS/2) operation.
+   Dispatches on dev->state; each case either handles a byte waiting in the
+   input buffer (STAT_IFULL), moves a byte into the output buffer, or changes
+   dev->state for the next step. */
+static void
+kbc_at_poll_at(atkbc_t *dev)
+{
+    switch (dev->state) {
+        case STATE_RESET:
+            /* In reset, the input byte is always consumed, but only command AA
+               (written with STAT_CD set) is actually processed. */
+            if (dev->status & STAT_IFULL) {
+                /* Keep the low status nibble, force bit 4 on, clear IBF. */
+                dev->status = ((dev->status & 0x0f) | 0x10) & ~STAT_IFULL;
+                if ((dev->status & STAT_CD) && (dev->ib == 0xaa))
+                    kbc_at_process_cmd(dev);
+            }
+            break;
+        case STATE_KBC_AMI_OUT:
+            /* Stay in this state until the host has emptied the output buffer. */
+            if (dev->status & STAT_OFULL)
+                break;
+            /* FALLTHROUGH */
+        case STATE_MAIN_IBF:
+        default:
+at_main_ibf:
+           if (dev->status & STAT_OFULL) {
+                /* OBF set, wait until it is cleared but still process commands. */
+                if ((dev->status & STAT_IFULL) && (dev->status & STAT_CD)) {
+                    dev->status &= ~STAT_IFULL;
+                    kbc_at_process_cmd(dev);
+                }
+            } else if (dev->status & STAT_IFULL)
+                kbc_ibf_process(dev);
+            else if (!(dev->mem[0x20] & 0x10))
+                /* Both buffers empty and keyboard enabled: scan it next step. */
+                dev->state = STATE_MAIN_KBD;
+            break;
+        case STATE_MAIN_KBD:
+        case STATE_MAIN_BOTH:
+            /* Host input takes priority over scanning the keyboard. */
+            if (dev->status & STAT_IFULL)
+                kbc_ibf_process(dev);
+            else {
+                (void) kbc_scan_kbd_at(dev);
+                dev->state = STATE_MAIN_IBF;
+            }
+            break;
+        case STATE_KBC_DELAY_OUT:
+            /* A keyboard controller command wants to output a single byte
+               (latched earlier by kbc_delay_to_ob()). */
+            kbc_at_log("ATkbc: %02X coming from channel %i with high status %02X\n", dev->val, dev->channel, dev->stat_hi);
+            kbc_send_to_ob(dev, dev->val, dev->channel, dev->stat_hi);
+            // dev->state = (dev->pending == 2) ? STATE_KBC_AMI_OUT : STATE_MAIN_IBF;
+            dev->state = STATE_MAIN_IBF;
+            dev->pending = 0;
+            /* Run the main IBF handling in the same step; the break below
+               is never reached. */
+            goto at_main_ibf;
+            break;
+        case STATE_KBC_OUT:
+            /* A keyboard controller command wants to output multiple bytes
+               from key_ctrl_queue. */
+            if (dev->status & STAT_IFULL) {
+                /* Data from host aborts dumping. */
+                /* NOTE(review): execution still continues into the block below,
+                   which may send another queued byte and overwrite the state set
+                   by kbc_ibf_process() - confirm whether that is intended. */
+                dev->state = STATE_MAIN_IBF;
+                kbc_ibf_process(dev);
+            }
+            /* Do not continue dumping until OBF is clear. */
+            if (!(dev->status & STAT_OFULL)) {
+                kbc_at_log("ATkbc: %02X coming from channel 0\n", dev->key_ctrl_queue[dev->key_ctrl_queue_start]);
+                kbc_send_to_ob(dev, dev->key_ctrl_queue[dev->key_ctrl_queue_start], 0, 0x00);
+                /* The queue index wraps at 64 entries. */
+                dev->key_ctrl_queue_start = (dev->key_ctrl_queue_start + 1) & 0x3f;
+                /* Queue drained: back to the main state. */
+                if (dev->key_ctrl_queue_start == dev->key_ctrl_queue_end)
+                    dev->state = STATE_MAIN_IBF;
+            }
+            break;
+        case STATE_KBC_PARAM:
+            /* Keyboard controller command wants data, wait for said data. */
+            if (dev->status & STAT_IFULL) {
+                /* Command written, abort current command. */
+                if (dev->status & STAT_CD)
+                    dev->state = STATE_MAIN_IBF;
+
+                dev->status &= ~STAT_IFULL;
+                kbc_at_process_cmd(dev);
+            }
+            break;
+        case STATE_SEND_KBD:
+            /* Wait for the keyboard on port 0 to clear wantcmd, i.e. to have
+               taken the byte handed over by kbc_ibf_process(). */
+            if (!dev->ports[0]->wantcmd)
+                dev->state = STATE_SCAN_KBD;
+            break;
+        case STATE_SCAN_KBD:
+            /* Keep scanning; kbc_scan_kbd_at() changes the state itself once
+               it has moved a byte to the output buffer. */
+            kbc_scan_kbd_at(dev);
+            break;
+    }
+}
+
+/*
+    Correct Procedure:
+        1. Controller asks the device (keyboard or auxiliary device) for a byte.
+        2. The device, unless it's in the reset or command states, sees if there's anything to give it,
+           and if yes, begins the transfer.
+        3. The controller checks if there is a transfer, if yes, transfers the byte and sends it to the host,
+           otherwise, checks the next device, or if there is no device left to check, checks if IBF is full
+           and if yes, processes it.
+ */
+/* PS/2-class controller: if the device on port 0 (channel 1) has a byte
+   ready, move it to the output buffer, mark it as consumed and return to
+   STATE_MAIN_IBF. Returns 1 when a byte was transferred, 0 otherwise. */
+static int
+kbc_scan_kbd_ps2(atkbc_t *dev)
+{
+    /* No device on port 0, or nothing offered by it. */
+    if ((dev->ports[0] == NULL) || (dev->ports[0]->out_new == -1))
+        return 0;
+
+    kbc_at_log("ATkbc: %02X coming from channel 1\n", dev->ports[0]->out_new & 0xff);
+    kbc_send_to_ob(dev, dev->ports[0]->out_new, 1, 0x00);
+
+    dev->state             = STATE_MAIN_IBF;
+    dev->ports[0]->out_new = -1;
+
+    return 1;
+}
+
+/* PS/2-class controller: if the device on port 1 (channel 2) has a byte
+   ready, move it to the output buffer, mark it as consumed and return to
+   STATE_MAIN_IBF. Returns 1 when a byte was transferred, 0 otherwise. */
+static int
+kbc_scan_aux_ps2(atkbc_t *dev)
+{
+    /* No device on port 1, or nothing offered by it. */
+    if ((dev->ports[1] == NULL) || (dev->ports[1]->out_new == -1))
+        return 0;
+
+    kbc_at_log("ATkbc: %02X coming from channel 2\n", dev->ports[1]->out_new & 0xff);
+    kbc_send_to_ob(dev, dev->ports[1]->out_new, 2, 0x00);
+
+    dev->state             = STATE_MAIN_IBF;
+    dev->ports[1]->out_new = -1;
+
+    return 1;
+}
+
+/* One step of the controller state machine for PS/2-class operation.
+   Dispatches on dev->state; compared to the AT variant it additionally
+   handles the auxiliary device on port 1 (channel 2). */
+static void
+kbc_at_poll_ps2(atkbc_t *dev)
+{
+    switch (dev->state) {
+        case STATE_RESET:
+            /* In reset, the input byte is always consumed, but only command AA
+               (written with STAT_CD set) is actually processed. */
+            if (dev->status & STAT_IFULL) {
+                /* Keep the low status nibble, force bit 4 on, clear IBF. */
+                dev->status = ((dev->status & 0x0f) | 0x10) & ~STAT_IFULL;
+                if ((dev->status & STAT_CD) && (dev->ib == 0xaa))
+                    kbc_at_process_cmd(dev);
+            }
+            break;
+        case STATE_KBC_AMI_OUT:
+            /* Stay in this state until the host has emptied the output buffer. */
+            if (dev->status & STAT_OFULL)
+                break;
+            /* FALLTHROUGH */
+        case STATE_MAIN_IBF:
+        default:
+ps2_main_ibf:
+            /* Host input first; otherwise, with the output buffer empty, pick
+               which device(s) to scan based on the command byte. */
+            if (dev->status & STAT_IFULL)
+                kbc_ibf_process(dev);
+            else if (!(dev->status & STAT_OFULL)) {
+                if (dev->mem[0x20] & 0x20) {
+                    /* Auxiliary port disabled: scan the keyboard only, and only
+                       if it is enabled. P2 bit 6 is pulled low for the scan. */
+                    if (!(dev->mem[0x20] & 0x10)) {
+                        dev->p2 &= 0xbf;
+                        dev->state = STATE_MAIN_KBD;
+                    }
+                } else {
+                    /* Auxiliary port enabled: P2 bit 3 is pulled low. */
+                    dev->p2 &= 0xf7;
+                    if (dev->mem[0x20] & 0x10)
+                        /* Keyboard disabled: scan the auxiliary device only. */
+                        dev->state = STATE_MAIN_AUX;
+                    else {
+                        /* Both enabled: P2 bit 6 low as well, scan both. */
+                        dev->p2 &= 0xbf;
+                        dev->state = STATE_MAIN_BOTH;
+                    }
+                }
+            }
+            break;
+        case STATE_MAIN_KBD:
+            /* Host input takes priority over scanning the keyboard. */
+            if (dev->status & STAT_IFULL)
+                kbc_ibf_process(dev);
+            else {
+                (void) kbc_scan_kbd_ps2(dev);
+                dev->state = STATE_MAIN_IBF;
+            }
+            break;
+        case STATE_MAIN_AUX:
+            /* Host input takes priority over scanning the auxiliary device. */
+            if (dev->status & STAT_IFULL)
+                kbc_ibf_process(dev);
+            else {
+                (void) kbc_scan_aux_ps2(dev);
+                dev->state = STATE_MAIN_IBF;
+            }
+            break;
+        case STATE_MAIN_BOTH:
+            /* Keyboard first; the auxiliary device is only checked (on the
+               next step) when the keyboard had nothing to send. */
+            if (kbc_scan_kbd_ps2(dev))
+                dev->state = STATE_MAIN_IBF;
+            else
+                dev->state = STATE_MAIN_AUX;
+            break;
+        case STATE_KBC_DELAY_OUT:
+            /* A keyboard controller command wants to output a single byte
+               (latched earlier by kbc_delay_to_ob()). */
+            kbc_at_log("ATkbc: %02X coming from channel %i with high status %02X\n", dev->val, dev->channel, dev->stat_hi);
+            kbc_send_to_ob(dev, dev->val, dev->channel, dev->stat_hi);
+            // dev->state = (dev->pending == 2) ? STATE_KBC_AMI_OUT : STATE_MAIN_IBF;
+            dev->state = STATE_MAIN_IBF;
+            dev->pending = 0;
+            /* Run the main IBF handling in the same step; the break below
+               is never reached. */
+            goto ps2_main_ibf;
+            break;
+        case STATE_KBC_OUT:
+            /* A keyboard controller command wants to output multiple bytes
+               from key_ctrl_queue. */
+            if (dev->status & STAT_IFULL) {
+                /* Data from host aborts dumping. */
+                /* NOTE(review): execution still continues into the block below,
+                   which may send another queued byte and overwrite the state set
+                   by kbc_ibf_process() - confirm whether that is intended. */
+                dev->state = STATE_MAIN_IBF;
+                kbc_ibf_process(dev);
+            }
+            /* Do not continue dumping until OBF is clear. */
+            if (!(dev->status & STAT_OFULL)) {
+                kbc_at_log("ATkbc: %02X coming from channel 0\n", dev->key_ctrl_queue[dev->key_ctrl_queue_start] & 0xff);
+                kbc_send_to_ob(dev, dev->key_ctrl_queue[dev->key_ctrl_queue_start], 0, 0x00);
+                /* The queue index wraps at 64 entries. */
+                dev->key_ctrl_queue_start = (dev->key_ctrl_queue_start + 1) & 0x3f;
+                /* Queue drained: back to the main state. */
+                if (dev->key_ctrl_queue_start == dev->key_ctrl_queue_end)
+                    dev->state = STATE_MAIN_IBF;
+            }
+            break;
+        case STATE_KBC_PARAM:
+            /* Keyboard controller command wants data, wait for said data. */
+            if (dev->status & STAT_IFULL) {
+                /* Command written, abort current command. */
+                if (dev->status & STAT_CD)
+                    dev->state = STATE_MAIN_IBF;
+
+                dev->status &= ~STAT_IFULL;
+                kbc_at_process_cmd(dev);
+            }
+            break;
+        case STATE_SEND_KBD:
+            /* Wait for the device on port 0 to clear wantcmd. */
+            if (!dev->ports[0]->wantcmd)
+                dev->state = STATE_SCAN_KBD;
+            break;
+        case STATE_SCAN_KBD:
+            /* Keep scanning; the scan routine changes the state itself once
+               it has moved a byte to the output buffer. */
+            (void) kbc_scan_kbd_ps2(dev);
+            break;
+        case STATE_SEND_AUX:
+            /* Wait for the device on port 1 to clear wantcmd. */
+            if (!dev->ports[1]->wantcmd)
+                dev->state = STATE_SCAN_AUX;
+            break;
+        case STATE_SCAN_AUX:
+            /* Keep scanning; the scan routine changes the state itself once
+               it has moved a byte to the output buffer. */
+            (void) kbc_scan_aux_ps2(dev);
+            break;
+    }
+}
+
+/* Timer callback driving the controller: re-arms send_delay_timer by
+   100 * TIMER_USEC, runs one step of the currently selected controller state
+   machine (kbc_at_do_poll), then polls every attached device on the two
+   kbc_at_ports slots, port 0 before port 1. priv is the atkbc_t instance. */
+static void
+kbc_at_poll(void *priv)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    timer_advance_u64(&dev->send_delay_timer, (100ULL * TIMER_USEC));
+
+    /* TODO: Implement the password security state. */
+    kbc_at_do_poll(dev);
+
+    for (int i = 0; i < 2; i++) {
+        /* Skip empty slots and ports without a device behind them. */
+        if ((kbc_at_ports[i] == NULL) || (kbc_at_ports[i]->priv == NULL))
+            continue;
+
+        kbc_at_ports[i]->poll(kbc_at_ports[i]->priv);
+    }
+}
+
+/* Apply a new value to the controller's output port P2.
+   Bit 1 drives the A20 gate: when it differs from mem_a20_key, the key is
+   updated and the A20 state and MMU cache are refreshed.
+   Bit 0 is the CPU reset line: when it changes and the new value is 0, the
+   machine is reset (hard reset on coreboot machines, soft reset otherwise).
+   dev->p2 is only updated at the very end. */
+static void
+write_p2(atkbc_t *dev, uint8_t val)
+{
+    const uint8_t prev    = dev->p2;
+    const uint8_t kbc_ven = dev->flags & KBC_VEN_MASK;
+
+    kbc_at_log("ATkbc: write P2: %02X (old: %02X)\n", val, dev->p2);
+
+#if 0
+    /* PS/2: Handle IRQ's. */
+    if (dev->misc_flags & FLAG_PS2) {
+        /* IRQ 12 */
+        picint_common(1 << 12, 0, val & 0x20);
+
+        /* IRQ 1 */
+        picint_common(1 << 1, 0, val & 0x10);
+    }
+#endif
+
+    /* AT, PS/2: A20 gate, compared against the global key rather than the
+       previous P2 value. */
+    if ((mem_a20_key ^ val) & 0x02) {
+        mem_a20_key = val & 0x02;
+        mem_a20_recalc();
+        flushmmucache();
+    }
+
+    /* AT, PS/2: reset line. Only a change of bit 0 that ends at 0 triggers
+       the reset. */
+    if (((prev ^ val) & 0x01) && !(val & 0x01)) {
+        kbc_at_log("write_p2(): Pulse reset!\n");
+
+        if (machines[machine].flags & MACHINE_COREBOOT) {
+            /* SeaBIOS falls back to a KBC reset for its hard reset when ACPI
+               RESET_REG is unavailable. A soft reset here would trap it in a
+               reset loop, so coreboot machines get a hard reset instead. */
+            pc_reset_hard();
+        } else {
+            softresetx86();
+            cpu_set_edx();
+            flushmmucache();
+            if (kbc_ven == KBC_VEN_ALI)
+                smbase = 0x00030000;
+        }
+    }
+
+    /* Stored last to avoid an infinite reset loop. */
+    dev->p2 = val;
+}
+
+/* Reduced variant of write_p2() for fast A20 mode: only the A20 gate (bit 1)
+   is acted upon, and only when it differs from the previous P2 value. The
+   reset line is not evaluated here. dev->p2 always receives val. */
+static void
+write_p2_fast_a20(atkbc_t *dev, uint8_t val)
+{
+    const uint8_t changed = dev->p2 ^ val;
+
+    kbc_at_log("ATkbc: write P2 in fast A20 mode: %02X (old: %02X)\n", val, dev->p2);
+
+    /* AT, PS/2: A20 enable change. */
+    if (changed & 0x02) {
+        mem_a20_key = val & 0x02;
+        mem_a20_recalc();
+        flushmmucache();
+    }
+
+    dev->p2 = val;
+}
+
+/* Apply the side effects of writing val as the controller command byte.
+   This function does not store val into mem[0x20] itself; it only adjusts
+   related state:
+     - PS/2 type 2 controllers get the translate bit forced off;
+     - non-PS/2 controllers set mem[0x2e] when bit 4 of val is set, and have
+       P2 bits 4-7 rebuilt from val;
+     - the system flag in the status register follows val. */
+static void
+write_cmd(atkbc_t *dev, uint8_t val)
+{
+    const int is_at = !(dev->misc_flags & FLAG_PS2);
+
+    kbc_at_log("ATkbc: write command byte: %02X (old: %02X)\n", val, dev->mem[0x20]);
+
+    if ((dev->flags & KBC_TYPE_MASK) == KBC_TYPE_PS2_2) {
+        /* PS/2 type 2 keyboard controllers always force the XLAT bit to 0. */
+        dev->mem[0x20] &= ~CCB_TRANSLATE;
+        val &= ~CCB_TRANSLATE;
+    } else if (is_at && (val & 0x10))
+        dev->mem[0x2e] = 0x01;
+
+    kbc_at_log("ATkbc: keyboard interrupt is now %s\n", (val & 0x01) ? "enabled" : "disabled");
+
+    if (is_at) {
+        /* Keep P2 bits 0-3, copy bits 0-1 of val to P2 bits 4-5, and raise
+           P2 bits 6-7 when bit 5 of val is set. */
+        uint8_t p2_low  = dev->p2 & 0x0f;
+        uint8_t p2_irq  = (val & 0x03) << 4;
+        uint8_t p2_high = (val & 0x20) ? 0xc0 : 0x00;
+
+        write_p2(dev, p2_low | p2_irq | p2_high);
+    }
+
+    kbc_at_log("ATkbc: Command byte now: %02X (%02X)\n", dev->mem[0x20], val);
+
+    /* Mirror the system flag into the status register. */
+    dev->status &= ~STAT_SYSFLAG;
+    dev->status |= (val & STAT_SYSFLAG);
+}
+
+/* Start a pulse on the low four P2 lines. mask is the low nibble of the
+   pulse command; lines whose mask bit is 0 are pulled low now, and the bits
+   to put back are remembered in dev->old_p2 for pulse_poll(), which is
+   scheduled through pulse_cb after 6 * TIMER_USEC. A mask of 0x0F selects no
+   line and is a no-op. */
+static void
+pulse_output(atkbc_t *dev, uint8_t mask)
+{
+    /* Nothing selected: do not touch P2 or the timer. */
+    if (mask == 0x0f)
+        return;
+
+    const uint8_t keep   = 0xf0 | mask;
+    const uint8_t pulsed = dev->p2 & keep;
+
+    /* Bits of the current P2 that are about to be cleared. */
+    dev->old_p2 = dev->p2 & ~keep;
+
+    kbc_at_log("ATkbc: pulse_output(): P2 now: %02X\n", pulsed);
+    write_p2(dev, pulsed);
+    timer_set_delay_u64(&dev->pulse_cb, 6ULL * TIMER_USEC);
+}
+
+/* Timer callback ending a pulse started by pulse_output(): the bits saved in
+   dev->old_p2 are OR-ed back into P2. priv is the atkbc_t instance. */
+static void
+pulse_poll(void *priv)
+{
+    atkbc_t      *dev      = (atkbc_t *) priv;
+    const uint8_t restored = dev->p2 | dev->old_p2;
+
+    kbc_at_log("ATkbc: pulse_poll(): P2 now: %02X\n", restored);
+    write_p2(dev, restored);
+}
+
+/* Generic handler for a controller command byte val (the name suggests it
+   serves writes to port 64h). priv is the atkbc_t instance.
+   Returns 0 when the command was recognized and handled, 1 when it is a bad
+   command. Commands guarded by FLAG_PS2 are only handled on PS/2-class
+   controllers; otherwise they fall out of the switch and are reported as bad.
+   Commands that need a parameter byte set wantdata and move the state machine
+   to STATE_KBC_PARAM; commands that return a byte latch it through
+   kbc_delay_to_ob(). */
+static uint8_t
+write64_generic(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+    uint8_t  current_drive, fixed_bits;
+    uint8_t  kbc_ven = 0x0;
+    kbc_ven          = dev->flags & KBC_VEN_MASK;
+
+    switch (val) {
+        case 0xa4: /* check if password installed */
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_at_log("ATkbc: check if password installed\n");
+                /* Always answers F1. */
+                kbc_delay_to_ob(dev, 0xf1, 0, 0x00);
+                return 0;
+            }
+            break;
+
+        case 0xa5: /* load security */
+            kbc_at_log("ATkbc: load security\n");
+            dev->wantdata = 1;
+            dev->state = STATE_KBC_PARAM;
+            return 0;
+
+        case 0xa7: /* disable auxiliary port */
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_at_log("ATkbc: disable auxiliary port\n");
+                set_enable_aux(dev, 0);
+                return 0;
+            }
+            break;
+
+        case 0xa8: /* Enable auxiliary port */
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_at_log("ATkbc: enable auxiliary port\n");
+                set_enable_aux(dev, 1);
+                return 0;
+            }
+            break;
+
+        case 0xa9: /* Test auxiliary port */
+            /* Logged even when the command ends up rejected on non-PS/2. */
+            kbc_at_log("ATkbc: test auxiliary port\n");
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_delay_to_ob(dev, 0x00, 0, 0x00); /* no error, this is testing the channel 2 interface */
+                return 0;
+            }
+            break;
+
+        case 0xaf: /* read keyboard version */
+            kbc_at_log("ATkbc: read keyboard version\n");
+            kbc_delay_to_ob(dev, kbc_award_revision, 0, 0x00);
+            return 0;
+
+        /*
+                                                                                P1 bits: 76543210
+                                                                                -----------------
+           IBM PS/1:                                                                     xxxxxxxx
+           IBM PS/2 MCA:                                                                 xxxxx1xx
+           Intel AMI Pentium BIOS'es with AMI MegaKey KB-5 keyboard controller:          x1x1xxxx
+           Acer:                                                                         xxxxx0xx
+           Packard Bell PB450:                                                           xxxxx1xx
+           P6RP4:                                                                        xx1xx1xx
+           Epson Action Tower 2600:                                                      xxxx01xx
+           TriGem Hawk:                                                                  xxxx11xx
+
+           Machine input based on current code:                                          11111111
+           Everything non-Green:    Pull down bit 7 if not PS/2 and keyboard is inhibited.
+                                    Pull down bit 6 if primary display is CGA.
+           Xi8088:                  Pull down bit 6 if primary display is MDA.
+           Acer:                    Pull down bit 6 if primary display is MDA.
+                                    Pull down bit 2 always (must be so to enable CMOS Setup).
+           IBM PS/1:                Pull down bit 6 if current floppy drive is 3.5".
+           Epson Action Tower 2600: Pull down bit 3 always (for Epson logo).
+           NCR:                     Pull down bit 5 always (power-on default speed = high).
+                                    Pull down bit 3 if there is no FPU.
+                                    Pull down bits 1 and 0 always?
+           Compaq:                  Pull down bit 6 if Compaq dual-scan display is in use.
+                                    Pull down bit 5 if system board DIP switch is ON.
+                                    Pull down bit 4 if CPU speed selected is auto.
+                                    Pull down bit 3 if CPU speed selected is slow (4 MHz).
+                                    Pull down bit 2 if FPU is present.
+                                    Pull down bits 1 and 0 always?
+
+           Bit 7: AT KBC only - keyboard inhibited (often physical lock): 0 = yes, 1 = no (also Compaq);
+           Bit 6: Mostly, display: 0 = CGA, 1 = MDA, inverted on Xi8088 and Acer KBC's;
+                  Intel AMI MegaKey KB-5: Used for green features, SMM handler expects it to be set;
+                  IBM PS/1 Model 2011: 0 = current FDD is 3.5", 1 = current FDD is 5.25";
+                  Compaq: 0 = Compaq dual-scan display, 1 = non-Compaq display.
+           Bit 5: Mostly, manufacturing jumper: 0 = installed (infinite loop at POST), 1 = not installed;
+                  NCR: power-on default speed: 0 = high, 1 = low;
+                  Compaq: System board DIP switch 5: 0 = ON, 1 = OFF.
+           Bit 4: (Which board?): RAM on motherboard: 0 = 512 kB, 1 = 256 kB;
+                  NCR: RAM on motherboard: 0 = unsupported, 1 = 512 kB;
+                  Intel AMI MegaKey KB-5: Must be 1;
+                  IBM PS/1: Ignored;
+                  Compaq: 0 = Auto speed selected, 1 = High speed selected.
+           Bit 3: TriGem AMIKey: most significant bit of 2-bit OEM ID;
+                  NCR: Coprocessor detect (1 = yes, 0 = no);
+                  Compaq: 0 = Slow (4 MHz), 1 = Fast (8 MHz);
+                  Sometimes configured for clock switching;
+           Bit 2: TriGem AMIKey: least significant bit of 2-bit OEM ID;
+                  Bit 3, 2:
+                      1, 1: TriGem logo;
+                      1, 0: Garbled logo;
+                      0, 1: Epson logo;
+                      0, 0: Generic AMI logo.
+                  NCR: Unused;
+                  IBM PS/2: Keyboard power: 0 = no power (fuse error), 1 = OK
+                  (for some reason, www.win.tue.nl has this in reverse);
+                  Compaq: FPU: 0 = 80287, 1 = none;
+                  Sometimes configured for clock switching;
+           Bit 1: PS/2: Auxiliary device data in;
+                  Compaq: Reserved;
+                  NCR: High/auto speed.
+           Bit 0: PS/2: Keyboard device data in;
+                  Compaq: Reserved;
+                  NCR: DMA mode.
+         */
+        case 0xc0: /* read P1 */
+            kbc_at_log("ATkbc: read P1\n");
+            /* Bit 2 is reported as set by default; vendors add to this below. */
+            fixed_bits = 4;
+            /* The SMM handlers of Intel AMI Pentium BIOS'es expect bit 6 to be set. */
+            if ((kbc_ven == KBC_VEN_AMI) && ((dev->flags & KBC_TYPE_MASK) == KBC_TYPE_GREEN))
+                fixed_bits |= 0x40;
+            if (kbc_ven == KBC_VEN_IBM_PS1) {
+                current_drive = fdc_get_current_drive();
+                /* (B0 or F0) | (fdd_is_525(current_drive) on bit 6) */
+                kbc_delay_to_ob(dev, dev->p1 | fixed_bits | (fdd_is_525(current_drive) ? 0x40 : 0x00),
+                                0, 0x00);
+            } else if (kbc_ven == KBC_VEN_NCR) {
+                /* switch settings
+                 * bit 7: keyboard disable
+                 * bit 6: display type (0 color, 1 mono)
+                 * bit 5: power-on default speed (0 high, 1 low)
+                 * bit 4: sense RAM size (0 unsupported, 1 512k on system board)
+                 * bit 3: coprocessor detect
+                 * bit 2: unused
+                 * bit 1: high/auto speed
+                 * bit 0: dma mode
+                 */
+                /* (B0 or F0) | 0x04 | (display on bit 6) | (fpu on bit 3) */
+                kbc_delay_to_ob(dev, (dev->p1 | fixed_bits | (video_is_mda() ? 0x40 : 0x00) | (hasfpu ? 0x08 : 0x00)) & 0xdf,
+                                0, 0x00);
+            } else if (kbc_ven == KBC_VEN_TRIGEM_AMI) {
+                /* Bit 3, 2:
+                       1, 1: TriGem logo;
+                       1, 0: Garbled logo;
+                       0, 1: Epson logo;
+                       0, 0: Generic AMI logo. */
+                if (dev->misc_flags & FLAG_PCI)
+                    fixed_bits |= 8;
+                /* (B0 or F0) | (0x04 or 0x0c) */
+                kbc_delay_to_ob(dev, dev->p1 | fixed_bits, 0, 0x00);
+            } else if (((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) && ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_GREEN))
+                /* (B0 or F0) | (0x08 or 0x0c) */
+                kbc_delay_to_ob(dev, ((dev->p1 | fixed_bits) & 0xf0) | (((dev->flags & KBC_VEN_MASK) == KBC_VEN_ACER) ? 0x08 : 0x0c), 0, 0x00);
+            else
+                /* (B0 or F0) | (0x04 or 0x44) */
+                kbc_delay_to_ob(dev, dev->p1 | fixed_bits, 0, 0x00);
+            /* After every read, the low two bits of P1 are incremented
+               (wrapping within those two bits); the upper six are kept. */
+            dev->p1 = ((dev->p1 + 1) & 3) | (dev->p1 & 0xfc);
+            return 0;
+
+        case 0xc1: /*Copy bits 0 to 3 of P1 to status bits 4 to 7*/
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_at_log("ATkbc: copy bits 0 to 3 of P1 to status bits 4 to 7\n");
+                dev->status &= 0x0f;
+                dev->status |= (dev->p1 << 4);
+                return 0;
+            }
+            break;
+
+        case 0xc2: /*Copy bits 4 to 7 of P1 to status bits 4 to 7*/
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_at_log("ATkbc: copy bits 4 to 7 of P1 to status bits 4 to 7\n");
+                dev->status &= 0x0f;
+                dev->status |= (dev->p1 & 0xf0);
+                return 0;
+            }
+            break;
+
+        case 0xd3: /* write auxiliary output buffer */
+            if (dev->misc_flags & FLAG_PS2) {
+                kbc_at_log("ATkbc: write auxiliary output buffer\n");
+                dev->wantdata = 1;
+                dev->state = STATE_KBC_PARAM;
+                return 0;
+            }
+            break;
+
+        case 0xd4: /* write to auxiliary port */
+            kbc_at_log("ATkbc: write to auxiliary port\n");
+            dev->wantdata = 1;
+            dev->state = STATE_KBC_PARAM;
+            return 0;
+
+        case 0xf0 ... 0xff:
+            /* Pulse output lines: the low nibble of the command is the mask
+               handed to pulse_output(). */
+            kbc_at_log("ATkbc: pulse %01X\n", val & 0x0f);
+            pulse_output(dev, val & 0x0f);
+            return 0;
+    }
+
+    /* Unknown command, or a PS/2-only command on a non-PS/2 controller. */
+    kbc_at_log("ATkbc: bad command %02X\n", val);
+    return 1;
+}
+
+/* AMI-specific handler for the parameter byte val belonging to the pending
+   controller command stored in dev->command. priv is the atkbc_t instance.
+   Returns 0 when the pending command is one handled here, 1 otherwise. */
+static uint8_t
+write60_ami(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    switch (dev->command) {
+        /* 0x40 - 0x5F are aliases for 0x60-0x7F */
+        case 0x40 ... 0x5f:
+            kbc_at_log("ATkbc: AMI - alias write to %08X\n", dev->command);
+            dev->mem[(dev->command & 0x1f) + 0x20] = val;
+            /* Command 0x40 is the alias of 0x60 and targets mem[0x20], the
+               command byte, so its side effects have to be applied as well.
+               The pending command can never be 0x60 inside this case range,
+               hence the comparison against the alias value. */
+            if (dev->command == 0x40)
+                write_cmd(dev, val);
+            return 0;
+
+        case 0xaf: /* set extended controller RAM */
+            kbc_at_log("ATkbc: AMI - set extended controller RAM\n");
+            if (dev->command_phase == 1) {
+                /* First parameter: the address; ask for the data byte next. */
+                dev->mem_addr      = val;
+                dev->wantdata      = 1;
+                dev->state         = STATE_KBC_PARAM;
+                dev->command_phase = 2;
+            } else if (dev->command_phase == 2) {
+                /* Second parameter: the value to store at that address. */
+                dev->mem[dev->mem_addr] = val;
+                dev->command_phase      = 0;
+            }
+            return 0;
+
+        case 0xc1:
+            kbc_at_log("ATkbc: AMI MegaKey - write %02X to P1\n", val);
+            dev->p1 = val;
+            return 0;
+
+        case 0xcb: /* set keyboard mode */
+            kbc_at_log("ATkbc: AMI - set keyboard mode\n");
+            dev->ami_flags = val;
+            /* Bit 0 of the mode byte selects PS/2 (1) or AT (0) behavior,
+               including which poll routine drives the state machine. */
+            dev->misc_flags &= ~FLAG_PS2;
+            if (val & 0x01) {
+                kbc_at_log("ATkbc: AMI: Emulate PS/2 keyboard\n");
+                dev->misc_flags |= FLAG_PS2;
+                kbc_at_do_poll = kbc_at_poll_ps2;
+            } else {
+                kbc_at_log("ATkbc: AMI: Emulate AT keyboard\n");
+                kbc_at_do_poll = kbc_at_poll_at;
+            }
+            return 0;
+    }
+
+    /* Not an AMI-specific parameter write. */
+    return 1;
+}
+
+static uint8_t
+write64_ami(void *priv, uint8_t val)
+{
+    atkbc_t *dev     = (atkbc_t *) priv;
+    uint8_t  kbc_ven = dev->flags & KBC_VEN_MASK;
+
+    switch (val) {
+        case 0x00 ... 0x1f:
+            kbc_at_log("ATkbc: AMI - alias read from %08X\n", val);
+            kbc_delay_to_ob(dev, dev->mem[val + 0x20], 0, 0x00);
+            return 0;
+
+        case 0x40 ... 0x5f:
+            kbc_at_log("ATkbc: AMI - alias write to %08X\n", dev->command);
+            dev->wantdata = 1;
+            dev->state    = STATE_KBC_PARAM;
+            return 0;
+
+        case 0xa0: /* copyright message */
+            kbc_at_queue_add(dev, 0x28);
+            kbc_at_queue_add(dev, 0x00);
+            return 0;
+
+        case 0xa1: /* get controller version */
+            kbc_at_log("ATkbc: AMI - get controller version\n");
+            kbc_delay_to_ob(dev, kbc_ami_revision, 0, 0x00);
+            return 0;
+
+        case 0xa2: /* clear keyboard controller lines P22/P23 */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - clear KBC lines P22 and P23\n");
+                write_p2(dev, dev->p2 & 0xf3);
+                kbc_delay_to_ob(dev, 0x00, 0, 0x00);
+                return 0;
+            }
+            break;
+
+        case 0xa3: /* set keyboard controller lines P22/P23 */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - set KBC lines P22 and P23\n");
+                write_p2(dev, dev->p2 | 0x0c);
+                kbc_delay_to_ob(dev, 0x00, 0, 0x00);
+                return 0;
+            }
+            break;
+
+        case 0xa4: /* write clock = low */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - write clock = low\n");
+                dev->misc_flags &= ~FLAG_CLOCK;
+                return 0;
+            }
+            break;
+
+        case 0xa5: /* write clock = high */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - write clock = high\n");
+                dev->misc_flags |= FLAG_CLOCK;
+                return 0;
+            }
+
+        case 0xa6: /* read clock */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - read clock\n");
+                kbc_delay_to_ob(dev, (dev->misc_flags & FLAG_CLOCK) ? 0xff : 0x00, 0, 0x00);
+                return 0;
+            }
+            break;
+
+        case 0xa7: /* write cache bad */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - write cache bad\n");
+                dev->misc_flags &= FLAG_CACHE;
+                return 0;
+            }
+            break;
+
+        case 0xa8: /* write cache good */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - write cache good\n");
+                dev->misc_flags |= FLAG_CACHE;
+                return 0;
+            }
+            break;
+
+        case 0xa9: /* read cache */
+            if (!(dev->misc_flags & FLAG_PS2)) {
+                kbc_at_log("ATkbc: AMI - read cache\n");
+                kbc_delay_to_ob(dev, (dev->misc_flags & FLAG_CACHE) ? 0xff : 0x00, 0, 0x00);
+                return 0;
+            }
+            break;
+
+        case 0xaf: /* set extended controller RAM */
+            if (kbc_ven != KBC_VEN_ALI) {
+                kbc_at_log("ATkbc: set extended controller RAM\n");
+                dev->wantdata      = 1;
+                dev->state         = STATE_KBC_PARAM;
+                dev->command_phase = 1;
+                return 0;
+            }
+            break;
+
+        case 0xb0 ... 0xb3:
+            /* set KBC lines P10-P13 (P1 bits 0-3) low */
+            kbc_at_log("ATkbc: set KBC lines P10-P13 (P1 bits 0-3) low\n");
+            if (!(dev->flags & DEVICE_PCI) || (val > 0xb1))
+                dev->p1 &= ~(1 << (val & 0x03));
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+
+        /* TODO: The ICS SB486PV sends command B4 but expects to read *TWO* bytes. */
+        case 0xb4: case 0xb5:
+            /* set KBC lines P22-P23 (P2 bits 2-3) low */
+            kbc_at_log("ATkbc: set KBC lines P22-P23 (P2 bits 2-3) low\n");
+            if (!(dev->flags & DEVICE_PCI))
+                write_p2(dev, dev->p2 & ~(4 << (val & 0x01)));
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+
+        case 0xb8 ... 0xbb:
+            /* set KBC lines P10-P13 (P1 bits 0-3) high */
+            kbc_at_log("ATkbc: set KBC lines P10-P13 (P1 bits 0-3) high\n");
+            if (!(dev->flags & DEVICE_PCI) || (val > 0xb9)) {
+                dev->p1 |= (1 << (val & 0x03));
+                kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+                dev->pending++;
+            }
+            return 0;
+
+        case 0xbc: case 0xbd:
+            /* set KBC lines P22-P23 (P2 bits 2-3) high */
+            kbc_at_log("ATkbc: set KBC lines P22-P23 (P2 bits 2-3) high\n");
+            if (!(dev->flags & DEVICE_PCI))
+                write_p2(dev, dev->p2 | (4 << (val & 0x01)));
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+
+        case 0xc1: /* write P1 */
+            kbc_at_log("ATkbc: AMI MegaKey - write P1\n");
+            dev->wantdata  = 1;
+            dev->state     = STATE_KBC_PARAM;
+            return 0;
+
+        case 0xc4:
+            /* set KBC line P14 low */
+            kbc_at_log("ATkbc: set KBC line P14 (P1 bit 4) low\n");
+            dev->p1 &= 0xef;
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+        case 0xc5:
+            /* set KBC line P15 low */
+            kbc_at_log("ATkbc: set KBC line P15 (P1 bit 5) low\n");
+            dev->p1 &= 0xdf;
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+
+        case 0xc8:
+            /*
+             * unblock KBC lines P22/P23
+             * (allow command D1 to change bits 2/3 of P2)
+             */
+            kbc_at_log("ATkbc: AMI - unblock KBC lines P22 and P23\n");
+            dev->ami_flags &= 0xfb;
+            return 0;
+
+        case 0xc9:
+            /*
+             * block KBC lines P22/P23
+             * (disallow command D1 from changing bits 2/3 of the port)
+             */
+            kbc_at_log("ATkbc: AMI - block KBC lines P22 and P23\n");
+            dev->ami_flags |= 0x04;
+            return 0;
+
+        case 0xcc:
+            /* set KBC line P14 high */
+            kbc_at_log("ATkbc: set KBC line P14 (P1 bit 4) high\n");
+            dev->p1 |= 0x10;
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+        case 0xcd:
+            /* set KBC line P15 high */
+            kbc_at_log("ATkbc: set KBC line P15 (P1 bit 5) high\n");
+            dev->p1 |= 0x20;
+            kbc_delay_to_ob(dev, dev->ob, 0, 0x00);
+            dev->pending++;
+            return 0;
+
+        case 0xef: /* ??? - sent by AMI486 */
+            kbc_at_log("ATkbc: ??? - sent by AMI486\n");
+            return 0;
+    }
+
+    return write64_generic(dev, val);
+}
+
+/* Quadtel data-byte handler: returns 0 when the pending command was consumed, 1 otherwise. */
+static uint8_t
+write60_quadtel(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    /* Command CF (purpose unknown, sent by the MegaPC BIOS): accept and ignore its data byte. */
+    if (dev->command == 0xcf) {
+        kbc_at_log("ATkbc: ??? - sent by MegaPC BIOS\n");
+        return 0;
+    }
+    return 1;
+}
+
+/* Olivetti controller-command handler; anything other than command 80 goes to write64_generic(). */
+static uint8_t
+write64_olivetti(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+    uint8_t  reply;
+
+    if (val != 0x80)
+        return write64_generic(dev, val);
+    /*
+     * Olivetti-specific command. Reply byte layout:
+     * bit 7: bus expansion board present (M300) / keyboard unlocked (M290)
+     * bits 4-6: ???; bits 0-1: ???
+     * bit 3: fast ram check (if inactive keyboard works erratically)
+     * bit 2: keyboard fuse present
+     */
+    reply = (is386 ? 0x0c : 0x8c) & 0xdf;
+    kbc_delay_to_ob(dev, reply, 0, 0x00);
+    dev->p1 = ((dev->p1 + 1) & 0x03) | (dev->p1 & 0xfc);
+    return 0;
+}
+
+/* Quadtel controller-command handler: returns 1 to flag a bad command, 0 when handled here. */
+static uint8_t
+write64_quadtel(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    if (val == 0xaf) { /* rejected outright, never forwarded to write64_generic() */
+        kbc_at_log("ATkbc: bad KBC command AF\n");
+        return 1;
+    }
+    if (val == 0xcf) {
+        /* Unknown command sent by the MegaPC BIOS; its data byte is taken by write60_quadtel(). */
+        kbc_at_log("ATkbc: ??? - sent by MegaPC BIOS\n");
+        dev->state    = STATE_KBC_PARAM;
+        dev->wantdata = 1;
+        return 0;
+    }
+    return write64_generic(dev, val);
+}
+
+/* Toshiba data-byte handler: returns 0 when the pending command was consumed, 1 otherwise. */
+static uint8_t
+write60_toshiba(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    if (dev->command != 0xb6)
+        return 1;
+
+    /* Data byte of command B6: hand it to the T3100e colour/mono switch. */
+    kbc_at_log("ATkbc: T3100e - set color/mono switch\n");
+    t3100e_mono_set(val);
+    return 0;
+}
+
+/* Toshiba T3100e controller-command handler; commands not listed here go to write64_generic(). */
+static uint8_t
+write64_toshiba(void *priv, uint8_t val)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    switch (val) {
+        case 0xaf:
+            kbc_at_log("ATkbc: bad KBC command AF\n");
+            return 1;
+
+        case 0xb0: /* T3100e: Turbo on */
+            kbc_at_log("ATkbc: T3100e: Turbo on\n");
+            t3100e_turbo_set(1);
+            return 0;
+
+        case 0xb1: /* T3100e: Turbo off */
+            kbc_at_log("ATkbc: T3100e: Turbo off\n");
+            t3100e_turbo_set(0);
+            return 0;
+
+        case 0xb2: /* T3100e: Select external display */
+            kbc_at_log("ATkbc: T3100e: Select external display\n");
+            t3100e_display_set(0x00);
+            return 0;
+
+        case 0xb3: /* T3100e: Select internal display */
+            kbc_at_log("ATkbc: T3100e: Select internal display\n");
+            t3100e_display_set(0x01);
+            return 0;
+
+        case 0xb4: /* T3100e: Get configuration / status */
+            kbc_at_log("ATkbc: T3100e: Get configuration / status\n");
+            kbc_delay_to_ob(dev, t3100e_config_get(), 0, 0x00);
+            return 0;
+
+        case 0xb5: /* T3100e: Get colour / mono byte */
+            kbc_at_log("ATkbc: T3100e: Get colour / mono byte\n");
+            kbc_delay_to_ob(dev, t3100e_mono_get(), 0, 0x00);
+            return 0;
+
+        case 0xb6: /* T3100e: Set colour / mono byte */
+            kbc_at_log("ATkbc: T3100e: Set colour / mono byte\n");
+            dev->wantdata  = 1; /* the data byte that follows is consumed by write60_toshiba() */
+            dev->state     = STATE_KBC_PARAM;
+            return 0;
+
+        /* TODO: Toshiba KBC mode switching. */
+        case 0xb7: /* T3100e: Emulate PS/2 keyboard */
+        case 0xb8: /* T3100e: Emulate AT keyboard */
+            dev->misc_flags &= ~FLAG_PS2;
+            if (val == 0xb7) {
+                kbc_at_log("ATkbc: T3100e: Emulate PS/2 keyboard\n");
+                dev->misc_flags |= FLAG_PS2;
+                kbc_at_do_poll = kbc_at_poll_ps2;
+            } else {
+                kbc_at_log("ATkbc: T3100e: Emulate AT keyboard\n");
+                kbc_at_do_poll = kbc_at_poll_at;
+            }
+            return 0;
+
+        case 0xbb: /* T3100e: Read 'Fn' key.
+                      Return it for right Ctrl and right Alt; on the real
+                      T3100e, these keystrokes could only be generated
+                      using 'Fn'. */
+            kbc_at_log("ATkbc: T3100e: Read 'Fn' key\n");
+            if (keyboard_recv(0xb8) || /* Right Alt */
+                keyboard_recv(0x9d))   /* Right Ctrl */
+                kbc_delay_to_ob(dev, 0x04, 0, 0x00);
+            else
+                kbc_delay_to_ob(dev, 0x00, 0, 0x00);
+            return 0;
+
+        case 0xbc: /* T3100e: Reset Fn+Key notification */
+            kbc_at_log("ATkbc: T3100e: Reset Fn+Key notification\n");
+            t3100e_notify_set(0x00);
+            return 0;
+
+        case 0xc0: /* Read P1 */
+            kbc_at_log("ATkbc: read P1\n");
+            /* The T3100e returns all bits set except bit 6 which
+             * is set by t3100e_mono_set() */
+            dev->p1 = (t3100e_mono_get() & 1) ? 0xff : 0xbf;
+            kbc_delay_to_ob(dev, dev->p1, 0, 0x00);
+            return 0;
+    }
+
+    return write64_generic(dev, val);
+}
+
+/* Process the byte latched in dev->ib: a controller command if STAT_CD is set,
+   otherwise the parameter of the command remembered in dev->command. */
+static void
+kbc_at_process_cmd(void *priv)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+    int      i = 0, bad    = 1;
+    uint8_t  mask, kbc_ven = dev->flags & KBC_VEN_MASK;
+    static const uint8_t cmd_ac_conv[16] = { 0x0b, 2, 3, 4, 5, 6, 7, 8, 9, 0x0a, 0x1e, 0x30, 0x2e, 0x20, 0x12, 0x21 };
+
+    if (dev->status & STAT_CD) {
+        /* Controller command. */
+        dev->wantdata  = 0;
+        dev->state     = STATE_MAIN_IBF;
+
+        /* Clear the keyboard controller queue. */
+        kbc_at_queue_reset(dev);
+
+        switch (dev->ib) {
+            /* Read data from KBC memory. */
+            case 0x20 ... 0x3f:
+                kbc_delay_to_ob(dev, dev->mem[dev->ib], 0, 0x00);
+                if (dev->ib == 0x20)
+                    dev->pending++;
+                break;
+
+            /* Write data to KBC memory. */
+            case 0x60 ... 0x7f:
+                dev->wantdata  = 1;
+                dev->state     = STATE_KBC_PARAM;
+                break;
+
+            case 0xaa: /* self-test */
+                kbc_at_log("ATkbc: self-test\n");
+
+                if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) {
+                    if (dev->state != STATE_RESET) {
+                        kbc_at_log("ATkbc: self-test reinitialization\n");
+                        /* Yes, the firmware has an OR, but we need to make sure to keep any forcibly lowered bytes lowered. */
+                        /* TODO: Proper P1 implementation, with OR and AND flags in the machine table. */
+                        dev->p1 = dev->p1 & 0xff;
+                        write_p2(dev, 0x4b);
+                    }
+
+                    dev->status = (dev->status & 0x0f) | 0x60;
+
+                    dev->mem[0x20] = 0x30;
+                    dev->mem[0x22] = 0x0b;
+                    dev->mem[0x25] = 0x02;
+                    dev->mem[0x27] = 0xf8;
+                    dev->mem[0x28] = 0xce;
+                    dev->mem[0x29] = 0x0b;
+                    dev->mem[0x30] = 0x0b;
+                } else {
+                    if (dev->state != STATE_RESET) {
+                        kbc_at_log("ATkbc: self-test reinitialization\n");
+                        /* Yes, the firmware has an OR, but we need to make sure to keep any forcibly lowered bytes lowered. */
+                        /* TODO: Proper P1 implementation, with OR and AND flags in the machine table. */
+                        dev->p1 = dev->p1 & 0xff;
+                        write_p2(dev, 0xcf);
+                    }
+
+                    dev->status = (dev->status & 0x0f) | 0x60;
+
+                    dev->mem[0x20] = 0x10;
+                    dev->mem[0x22] = 0x06;
+                    dev->mem[0x25] = 0x01;
+                    dev->mem[0x27] = 0xfb;
+                    dev->mem[0x28] = 0xe0;
+                    dev->mem[0x29] = 0x06;
+                }
+
+                dev->mem[0x21] = 0x01;
+                dev->mem[0x2a] = 0x10;
+                dev->mem[0x2b] = 0x20;
+                dev->mem[0x2c] = 0x15;
+
+                if (dev->ports[0] != NULL)
+                    dev->ports[0]->out_new = -1;
+                if (dev->ports[1] != NULL)
+                    dev->ports[1]->out_new = -1;
+                kbc_at_queue_reset(dev);
+
+                kbc_at_queue_add(dev, 0x55);
+                break;
+
+            case 0xab: /* interface test */
+                kbc_at_log("ATkbc: interface test\n");
+                kbc_delay_to_ob(dev, 0x00, 0, 0x00); /*no error*/
+                break;
+
+            case 0xac: /* diagnostic dump */
+                if (dev->misc_flags & FLAG_PS2) {
+                    kbc_at_log("ATkbc: diagnostic dump\n");
+                    dev->mem[0x30] = (dev->p1 & 0xf0) | 0x80;
+                    dev->mem[0x31] = dev->p2;
+                    dev->mem[0x32] = 0x00;    /* T0 and T1. */
+                    dev->mem[0x33] = 0x00;    /* PSW - Program Status Word - always return 0x00 because we do not emulate this byte. */
+                    /* 20 bytes in high nibble in set 1, low nibble in set 1, set 1 space format = 60 bytes. */
+                    for (i = 0; i < 20; i++) {
+                        kbc_at_queue_add(dev, cmd_ac_conv[dev->mem[i + 0x20] >> 4]);
+                        kbc_at_queue_add(dev, cmd_ac_conv[dev->mem[i + 0x20] & 0x0f]);
+                        kbc_at_queue_add(dev, 0x39);
+                    }
+                }
+                break;
+
+            case 0xad: /* disable keyboard */
+                kbc_at_log("ATkbc: disable keyboard\n");
+                set_enable_kbd(dev, 0);
+                break;
+
+            case 0xae: /* enable keyboard */
+                kbc_at_log("ATkbc: enable keyboard\n");
+                set_enable_kbd(dev, 1);
+                break;
+
+            case 0xc7: /* set port1 bits */
+                kbc_at_log("ATkbc: Phoenix - set port1 bits\n");
+                dev->wantdata  = 1;
+                dev->state     = STATE_KBC_PARAM;
+                break;
+
+            case 0xca: /* read keyboard mode */
+                kbc_at_log("ATkbc: AMI - read keyboard mode\n");
+                kbc_delay_to_ob(dev, dev->ami_flags, 0, 0x00);
+                break;
+
+            case 0xcb: /* set keyboard mode */
+                kbc_at_log("ATkbc: AMI - set keyboard mode\n");
+                dev->wantdata  = 1;
+                dev->state     = STATE_KBC_PARAM;
+                break;
+
+            case 0xd0: /* read P2 */
+                kbc_at_log("ATkbc: read P2\n");
+                mask = 0xff;
+                if ((kbc_ven != KBC_VEN_OLIVETTI) && !(dev->misc_flags & FLAG_PS2) && (dev->mem[0x20] & 0x10))
+                    mask &= 0xbf;
+                kbc_delay_to_ob(dev, ((dev->p2 & 0xfd) | mem_a20_key) & mask, 0, 0x00);
+                break;
+
+            case 0xd1: /* write P2 */
+                kbc_at_log("ATkbc: write P2\n");
+                dev->wantdata  = 1;
+                dev->state     = STATE_KBC_PARAM;
+                break;
+
+            case 0xd2: /* write keyboard output buffer */
+                kbc_at_log("ATkbc: write keyboard output buffer\n");
+                dev->wantdata  = 1;
+                dev->state     = STATE_KBC_PARAM;
+                break;
+
+            case 0xdd: /* disable A20 address line */
+            case 0xdf: /* enable A20 address line */
+                kbc_at_log("ATkbc: %sable A20\n", (dev->ib == 0xdd) ? "dis" : "en");
+                write_p2_fast_a20(dev, (dev->p2 & 0xfd) | (dev->ib & 0x02));
+                break;
+
+            case 0xe0: /* read test inputs */
+                kbc_at_log("ATkbc: read test inputs\n");
+                kbc_delay_to_ob(dev, 0x00, 0, 0x00);
+                break;
+
+            default:
+                /*
+                 * Unrecognized controller command: run the vendor-specific
+                 * handler if there is one. Without a handler, or if the
+                 * handler fails, log a bad command.
+                 */
+                if (dev->write64_ven)
+                    bad = dev->write64_ven(dev, dev->ib);
+
+                if (bad) {
+                    kbc_at_log("ATkbc: bad controller command %02X\n", dev->ib);
+                }
+        }
+
+        /* If the command needs data, remember the command. */
+        if (dev->wantdata)
+            dev->command = dev->ib;
+    } else if (dev->wantdata) {
+        /* Write data to controller. */
+        dev->wantdata = 0;
+        dev->state    = STATE_MAIN_IBF;
+
+        switch (dev->command) {
+            case 0x60 ... 0x7f:
+                dev->mem[(dev->command & 0x1f) + 0x20] = dev->ib;
+                if (dev->command == 0x60)
+                    write_cmd(dev, dev->ib);
+                break;
+
+            case 0xa5: /* load security */
+                if (dev->misc_flags & FLAG_PS2) {
+                    kbc_at_log("ATkbc: load security (%02X)\n", dev->ib);
+
+                    if (dev->ib != 0x00) {
+                        dev->wantdata = 1;
+                        dev->state = STATE_KBC_PARAM;
+                    }
+                }
+                break;
+
+            case 0xc7: /* set port1 bits */
+                kbc_at_log("ATkbc: Phoenix - set port1 bits\n");
+                dev->p1 |= dev->ib;
+                break;
+
+            case 0xd1: /* write P2 */
+                kbc_at_log("ATkbc: write P2\n");
+                /* Bit 2 of AMI flags is P22-P23 blocked (1 = yes, 0 = no),
+                   discovered by reverse-engineering the AOpen Vi15G BIOS. */
+                if (dev->ami_flags & 0x04) {
+                    /* If keyboard controller lines P22-P23 are blocked,
+                       we force them to remain unchanged. */
+                    dev->ib &= ~0x0c;
+                    dev->ib |= (dev->p2 & 0x0c);
+                }
+                write_p2(dev, dev->ib | 0x01);
+                break;
+
+            case 0xd2: /* write to keyboard output buffer */
+                kbc_at_log("ATkbc: write to keyboard output buffer\n");
+                kbc_delay_to_ob(dev, dev->ib, 0, 0x00);
+                break;
+
+            case 0xd3: /* write to auxiliary output buffer */
+                kbc_at_log("ATkbc: write to auxiliary output buffer\n");
+                kbc_delay_to_ob(dev, dev->ib, 2, 0x00);
+                break;
+
+            case 0xd4: /* write to auxiliary port */
+                kbc_at_log("ATkbc: write to auxiliary port (%02X)\n", dev->ib);
+
+                if (dev->ib == 0xbb)
+                    break;
+
+                if (dev->misc_flags & FLAG_PS2) {
+                    set_enable_aux(dev, 1);
+                    if ((dev->ports[1] != NULL) && (dev->ports[1]->priv != NULL)) {
+                        dev->ports[1]->wantcmd = 1;
+                        dev->ports[1]->dat = dev->ib;
+                        dev->state         = STATE_SEND_AUX;
+                    } else
+                        kbc_delay_to_ob(dev, 0xfe, 2, 0x40);
+                }
+                break;
+
+            default:
+                /*
+                 * Run the vendor-specific handler if we have one. Otherwise,
+                 * or if it returns an error, log a bad controller command.
+                 */
+                if (dev->write60_ven)
+                    bad = dev->write60_ven(dev, dev->ib);
+
+                if (bad) {
+                    kbc_at_log("ATkbc: bad controller command %02x data %02x\n", dev->command, dev->ib);
+                }
+        }
+    }
+}
+
+/* I/O write handler for ports 0x60 (data, clears STAT_CD) and 0x64 (command, sets STAT_CD). */
+static void
+kbc_at_write(uint16_t port, uint8_t val, void *priv)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+
+    kbc_at_log("ATkbc: [%04X:%08X] write(%04X) = %02X\n", CS, cpu_state.pc, port, val);
+
+    switch (port) {
+        case 0x60:
+            dev->status &= ~STAT_CD;
+            if (dev->wantdata && (dev->command == 0xd1)) {
+                kbc_at_log("ATkbc: write P2\n");
+                /* The data byte of command D1 is applied right here instead of being latched in dev->ib. */
+#if 0
+                /* Fast A20 - ignore all other bits. */
+                val = (val & 0x02) | (dev->p2 & 0xfd);
+
+                /* Bit 2 of AMI flags is P22-P23 blocked (1 = yes, 0 = no),
+                   discovered by reverse-engineering the AOpeN Vi15G BIOS. */
+                if (dev->ami_flags & 0x04) {
+                    /* If keyboard controller lines P22-P23 are blocked,
+                       we force them to remain unchanged. */
+                    val &= ~0x0c;
+                    val |= (dev->p2 & 0x0c);
+                }
+
+                write_p2_fast_a20(dev, val | 0x01);
+#else
+                /* Fast A20 - ignore all other bits. */
+                write_p2_fast_a20(dev, (dev->p2 & 0xfd) | (val & 0x02));
+#endif
+                dev->wantdata  = 0;
+                dev->state     = STATE_MAIN_IBF;
+                return;
+            }
+            break;
+
+        case 0x64:
+            dev->status |= STAT_CD;
+            if (val == 0xd1) {
+                /* Command D1 is accepted immediately; only its data byte is still awaited. */
+                kbc_at_log("ATkbc: write P2\n");
+                dev->wantdata  = 1;
+                dev->state     = STATE_KBC_PARAM;
+                dev->command = 0xd1;
+                return;
+            }
+            break;
+    }
+
+    /* Everything else is latched into the input buffer and flagged with STAT_IFULL. */
+    dev->ib = val;
+    dev->status |= STAT_IFULL;
+}
+
+/* I/O read handler for ports 0x60 and 0x64:
+   0x60 returns the output buffer, 0x64 the status register. */
+static uint8_t
+kbc_at_read(uint16_t port, void *priv)
+{
+    atkbc_t *dev   = (atkbc_t *) priv;
+    uint8_t  value = 0xff; /* Returned for any port other than 0x60 and 0x64. */
+
+    /* Controllers of type KBC_TYPE_PS2_1 and above are charged 8 ISA cycles per read. */
+    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1)
+        cycles -= ISA_CYCLES(8);
+
+    if (port == 0x60) {
+        /* Reading the data port hands over the byte and clears the output-buffer-full flag. */
+        value = dev->ob;
+        dev->status &= ~STAT_OFULL;
+        /* TODO: IRQ is only tied to OBF on the AT KBC, on the PS/2 KBC, it is controlled by a P2 bit.
+                 This also means that in AT mode, the IRQ is level-triggered. */
+        if ((dev->misc_flags & FLAG_PS2) == 0)
+            picintc(1 << 1);
+    } else if (port == 0x64) {
+        /* A status read changes no controller state here. */
+        value = dev->status;
+    } else {
+        /* Not a port this handler decodes: log it and return 0xff. */
+        kbc_at_log("ATkbc: read(%04x) invalid!\n",port);
+    }
+
+    kbc_at_log("ATkbc: [%04X:%08X] read (%04X) = %02X\n",  CS, cpu_state.pc, port, value);
+
+    return value;
+}
+
+/* Device reset handler: reloads status, command byte, P1, P2, the queue and the AT/PS-2 poll routine. */
+static void
+kbc_at_reset(void *priv)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+    uint8_t  kbc_ven = dev->flags & KBC_VEN_MASK;
+
+    dev->status        = STAT_UNLOCKED;
+    dev->mem[0x20]     = 0x01;
+    dev->mem[0x20]    |= CCB_TRANSLATE;
+    dev->command_phase = 0;
+
+    /* Set up the correct Video Type bits. */
+    if (!is286 || (kbc_ven == KBC_VEN_ACER))
+        dev->p1 = video_is_mda() ? 0xb0 : 0xf0;
+    else
+        dev->p1 = video_is_mda() ? 0xf0 : 0xb0;
+    kbc_at_log("ATkbc: P1 = %02x\n", dev->p1);
+
+    /* Disable both the keyboard and auxiliary ports. */
+    set_enable_kbd(dev, 0);
+    set_enable_aux(dev, 0);
+
+    kbc_at_queue_reset(dev);
+    dev->sc_or = 0;
+
+    dev->ami_flags = ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) ? 0x01 : 0x00;
+    dev->misc_flags &= FLAG_PCI; /* drop every misc flag except FLAG_PCI */
+
+    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) {
+        dev->misc_flags |= FLAG_PS2;
+        kbc_at_do_poll = kbc_at_poll_ps2;
+    } else
+        kbc_at_do_poll = kbc_at_poll_at;
+
+    dev->misc_flags |= FLAG_CACHE;
+
+    dev->p2 = 0xcd;
+    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) {
+        write_p2(dev, 0x4b);
+    } else {
+        /* The real thing writes CF and then AND's it with BF. */
+        write_p2(dev, 0x8f);
+    }
+
+    /* Stage 1. */
+    dev->status = (dev->status & 0x0f) | (dev->p1 & 0xf0);
+}
+
+/* Device close handler: reset the controller, stop its timers, free the ports and the state. */
+static void
+kbc_at_close(void *priv)
+{
+    atkbc_t *dev = (atkbc_t *) priv;
+    int i, max_ports = ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) ? 2 : 1;
+
+    kbc_at_reset(dev);
+
+    /* Stop both timers: each was registered with dev as its argument and must not fire after free(). */
+    timer_disable(&dev->send_delay_timer);
+    timer_disable(&dev->pulse_cb);
+
+    for (i = 0; i < max_ports; i++) {
+        free(kbc_at_ports[i]); /* free(NULL) is a no-op, so no guard is needed */
+        kbc_at_ports[i] = NULL;
+    }
+
+    free(dev);
+}
+
+/* Device init handler: allocate and wire up the controller; returns its state, or NULL if out of memory. */
+static void *
+kbc_at_init(const device_t *info)
+{
+    atkbc_t *dev;
+    int i, max_ports;
+
+    /* calloc() hands back zeroed memory; bail out instead of dereferencing a failed allocation. */
+    dev = (atkbc_t *) calloc(1, sizeof(atkbc_t));
+    if (dev == NULL)
+        return NULL;
+
+    dev->flags = info->local;
+
+    video_reset(gfxcard[0]);
+    kbc_at_reset(dev);
+
+    if (info->flags & DEVICE_PCI)
+        dev->misc_flags |= FLAG_PCI;
+
+    io_sethandler(0x0060, 1, kbc_at_read, NULL, NULL, kbc_at_write, NULL, NULL, dev);
+    io_sethandler(0x0064, 1, kbc_at_read, NULL, NULL, kbc_at_write, NULL, NULL, dev);
+
+    timer_add(&dev->send_delay_timer, kbc_at_poll, dev, 1);
+    timer_add(&dev->pulse_cb, pulse_poll, dev, 0);
+
+    dev->write60_ven = NULL;
+    dev->write64_ven = NULL;
+    kbc_ami_revision = '8';
+    kbc_award_revision = 0x42;
+
+    switch (dev->flags & KBC_VEN_MASK) {
+        case KBC_VEN_ACER:
+        case KBC_VEN_GENERIC:
+        case KBC_VEN_NCR:
+        case KBC_VEN_IBM_PS1:
+            dev->write64_ven = write64_generic;
+            break;
+
+        case KBC_VEN_OLIVETTI:
+            dev->write64_ven = write64_olivetti;
+            break;
+
+        case KBC_VEN_ALI:
+            kbc_ami_revision = 'F';
+            kbc_award_revision = 0x43;
+            dev->write60_ven = write60_ami;
+            dev->write64_ven = write64_ami;
+            break;
+
+        case KBC_VEN_TRIGEM_AMI:
+            kbc_ami_revision = 'Z';
+            dev->write60_ven = write60_ami;
+            dev->write64_ven = write64_ami;
+            break;
+
+        case KBC_VEN_AMI:
+            if ((dev->flags & KBC_TYPE_MASK) == KBC_TYPE_GREEN)
+                kbc_ami_revision = '5';
+            else if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) {
+                if (cpu_64bitbus)
+                    kbc_ami_revision = 'R';
+                else if (is486)
+                    kbc_ami_revision = 'P';
+                else
+                    kbc_ami_revision = 'H';
+            } else if (is386 && !is486) {
+                if (cpu_16bitbus)
+                    kbc_ami_revision = 'D';
+                else
+                    kbc_ami_revision = 'B';
+            } else if (!is386)
+                kbc_ami_revision = '8';
+            else
+                kbc_ami_revision = 'F';
+
+            dev->write60_ven = write60_ami;
+            dev->write64_ven = write64_ami;
+            break;
+
+        case KBC_VEN_QUADTEL:
+            dev->write60_ven = write60_quadtel;
+            dev->write64_ven = write64_quadtel;
+            break;
+
+        case KBC_VEN_TOSHIBA:
+            dev->write60_ven = write60_toshiba;
+            dev->write64_ven = write64_toshiba;
+            break;
+    }
+
+    max_ports = ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_1) ? 2 : 1;
+
+    for (i = 0; i < max_ports; i++) {
+        kbc_at_ports[i] = (kbc_at_port_t *) calloc(1, sizeof(kbc_at_port_t));
+        if (kbc_at_ports[i] != NULL)
+            kbc_at_ports[i]->out_new = -1;
+    }
+    dev->ports[0] = kbc_at_ports[0];
+    dev->ports[1] = kbc_at_ports[1];
+
+    /* The actual keyboard. */
+    device_add(&keyboard_at_generic_device);
+
+    return (dev);
+}
+
+const device_t keyboard_at_device = {
+    .name          = "PC/AT Keyboard",
+    .internal_name = "keyboard_at",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_ISA | KBC_VEN_GENERIC, /* kbc_at_init() installs write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_at_ami_device = {
+    .name          = "PC/AT Keyboard (AMI)",
+    .internal_name = "keyboard_at_ami",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_ISA | KBC_VEN_AMI, /* kbc_at_init() installs write60_ami/write64_ami */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_at_tg_ami_device = {
+    .name          = "PC/AT Keyboard (TriGem AMI)",
+    .internal_name = "keyboard_at_tg_ami",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_ISA | KBC_VEN_TRIGEM_AMI, /* AMI handlers, kbc_ami_revision 'Z' (see kbc_at_init()) */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_at_toshiba_device = {
+    .name          = "PC/AT Keyboard (Toshiba)",
+    .internal_name = "keyboard_at_toshiba",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_ISA | KBC_VEN_TOSHIBA, /* kbc_at_init() installs write60_toshiba/write64_toshiba */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_at_olivetti_device = {
+    .name          = "PC/AT Keyboard (Olivetti)",
+    .internal_name = "keyboard_at_olivetti",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_ISA | KBC_VEN_OLIVETTI, /* kbc_at_init() installs write64_olivetti only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_at_ncr_device = {
+    .name          = "PC/AT Keyboard (NCR)",
+    .internal_name = "keyboard_at_ncr",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_ISA | KBC_VEN_NCR, /* kbc_at_init() installs write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_device = {
+    .name          = "PS/2 Keyboard",
+    .internal_name = "keyboard_ps2",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_GENERIC, /* two ports; kbc_at_init() installs write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_ps1_device = {
+    .name          = "PS/2 Keyboard (IBM PS/1)",
+    .internal_name = "keyboard_ps2_ps1",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_IBM_PS1, /* two ports; kbc_at_init() installs write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_ps1_pci_device = {
+    .name          = "PS/2 Keyboard (IBM PS/1)",
+    .internal_name = "keyboard_ps2_ps1_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_IBM_PS1, /* two ports; write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_xi8088_device = {
+    .name          = "PS/2 Keyboard (Xi8088)",
+    .internal_name = "keyboard_ps2_xi8088",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_GENERIC, /* NOTE(review): same bits as keyboard_ps2_device - confirm no Xi8088-specific flag is missing */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_ami_device = {
+    .name          = "PS/2 Keyboard (AMI)",
+    .internal_name = "keyboard_ps2_ami",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_AMI, /* two ports; AMI handlers, revision 'R', 'P' or 'H' picked by CPU in kbc_at_init() */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_tg_ami_device = {
+    .name          = "PS/2 Keyboard (TriGem AMI)",
+    .internal_name = "keyboard_ps2_tg_ami",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_TRIGEM_AMI, /* two ports; AMI handlers, kbc_ami_revision 'Z' */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_mca_2_device = {
+    .name          = "PS/2 Keyboard",
+    .internal_name = "keyboard_ps2_mca_2",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_2 | KBC_VEN_GENERIC, /* kbc_at_init() installs write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_quadtel_device = {
+    .name          = "PS/2 Keyboard (Quadtel/MegaPC)",
+    .internal_name = "keyboard_ps2_quadtel",
+    .flags         = DEVICE_KBC,
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_QUADTEL, /* two ports; kbc_at_init() installs write60_quadtel/write64_quadtel */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_pci_device = {
+    .name          = "PS/2 Keyboard",
+    .internal_name = "keyboard_ps2_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_GENERIC, /* two ports; write64_generic only */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_ami_pci_device = {
+    .name          = "PS/2 Keyboard (AMI)",
+    .internal_name = "keyboard_ps2_ami_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_AMI, /* two ports; AMI handlers, revision picked by CPU in kbc_at_init() */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_ali_pci_device = {
+    .name          = "PS/2 Keyboard (ALi M5123/M1543C)",
+    .internal_name = "keyboard_ps2_ali_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_ALI, /* two ports; AMI handlers, kbc_ami_revision 'F', kbc_award_revision 0x43 */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_intel_ami_pci_device = {
+    .name          = "PS/2 Keyboard (AMI)",
+    .internal_name = "keyboard_ps2_intel_ami_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_GREEN | KBC_VEN_AMI, /* AMI handlers; KBC_TYPE_GREEN selects kbc_ami_revision '5' */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_tg_ami_pci_device = {
+    .name          = "PS/2 Keyboard (TriGem AMI)",
+    .internal_name = "keyboard_ps2_tg_ami_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_TRIGEM_AMI, /* two ports; AMI handlers, kbc_ami_revision 'Z' */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
+
+const device_t keyboard_ps2_acer_pci_device = {
+    .name          = "PS/2 Keyboard (Acer 90M002A)",
+    .internal_name = "keyboard_ps2_acer_pci",
+    .flags         = DEVICE_KBC | DEVICE_PCI, /* DEVICE_PCI makes kbc_at_init() set FLAG_PCI */
+    .local         = KBC_TYPE_PS2_1 | KBC_VEN_ACER, /* two ports; write64_generic only; KBC_VEN_ACER also picks the P1 video bits in kbc_at_reset() */
+    .init          = kbc_at_init,
+    .close         = kbc_at_close,
+    .reset         = kbc_at_reset,
+    { .available = NULL },
+    .speed_changed = NULL,
+    .force_redraw  = NULL,
+    .config        = NULL
+};
diff --git a/src/device/kbc_at_dev.c b/src/device/kbc_at_dev.c
new file mode 100644
index 000000000..71a0b4e08
--- /dev/null
+++ b/src/device/kbc_at_dev.c
@@ -0,0 +1,198 @@
+/*
+ * 86Box    A hypervisor and IBM PC system emulator that specializes in
+ *          running old operating systems and software designed for IBM
+ *          PC systems and compatibles from 1981 through fairly recent
+ *          system designs based on the PCI bus.
+ *
+ *          This file is part of the 86Box distribution.
+ *
+ *          AT / PS/2 attached device emulation.
+ *
+ *
+ *
+ * Authors: Miran Grca, <mgrca8@gmail.com>
+ *
+ *          Copyright 2023 Miran Grca.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#define HAVE_STDARG_H
+#include <wchar.h>
+#include <86box/86box.h>
+#include "cpu.h"
+#include <86box/timer.h>
+#include <86box/io.h>
+#include <86box/pic.h>
+#include <86box/pit.h>
+#include <86box/ppi.h>
+#include <86box/mem.h>
+#include <86box/device.h>
+#include <86box/machine.h>
+#include <86box/m_at_t3100e.h>
+#include <86box/fdd.h>
+#include <86box/fdc.h>
+#include <86box/sound.h>
+#include <86box/snd_speaker.h>
+#include <86box/video.h>
+#include <86box/keyboard.h>
+
+#ifdef ENABLE_KBC_AT_DEV_LOG
+int kbc_at_dev_do_log = ENABLE_KBC_AT_DEV_LOG;
+/* printf-style debug logger: forwards to pclog_ex() only while kbc_at_dev_do_log is non-zero. */
+static void
+kbc_at_dev_log(const char *fmt, ...)
+{
+    va_list ap;
+
+    if (kbc_at_dev_do_log) {
+        va_start(ap, fmt);
+        pclog_ex(fmt, ap);
+        va_end(ap);
+    }
+}
+#else /* ENABLE_KBC_AT_DEV_LOG not defined: log calls expand to nothing */
+#    define kbc_at_dev_log(fmt, ...)
+#endif
+
+static void
+kbc_at_dev_queue_reset(atkbc_dev_t *dev, uint8_t reset_main)
+{
+    /* The command-response FIFO is always flushed; dev->queue only when reset_main is non-zero. */
+    memset(dev->cmd_queue, 0x00, sizeof(dev->cmd_queue));
+    dev->cmd_queue_end = dev->cmd_queue_start = 0;
+    if (reset_main) {
+        memset(dev->queue, 0x00, sizeof(dev->queue));
+        dev->queue_end = dev->queue_start = 0;
+    }
+}
+
+uint8_t
+kbc_at_dev_queue_pos(atkbc_dev_t *dev, uint8_t main)
+{
+    /* Report how far the write index is ahead of the read index, wrapped
+       to 4 bits, for the FIFO picked by 'main': non-zero selects
+       dev->queue, zero selects dev->cmd_queue. */
+    if (main)
+        return (dev->queue_end - dev->queue_start) & 0xf;
+
+    /* Command-response FIFO. */
+    return (dev->cmd_queue_end - dev->cmd_queue_start) & 0xf;
+}
+
+void
+kbc_at_dev_queue_add(atkbc_dev_t *dev, uint8_t val, uint8_t main)
+{
+    /* Append val to the FIFO picked by 'main'; the write index wraps after 16 entries. */
+    if (!main) {
+        kbc_at_dev_log("%s: dev->cmd_queue[%02X] = %02X;\n", dev->name, dev->cmd_queue_end, val);
+        dev->cmd_queue[dev->cmd_queue_end++] = val;
+        dev->cmd_queue_end &= 0xf;
+    } else {
+        kbc_at_dev_log("%s: dev->queue[%02X]     = %02X;\n", dev->name, dev->queue_end, val);
+        dev->queue[dev->queue_end++] = val;
+        dev->queue_end &= 0xf;
+    }
+    /* TODO: This should be done on actual send to host. (0xfe is never remembered.) */
+    if (val != 0xfe)
+        dev->last_scan_code = val;
+}
+
+static void
+kbc_at_dev_poll(void *priv)
+{
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
+    /* One step of the device state machine; a state with no case below is left untouched. */
+    switch (dev->state) {
+        case DEV_STATE_MAIN_1:
+            /* Main loop #1: run a pending host command (flushing the command-response queue first), otherwise move to main loop #2. */
+            if (dev->port->wantcmd) {
+                kbc_at_dev_log("%s: Processing keyboard command %02X...\n", dev->name, dev->port->dat);
+                kbc_at_dev_queue_reset(dev, 0);
+                dev->process_cmd(dev);
+                dev->port->wantcmd    = 0;
+            } else
+                dev->state = DEV_STATE_MAIN_2;
+            break;
+        case DEV_STATE_MAIN_2:
+            /* Main loop #2: with scanning on and out_new == -1 (appears to mean no byte pending), hand over one byte from dev->queue. */
+            if (*dev->scan && (dev->port->out_new == -1) && (dev->queue_start != dev->queue_end)) {
+                kbc_at_dev_log("%s: %02X (DATA) on channel 1\n", dev->name, dev->queue[dev->queue_start]);
+                dev->port->out_new   = dev->queue[dev->queue_start];
+                dev->queue_start     = (dev->queue_start + 1) & 0xf;
+            }
+            if (!(*dev->scan) || dev->port->wantcmd) /* scanning off or a command is waiting: back to main loop #1 */
+                dev->state = DEV_STATE_MAIN_1;
+            break;
+        case DEV_STATE_MAIN_OUT:
+            /* If host wants to send command while we're sending a byte to host, process the command. */
+            if (dev->port->wantcmd) {
+                kbc_at_dev_log("%s: Processing keyboard command %02X...\n", dev->name, dev->port->dat);
+                kbc_at_dev_queue_reset(dev, 0);
+                dev->process_cmd(dev);
+                dev->port->wantcmd    = 0;
+                break;
+            }
+            /* FALLTHROUGH */
+        case DEV_STATE_MAIN_WANT_IN:
+            /* Output command response and then return to main loop #2. */
+            if ((dev->port->out_new == -1) && (dev->cmd_queue_start != dev->cmd_queue_end)) {
+                kbc_at_dev_log("%s: %02X (CMD ) on channel 1\n", dev->name, dev->cmd_queue[dev->cmd_queue_start]);
+                dev->port->out_new   = dev->cmd_queue[dev->cmd_queue_start];
+                dev->cmd_queue_start = (dev->cmd_queue_start + 1) & 0xf;
+            }
+            if (dev->cmd_queue_start == dev->cmd_queue_end)
+                dev->state++; /* queue drained: step to the next enumerator; depends on DEV_STATE_* ordering declared outside this file */
+            break;
+        case DEV_STATE_MAIN_IN:
+            /* Wait for host data; the byte in dev->port->dat is handed to process_cmd() as the parameter of dev->command. */
+            if (dev->port->wantcmd) {
+                kbc_at_dev_log("%s: Processing keyboard command %02X parameter %02X...\n", dev->name, dev->command, dev->port->dat);
+                kbc_at_dev_queue_reset(dev, 0);
+                dev->process_cmd(dev);
+                dev->port->wantcmd    = 0;
+            }
+            break;
+    }
+}
+
+void
+kbc_at_dev_reset(atkbc_dev_t *dev, int do_fa)
+{
+    /* Reset the device: clear the port state, flush both FIFOs, set
+       *dev->scan to 1, optionally queue 0xfa, then call dev->execute_bat().
+       kbc_at_dev_init() can leave dev->port NULL, so guard the port writes. */
+    if (dev->port != NULL) {
+        dev->port->out_new = -1;
+        dev->port->wantcmd = 0;
+    }
+
+    kbc_at_dev_queue_reset(dev, 1);
+    dev->last_scan_code = 0x00;
+    *dev->scan          = 1;
+    if (do_fa)
+        kbc_at_dev_queue_add(dev, 0xfa, 0); /* goes to the command-response FIFO */
+    dev->state = DEV_STATE_MAIN_OUT;
+    dev->execute_bat(dev);
+}
+
+atkbc_dev_t *
+kbc_at_dev_init(uint8_t inst)
+{
+    /* Zero-filled device state; NULL is returned if the allocation fails. */
+    atkbc_dev_t *dev = calloc(1, sizeof(*dev));
+    if (dev == NULL)
+        return NULL;
+
+    /* Bind to port slot 'inst' (not range-checked here) if a port exists there. */
+    dev->port = kbc_at_ports[inst];
+    if (dev->port != NULL) {
+        dev->port->priv = dev;
+        dev->port->poll = kbc_at_dev_poll;
+    }
+
+    /* Return our private data to the I/O layer. */
+    return dev;
+}
diff --git a/src/device/keyboard_at.c b/src/device/keyboard_at.c
index 3a841ad91..19a754d7b 100644
--- a/src/device/keyboard_at.c
+++ b/src/device/keyboard_at.c
@@ -6,167 +6,56 @@
  *
  *          This file is part of the 86Box distribution.
  *
- *          Intel 8042 (AT keyboard controller) emulation.
+ *          Implementation of AT and PS/2 keyboard devices.
  *
  *
  *
- * Authors: Sarah Walker, <https://pcem-emulator.co.uk/>
- *          Miran Grca, <mgrca8@gmail.com>
- *          Fred N. van Kempen, <decwiz@yahoo.com>
- *          EngiNerd, <webmaster.crrc@yahoo.it>
- *
- *          Copyright 2008-2020 Sarah Walker.
- *          Copyright 2016-2020 Miran Grca.
- *          Copyright 2017-2020 Fred N. van Kempen.
- *          Copyright 2020 EngiNerd.
+ * Authors: Fred N. van Kempen, <decwiz@yahoo.com>
  */
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdint.h>
-#include <stdlib.h>
 #include <string.h>
-#include <stdarg.h>
-#define HAVE_STDARG_H
+#include <stdlib.h>
 #include <wchar.h>
+#define HAVE_STDARG_H
 #include <86box/86box.h>
-#include "cpu.h"
-#include <86box/timer.h>
-#include <86box/io.h>
-#include <86box/pic.h>
-#include <86box/pit.h>
-#include <86box/ppi.h>
-#include <86box/mem.h>
 #include <86box/device.h>
-#include <86box/machine.h>
-#include <86box/m_at_t3100e.h>
-#include <86box/fdd.h>
-#include <86box/fdc.h>
-#include <86box/sound.h>
-#include <86box/snd_speaker.h>
-#include <86box/video.h>
 #include <86box/keyboard.h>
+#include <86box/mouse.h>
 
-#define STAT_PARITY        0x80
-#define STAT_RTIMEOUT      0x40
-#define STAT_TTIMEOUT      0x20
-#define STAT_MFULL         0x20
-#define STAT_UNLOCKED      0x10
-#define STAT_CD            0x08
-#define STAT_SYSFLAG       0x04
-#define STAT_IFULL         0x02
-#define STAT_OFULL         0x01
-
-#define CCB_UNUSED         0x80
-#define CCB_TRANSLATE      0x40
-#define CCB_PCMODE         0x20
-#define CCB_ENABLEKBD      0x10
-#define CCB_IGNORELOCK     0x08
-#define CCB_SYSTEM         0x04
-#define CCB_ENABLEMINT     0x02
-#define CCB_ENABLEKINT     0x01
-
-#define CCB_MASK           0x68
-#define MODE_MASK          0x6c
-
-#define KBC_TYPE_ISA       0x00 /* AT ISA-based chips */
-#define KBC_TYPE_PS2_NOREF 0x01 /* PS2 type, no refresh */
-#define KBC_TYPE_PS2_1     0x02 /* PS2 on PS/2, type 1 */
-#define KBC_TYPE_PS2_2     0x03 /* PS2 on PS/2, type 2 */
-#define KBC_TYPE_MASK      0x03
-
-#define KBC_VEN_GENERIC    0x00
-#define KBC_VEN_AMI        0x04
-#define KBC_VEN_IBM_MCA    0x08
-#define KBC_VEN_QUADTEL    0x0c
-#define KBC_VEN_TOSHIBA    0x10
-#define KBC_VEN_IBM_PS1    0x14
-#define KBC_VEN_ACER       0x18
-#define KBC_VEN_INTEL_AMI  0x1c
-#define KBC_VEN_OLIVETTI   0x20
-#define KBC_VEN_NCR        0x24
-#define KBC_VEN_PHOENIX    0x28
-#define KBC_VEN_ALI        0x2c
-#define KBC_VEN_TG         0x30
-#define KBC_VEN_TG_GREEN   0x34
-#define KBC_VEN_MASK       0x3c
+#define FLAG_PS2       0x08  /* dev is PS/2 */
+#define FLAG_AT        0x00  /* dev is AT (FLAG_PS2 bit clear) */
+#define FLAG_TYPE_MASK 0x07  /* mask for type     */
 
 enum {
-    KBC_STATE_RESET = 0,
-    KBC_STATE_MAIN_IBF,
-    KBC_STATE_MAIN_KBD,
-    KBC_STATE_MAIN_MOUSE,
-    KBC_STATE_MAIN_BOTH,
-    KBC_STATE_KBC_OUT,
-    KBC_STATE_KBC_PARAM,
-    KBC_STATE_SEND_KBD,
-    KBC_STATE_KBD,
-    KBC_STATE_SEND_MOUSE,
-    KBC_STATE_MOUSE
-};
-#define KBC_STATE_SCAN_KBD KBC_STATE_KBD
-#define KBC_STATE_SCAN_MOUSE KBC_STATE_MOUSE
-
-enum {
-    DEV_STATE_MAIN_1 = 0,
-    DEV_STATE_MAIN_2,
-    DEV_STATE_MAIN_CMD,
-    DEV_STATE_MAIN_OUT,
-    DEV_STATE_MAIN_WANT_IN,
-    DEV_STATE_MAIN_IN
+    KBD_84_KEY = 0,
+    KBD_101_KEY,
+    KBD_102_KEY,
+    KBD_JIS,
+    KBD_KOREAN
 };
 
-typedef struct {
-    /* Controller. */
-    uint8_t pci, kbc_state, command, want60,
-            status, ib, out, old_out,
-            sc_or, secr_phase, mem_addr, input_port,
-            output_port, old_output_port, output_locked, ami_stat,
-            ami_flags, key_ctrl_queue_start, key_ctrl_queue_end;
+#define FLAG_ENABLED 0x10  /* dev is enabled for use */
+#define FLAG_CTRLDAT 0x08  /* ctrl or data mode; NOTE(review): same value as FLAG_PS2 — confirm they are never tested on the same field */
 
-    /* Keyboard. */
-    uint8_t key_command, key_wantdata, kbd_last_scan_code,
-            kbd_state, key_wantcmd, key_dat, key_cmd_queue_start,
-            key_cmd_queue_end, key_queue_start, key_queue_end;
-
-    /* Mouse. */
-    uint8_t mouse_state, mouse_wantcmd, mouse_dat, mouse_cmd_queue_start,
-            mouse_cmd_queue_end, mouse_queue_start, mouse_queue_end;
-
-    /* Controller. */
-    uint8_t mem[0x100];
-
-    /* Controller - internal FIFO for the purpose of commands with multi-byte output. */
-    uint8_t key_ctrl_queue[64];
-
-    /* Keyboard - command response FIFO. */
-    uint8_t key_cmd_queue[16];
-
-    /* Keyboard - scan FIFO. */
-    uint8_t key_queue[16];
-
-    /* Mouse - command response FIFO. */
-    uint8_t mouse_cmd_queue[16];
-
-    /* Mouse - scan FIFO. */
-    uint8_t mouse_queue[16];
-
-    /* Keyboard. */
-    int out_new;
-
-    /* Mouse. */
-    int out_new_mouse;
-
-    /* Controller. */
-    uint32_t flags;
-
-    /* Controller (main timer). */
-    pc_timer_t send_delay_timer;
-
-    /* Controller (P2 pulse callback timer). */
-    pc_timer_t pulse_cb;
-
-    uint8_t (*write60_ven)(void *p, uint8_t val);
-    uint8_t (*write64_ven)(void *p, uint8_t val);
-} atkbd_t;
+const uint8_t id_bytes[16][4] = { { 0x00, 0x00, 0x00, 0x00 },    /* [0x0] AT 84-key; NOTE(review): row order appears to follow (FLAG_PS2 | KBD_*) — confirm at the lookup site */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x1] AT 101/102/106-key */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x2] */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x3] */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x4] AT Korean */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x5] */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x6] */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x7] */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0x8] */
+                                  { 0xab, 0x83, 0x00, 0x00 },    /* [0x9] PS/2 101-key */
+                                  { 0xab, 0x83, 0x00, 0x00 },    /* [0xa] PS/2 102-key */
+                                  { 0xab, 0x90, 0x00, 0x00 },    /* [0xb] PS/2 106-key JIS */
+                                  /* Japanese keyboard ID - TODO: Find the actual Korean one. */
+                                  { 0xab, 0x90, 0x00, 0x00 },    /* [0xc] PS/2 Korean */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0xd] */
+                                  { 0x00, 0x00, 0x00, 0x00 },    /* [0xe] */
+                                  { 0x00, 0x00, 0x00, 0x00 } };  /* [0xf] */
 
 /* Global keyboard flags for scan code set 3:
    bit 0 = repeat, bit 1 = makes break code? */
@@ -178,48 +67,9 @@ uint8_t keyboard_set3_all_break;
    Bits 0 - 1 = scan code set. */
 uint8_t        keyboard_mode = 0x02;
 
-/* Keyboard controller ports. */
-kbc_port_t     *kbc_ports[2] = { NULL, NULL };
+static atkbc_dev_t *SavedKbd                        = NULL;
 
-static void (*mouse_write)(uint8_t val, void *priv) = NULL;
-static void    *mouse_p                             = NULL;
-static atkbd_t *SavedKbd                            = NULL; // FIXME: remove!!! --FvK
-
-/* Non-translated to translated scan codes. */
-static const uint8_t nont_to_t[256] = {
-    0xff, 0x43, 0x41, 0x3f, 0x3d, 0x3b, 0x3c, 0x58,
-    0x64, 0x44, 0x42, 0x40, 0x3e, 0x0f, 0x29, 0x59,
-    0x65, 0x38, 0x2a, 0x70, 0x1d, 0x10, 0x02, 0x5a,
-    0x66, 0x71, 0x2c, 0x1f, 0x1e, 0x11, 0x03, 0x5b,
-    0x67, 0x2e, 0x2d, 0x20, 0x12, 0x05, 0x04, 0x5c,
-    0x68, 0x39, 0x2f, 0x21, 0x14, 0x13, 0x06, 0x5d,
-    0x69, 0x31, 0x30, 0x23, 0x22, 0x15, 0x07, 0x5e,
-    0x6a, 0x72, 0x32, 0x24, 0x16, 0x08, 0x09, 0x5f,
-    0x6b, 0x33, 0x25, 0x17, 0x18, 0x0b, 0x0a, 0x60,
-    0x6c, 0x34, 0x35, 0x26, 0x27, 0x19, 0x0c, 0x61,
-    0x6d, 0x73, 0x28, 0x74, 0x1a, 0x0d, 0x62, 0x6e,
-    0x3a, 0x36, 0x1c, 0x1b, 0x75, 0x2b, 0x63, 0x76,
-    0x55, 0x56, 0x77, 0x78, 0x79, 0x7a, 0x0e, 0x7b,
-    0x7c, 0x4f, 0x7d, 0x4b, 0x47, 0x7e, 0x7f, 0x6f,
-    0x52, 0x53, 0x50, 0x4c, 0x4d, 0x48, 0x01, 0x45,
-    0x57, 0x4e, 0x51, 0x4a, 0x37, 0x49, 0x46, 0x54,
-    0x80, 0x81, 0x82, 0x41, 0x54, 0x85, 0x86, 0x87,
-    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
-    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
-    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
-    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
-    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
-    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
-    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
-    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
-    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
-    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
-    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-};
+static uint8_t     inv_cmd_response                 = 0xfa;
 
 static const scancode scancode_set1[512] = {
   // clang-format off
@@ -617,12 +467,11 @@ static const scancode scancode_set3[512] = {
   // clang-format on
 };
 
-// #define ENABLE_KEYBOARD_AT_LOG 1
 #ifdef ENABLE_KEYBOARD_AT_LOG
 int keyboard_at_do_log = ENABLE_KEYBOARD_AT_LOG;
 
 static void
-kbd_log(const char *fmt, ...)
+keyboard_at_log(const char *fmt, ...)
 {
     va_list ap;
 
@@ -633,690 +482,41 @@ kbd_log(const char *fmt, ...)
     }
 }
 #else
-#    define kbd_log(fmt, ...)
+#    define keyboard_at_log(fmt, ...)
 #endif
 
 static void
-set_scancode_map(atkbd_t *dev)
+keyboard_at_set_scancode_set(void)
 {
     switch (keyboard_mode) {
-        case 1:
+        case 0x01:
         default:
             keyboard_set_table(scancode_set1);
             break;
-        case 2:
+
+        case 0x02:
             keyboard_set_table(scancode_set2);
             break;
 
-        case 3:
+        case 0x03:
             keyboard_set_table(scancode_set3);
             break;
     }
 }
 
 static void
-kbc_queue_reset(atkbd_t *dev, uint8_t channel)
-{
-    switch (channel) {
-        case 1:
-            dev->key_queue_start = dev->key_queue_end = 0;
-            memset(dev->key_queue, 0x00, sizeof(dev->key_queue));
-            /* FALLTHROUGH */
-        case 4:
-            dev->key_cmd_queue_start = dev->key_cmd_queue_end = 0;
-            memset(dev->key_cmd_queue, 0x00, sizeof(dev->key_cmd_queue));
-            break;
-
-        case 2:
-            dev->mouse_queue_start = dev->mouse_queue_end = 0;
-            memset(dev->mouse_queue, 0x00, sizeof(dev->mouse_queue));
-            /* FALLTHROUGH */
-        case 3:
-            dev->mouse_cmd_queue_start = dev->mouse_cmd_queue_end = 0;
-            memset(dev->mouse_cmd_queue, 0x00, sizeof(dev->mouse_cmd_queue));
-            break;
-
-        case 0:
-        default:
-            dev->key_ctrl_queue_start = dev->key_ctrl_queue_end = 0;
-            memset(dev->key_ctrl_queue, 0x00, sizeof(dev->key_ctrl_queue));
-    }
-}
-
-static void
-kbc_queue_add(atkbd_t *dev, uint8_t val, uint8_t channel)
-{
-    switch (channel) {
-        case 4:
-            kbd_log("ATkbc: dev->key_cmd_queue[%02X] = %02X;\n", dev->key_cmd_queue_end, val);
-            dev->key_cmd_queue[dev->key_cmd_queue_end] = val;
-            dev->key_cmd_queue_end       = (dev->key_cmd_queue_end + 1) & 0xf;
-            break;
-        case 3:
-            kbd_log("ATkbc: dev->mouse_cmd_queue[%02X] = %02X;\n", dev->mouse_cmd_queue_end, val);
-            dev->mouse_cmd_queue[dev->mouse_cmd_queue_end] = val;
-            dev->mouse_cmd_queue_end     = (dev->mouse_cmd_queue_end + 1) & 0xf;
-            break;
-        case 2:
-            kbd_log("ATkbc: dev->mouse_queue[%02X] = %02X;\n", dev->mouse_queue_end, val);
-            dev->mouse_queue[dev->mouse_queue_end] = val;
-            dev->mouse_queue_end         = (dev->mouse_queue_end + 1) & 0xf;
-            break;
-        case 1:
-            kbd_log("ATkbc: dev->key_queue[%02X] = %02X;\n", dev->key_queue_end, val);
-            dev->key_queue[dev->key_queue_end] = val;
-            dev->key_queue_end           = (dev->key_queue_end + 1) & 0xf;
-            break;
-        case 0:
-        default:
-            kbd_log("ATkbc: dev->key_ctrl_queue[%02X] = %02X;\n", dev->key_ctrl_queue_end, val);
-            dev->key_ctrl_queue[dev->key_ctrl_queue_end] = val;
-            dev->key_ctrl_queue_end                 = (dev->key_ctrl_queue_end + 1) & 0x3f;
-            break;
-    }
-}
-
-static int
-kbc_translate(atkbd_t *dev, uint8_t val)
-{
-    int      xt_mode   = (dev->mem[0x20] & 0x20) && ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF);
-    int      translate = (dev->mem[0x20] & 0x40) || xt_mode || ((dev->flags & KBC_TYPE_MASK) == KBC_TYPE_PS2_2);
-    uint8_t  kbc_ven   = dev->flags & KBC_VEN_MASK;
-    int      ret       = - 1;
-
-    /* Allow for scan code translation. */
-    if (translate && (val == 0xf0)) {
-        kbd_log("ATkbd: translate is on, F0 prefix detected\n");
-        dev->sc_or = 0x80;
-        return ret;
-    }
-
-    /* Skip break code if translated make code has bit 7 set. */
-    if (translate && (dev->sc_or == 0x80) && (nont_to_t[val] & 0x80)) {
-        kbd_log("ATkbd: translate is on, skipping scan code: %02X (original: F0 %02X)\n", nont_to_t[val], val);
-        dev->sc_or = 0;
-        return ret;
-    }
-
-    /* Test for T3100E 'Fn' key (Right Alt / Right Ctrl) */
-    if ((dev != NULL) && (kbc_ven == KBC_VEN_TOSHIBA) &&
-        (keyboard_recv(0x138) || keyboard_recv(0x11d)))  switch (val) {
-        case 0x4f:
-            t3100e_notify_set(0x01);
-            break; /* End */
-        case 0x50:
-            t3100e_notify_set(0x02);
-            break; /* Down */
-        case 0x51:
-            t3100e_notify_set(0x03);
-            break; /* PgDn */
-        case 0x52:
-            t3100e_notify_set(0x04);
-            break; /* Ins */
-        case 0x53:
-            t3100e_notify_set(0x05);
-            break; /* Del */
-        case 0x54:
-            t3100e_notify_set(0x06);
-            break; /* SysRQ */
-        case 0x45:
-            t3100e_notify_set(0x07);
-            break; /* NumLock */
-        case 0x46:
-            t3100e_notify_set(0x08);
-            break; /* ScrLock */
-        case 0x47:
-            t3100e_notify_set(0x09);
-            break; /* Home */
-        case 0x48:
-            t3100e_notify_set(0x0a);
-            break; /* Up */
-        case 0x49:
-            t3100e_notify_set(0x0b);
-            break; /* PgUp */
-        case 0x4a:
-            t3100e_notify_set(0x0c);
-            break; /* Keypad - */
-        case 0x4b:
-            t3100e_notify_set(0x0d);
-            break; /* Left */
-        case 0x4c:
-            t3100e_notify_set(0x0e);
-            break; /* KP 5 */
-        case 0x4d:
-            t3100e_notify_set(0x0f);
-            break; /* Right */
-    }
-
-    kbd_log("ATkbd: translate is %s, ", translate ? "on" : "off");
-#ifdef ENABLE_KEYBOARD_AT_LOG
-    kbd_log("scan code: ");
-    if (translate) {
-        kbd_log("%02X (original: ", (nont_to_t[val] | dev->sc_or));
-        if (dev->sc_or == 0x80)
-            kbd_log("F0 ");
-        kbd_log("%02X)\n", val);
-    } else
-        kbd_log("%02X\n", val);
-#endif
-
-    ret = translate ? (nont_to_t[val] | dev->sc_or) : val;
-
-    if (dev->sc_or == 0x80)
-        dev->sc_or = 0;
-
-    return ret;
-}
-
-static void
-add_to_kbc_queue_front(atkbd_t *dev, uint8_t val, uint8_t channel, uint8_t stat_hi)
-{
-    uint8_t kbc_ven = dev->flags & KBC_VEN_MASK;
-    int temp = (channel == 1) ? kbc_translate(dev, val) : val;
-
-    if (temp == -1)
-        return;
-
-    if ((kbc_ven == KBC_VEN_AMI) || (kbc_ven == KBC_VEN_TG) ||
-        (kbc_ven == KBC_VEN_TG_GREEN) || ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF))
-        stat_hi |= ((dev->input_port & 0x80) ? 0x10 : 0x00);
-    else
-        stat_hi |= 0x10;
-
-    kbd_log("ATkbc: Adding %02X to front on channel %i...\n", temp, channel);
-    dev->status = (dev->status & ~0xf0) | STAT_OFULL | stat_hi;
-
-    /* WARNING: On PS/2, all IRQ's are level-triggered, but the IBM PS/2 KBC firmware is explicitly
-                written to pulse its P2 IRQ bits, so they should be kept as as edge-triggered here. */
-    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-        if (channel >= 2) {
-            dev->status |= STAT_MFULL;
-
-            if (dev->mem[0x20] & 0x02)
-                picint_common(1 << 12, 0, 1);
-            picint_common(1 << 1, 0, 0);
-        } else {
-            if (dev->mem[0x20] & 0x01)
-                picint_common(1 << 1, 0, 1);
-            picint_common(1 << 12, 0, 0);
-        }
-    } else if (dev->mem[0x20] & 0x01)
-        picintlevel(1 << 1); /* AT KBC: IRQ 1 is level-triggered because it is tied to OBF. */
-
-    dev->out = temp;
-}
-
-static void
-add_data_kbd_cmd_queue(atkbd_t *dev, uint8_t val)
-{
-    if (dev->key_cmd_queue_end >= 16) {
-        kbd_log("ATkbc: Unable to add to queue, dev->key_cmd_queue_end >= 16\n");
-        return;
-    }
-    kbd_log("ATkbc: dev->key_cmd_queue[%02X] = %02X;\n", dev->key_cmd_queue_end, val);
-    kbc_queue_add(dev, val, 4);
-    dev->kbd_last_scan_code = val;
-}
-
-static void
-add_data_kbd_queue(atkbd_t *dev, uint8_t val)
-{
-    if (!keyboard_scan || (dev->key_queue_end >= 16)) {
-        kbd_log("ATkbc: Unable to add to queue, conditions: %i, %i\n", !keyboard_scan, (dev->key_queue_end >= 16));
-        return;
-    }
-    kbd_log("ATkbc: key_queue[%02X] = %02X;\n", dev->key_queue_end, val);
-    kbc_queue_add(dev, val, 1);
-    dev->kbd_last_scan_code = val;
-}
-
-static void
-add_data_kbd_front(atkbd_t *dev, uint8_t val)
-{
-    add_data_kbd_cmd_queue(dev, val);
-}
-
-static void kbd_process_cmd(void *priv);
-static void kbc_process_cmd(void *priv);
-
-static void
-set_enable_kbd(atkbd_t *dev, uint8_t enable)
-{
-    dev->mem[0x20] &= 0xef;
-    dev->mem[0x20] |= (enable ? 0x00 : 0x10);
-}
-
-static void
-set_enable_mouse(atkbd_t *dev, uint8_t enable)
-{
-    dev->mem[0x20] &= 0xdf;
-    dev->mem[0x20] |= (enable ? 0x00 : 0x20);
-}
-
-static void
-kbc_ibf_process(atkbd_t *dev)
-{
-    /* IBF set, process both commands and data. */
-    dev->status &= ~STAT_IFULL;
-    dev->kbc_state      = KBC_STATE_MAIN_IBF;
-    if (dev->status & STAT_CD)
-        kbc_process_cmd(dev);
-    else {
-        set_enable_kbd(dev, 1);
-        dev->key_wantcmd = 1;
-        dev->key_dat = dev->ib;
-        dev->kbc_state = KBC_STATE_SEND_KBD;
-    }
-}
-
-static void
-kbc_scan_kbd_at(atkbd_t *dev)
-{
-    if (!(dev->mem[0x20] & 0x10)) {
-        /* Both OBF and IBF clear and keyboard is enabled. */
-        /* XT mode. */
-        if (dev->mem[0x20] & 0x20) {
-            if (dev->out_new != -1) {
-                add_to_kbc_queue_front(dev, dev->out_new, 1, 0x00);
-                dev->out_new        = -1;
-                dev->kbc_state      = KBC_STATE_MAIN_IBF;
-            } else if (dev->status & STAT_IFULL)
-                kbc_ibf_process(dev);
-        /* AT mode. */
-        } else {
-            // dev->t = dev->mem[0x28];
-            if (dev->mem[0x2e] != 0x00) {
-                // if (!(dev->t & 0x02))
-                    // return;
-                dev->mem[0x2e] = 0x00;
-            }
-            dev->output_port &= 0xbf;
-            if (dev->out_new != -1) {
-                /* In our case, we never have noise on the line, so we can simplify this. */
-                /* Read data from the keyboard. */
-                if (dev->mem[0x20] & 0x40) {
-                    if ((dev->mem[0x20] & 0x08) || (dev->input_port & 0x80))
-                        add_to_kbc_queue_front(dev, dev->out_new, 1, 0x00);
-                    dev->mem[0x2d] = (dev->out_new == 0xf0) ? 0x80 : 0x00;
-                } else
-                    add_to_kbc_queue_front(dev, dev->out_new, 1, 0x00);
-                dev->out_new        = -1;
-                dev->kbc_state      = KBC_STATE_MAIN_IBF;
-            }
-        }
-    }
-}
-
-static void    write_output(atkbd_t *dev, uint8_t val);
-
-static void
-kbc_poll_at(atkbd_t *dev)
-{
-    switch (dev->kbc_state) {
-        case KBC_STATE_RESET:
-            if (dev->status & STAT_IFULL) {
-                dev->status = ((dev->status & 0x0f) | 0x10) & ~STAT_IFULL;
-                if ((dev->status & STAT_CD) && (dev->ib == 0xaa))
-                    kbc_process_cmd(dev);
-            }
-            break;
-        case KBC_STATE_MAIN_IBF:
-        default:
-           if (dev->status & STAT_OFULL) {
-                /* OBF set, wait until it is cleared but still process commands. */
-                if ((dev->status & STAT_IFULL) && (dev->status & STAT_CD)) {
-                    dev->status &= ~STAT_IFULL;
-                    kbc_process_cmd(dev);
-                }
-            } else if (dev->status & STAT_IFULL)
-                kbc_ibf_process(dev);
-            else if (!(dev->mem[0x20] & 0x10))
-                dev->kbc_state = KBC_STATE_MAIN_KBD;
-            break;
-        case KBC_STATE_MAIN_KBD:
-        case KBC_STATE_MAIN_BOTH:
-            if (dev->status & STAT_IFULL)
-                kbc_ibf_process(dev);
-            else {
-                (void) kbc_scan_kbd_at(dev);
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-            }
-            break;
-        case KBC_STATE_KBC_OUT:
-            /* Keyboard controller command want to output multiple bytes. */
-            if (dev->status & STAT_IFULL) {
-                /* Data from host aborts dumping. */
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-                kbc_ibf_process(dev);
-            }
-            /* Do not continue dumping until OBF is clear. */
-            if (!(dev->status & STAT_OFULL)) {
-                kbd_log("ATkbc: %02X coming from channel 0\n", dev->key_ctrl_queue[dev->key_ctrl_queue_start]);
-                add_to_kbc_queue_front(dev, dev->key_ctrl_queue[dev->key_ctrl_queue_start], 0, 0x00);
-                dev->key_ctrl_queue_start = (dev->key_ctrl_queue_start + 1) & 0x3f;
-                if (dev->key_ctrl_queue_start == dev->key_ctrl_queue_end)
-                    dev->kbc_state = KBC_STATE_MAIN_IBF;
-            }
-            break;
-        case KBC_STATE_KBC_PARAM:
-            /* Keyboard controller command wants data, wait for said data. */
-            if (dev->status & STAT_IFULL) {
-                /* Command written, abort current command. */
-                if (dev->status & STAT_CD)
-                    dev->kbc_state = KBC_STATE_MAIN_IBF;
-
-                dev->status &= ~STAT_IFULL;
-                kbc_process_cmd(dev);
-            }
-            break;
-        case KBC_STATE_SEND_KBD:
-            if (!dev->key_wantcmd)
-                dev->kbc_state = KBC_STATE_SCAN_KBD;
-            break;
-        case KBC_STATE_SCAN_KBD:
-            kbc_scan_kbd_at(dev);
-            break;
-    }
-}
-
-/*
-    Correct Procedure:
-        1. Controller asks the device (keyboard or mouse) for a byte.
-        2. The device, unless it's in the reset or command states, sees if there's anything to give it,
-           and if yes, begins the transfer.
-        3. The controller checks if there is a transfer, if yes, transfers the byte and sends it to the host,
-           otherwise, checks the next device, or if there is no device left to check, checks if IBF is full
-           and if yes, processes it.
- */
-static int
-kbc_scan_kbd_ps2(atkbd_t *dev)
-{
-    if (dev->out_new != -1) {
-        kbd_log("ATkbc: %02X coming from channel 1\n", dev->out_new & 0xff);
-        add_to_kbc_queue_front(dev, dev->out_new, 1, 0x00);
-        dev->out_new        = -1;
-        dev->kbc_state      = KBC_STATE_MAIN_IBF;
-        return 1;
-    }
-
-    return 0;
-}
-
-static int
-kbc_scan_aux_ps2(atkbd_t *dev)
-{
-    if (dev->out_new_mouse != -1) {
-        kbd_log("ATkbc: %02X coming from channel 2\n", dev->out_new_mouse & 0xff);
-        add_to_kbc_queue_front(dev, dev->out_new_mouse, 2, 0x00);
-        dev->out_new_mouse  = -1;
-        dev->kbc_state      = KBC_STATE_MAIN_IBF;
-        return 1;
-    }
-
-    return 0;
-}
-
-static void
-kbc_poll_ps2(atkbd_t *dev)
-{
-    switch (dev->kbc_state) {
-        case KBC_STATE_RESET:
-            if (dev->status & STAT_IFULL) {
-                dev->status = ((dev->status & 0x0f) | 0x10) & ~STAT_IFULL;
-                if ((dev->status & STAT_CD) && (dev->ib == 0xaa))
-                    kbc_process_cmd(dev);
-            }
-            break;
-        case KBC_STATE_MAIN_IBF:
-        default:
-            if (dev->status & STAT_IFULL)
-                kbc_ibf_process(dev);
-            else if (!(dev->status & STAT_OFULL)) {
-                if (dev->mem[0x20] & 0x20) {
-                    if (!(dev->mem[0x20] & 0x10)) {
-                        dev->output_port &= 0xbf;
-                        dev->kbc_state = KBC_STATE_MAIN_KBD;
-                    }
-                } else {
-                    dev->output_port &= 0xf7;
-                    if (dev->mem[0x20] & 0x10)
-                        dev->kbc_state = KBC_STATE_MAIN_MOUSE;
-                    else {
-                        dev->output_port &= 0xbf;
-                        dev->kbc_state = KBC_STATE_MAIN_BOTH;
-                    }
-                }
-            }
-            break;
-        case KBC_STATE_MAIN_KBD:
-            if (dev->status & STAT_IFULL)
-                kbc_ibf_process(dev);
-            else {
-                (void) kbc_scan_kbd_ps2(dev);
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-            }
-            break;
-        case KBC_STATE_MAIN_MOUSE:
-            if (dev->status & STAT_IFULL)
-                kbc_ibf_process(dev);
-            else {
-                (void) kbc_scan_aux_ps2(dev);
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-            }
-            break;
-        case KBC_STATE_MAIN_BOTH:
-            if (kbc_scan_kbd_ps2(dev))
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-            else
-                dev->kbc_state = KBC_STATE_MAIN_MOUSE;
-            break;
-        case KBC_STATE_KBC_OUT:
-            /* Keyboard controller command want to output multiple bytes. */
-            if (dev->status & STAT_IFULL) {
-                /* Data from host aborts dumping. */
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-                kbc_ibf_process(dev);
-            }
-            /* Do not continue dumping until OBF is clear. */
-            if (!(dev->status & STAT_OFULL)) {
-                kbd_log("ATkbc: %02X coming from channel 0\n", dev->out_new & 0xff);
-                add_to_kbc_queue_front(dev, dev->key_ctrl_queue[dev->key_ctrl_queue_start], 0, 0x00);
-                dev->key_ctrl_queue_start = (dev->key_ctrl_queue_start + 1) & 0x3f;
-                if (dev->key_ctrl_queue_start == dev->key_ctrl_queue_end)
-                    dev->kbc_state = KBC_STATE_MAIN_IBF;
-            }
-            break;
-        case KBC_STATE_KBC_PARAM:
-            /* Keyboard controller command wants data, wait for said data. */
-            if (dev->status & STAT_IFULL) {
-                /* Command written, abort current command. */
-                if (dev->status & STAT_CD)
-                    dev->kbc_state = KBC_STATE_MAIN_IBF;
-
-                dev->status &= ~STAT_IFULL;
-                kbc_process_cmd(dev);
-            }
-            break;
-        case KBC_STATE_SEND_KBD:
-            if (!dev->key_wantcmd)
-                dev->kbc_state = KBC_STATE_SCAN_KBD;
-            break;
-        case KBC_STATE_SCAN_KBD:
-            (void) kbc_scan_kbd_ps2(dev);
-            break;
-        case KBC_STATE_SEND_MOUSE:
-            if (!dev->mouse_wantcmd)
-                dev->kbc_state = KBC_STATE_SCAN_MOUSE;
-            break;
-        case KBC_STATE_SCAN_MOUSE:
-            (void) kbc_scan_aux_ps2(dev);
-            break;
-    }
-}
-
-static void
-kbc_poll_kbd(atkbd_t *dev)
-{
-    switch (dev->kbd_state) {
-        case DEV_STATE_MAIN_1:
-            /* Process the command if needed and then return to main loop #2. */
-            if (dev->key_wantcmd) {
-                kbd_log("ATkbc: Processing keyboard command %02X...\n", dev->key_dat);
-                kbc_queue_reset(dev, 4);
-                // dev->out_new = -1;
-                kbd_process_cmd(dev);
-                dev->key_wantcmd    = 0;
-            } else
-                dev->kbd_state = DEV_STATE_MAIN_2;
-            break;
-        case DEV_STATE_MAIN_2:
-            /* Output from scan queue if needed and then return to main loop #1. */
-            if (keyboard_scan && (dev->out_new == -1) && (dev->key_queue_start != dev->key_queue_end)) {
-                kbd_log("ATkbc: %02X (DATA) on channel 1\n", dev->key_queue[dev->key_queue_start]);
-                dev->out_new         = dev->key_queue[dev->key_queue_start];
-                dev->key_queue_start = (dev->key_queue_start + 1) & 0xf;
-            }
-            if (!keyboard_scan || dev->key_wantcmd)
-                dev->kbd_state = DEV_STATE_MAIN_1;
-            break;
-        case DEV_STATE_MAIN_OUT:
-            /* If host wants to send command while we're sending a byte to host, process the command. */
-            if (dev->key_wantcmd) {
-                kbd_log("ATkbc: Processing keyboard command %02X...\n", dev->key_dat);
-                kbc_queue_reset(dev, 4);
-                kbd_process_cmd(dev);
-                dev->key_wantcmd    = 0;
-                break;
-            }
-            /* FALLTHROUGH */
-        case DEV_STATE_MAIN_WANT_IN:
-            /* Output command response and then wait for host data. */
-            if ((dev->out_new == -1) && (dev->key_cmd_queue_start != dev->key_cmd_queue_end)) {
-                kbd_log("ATkbc: %02X (CMD ) on channel 1\n", dev->key_cmd_queue[dev->key_cmd_queue_start]);
-                dev->out_new             = dev->key_cmd_queue[dev->key_cmd_queue_start];
-                dev->key_cmd_queue_start = (dev->key_cmd_queue_start + 1) & 0xf;
-            }
-            if (dev->key_cmd_queue_start == dev->key_cmd_queue_end)
-                dev->kbd_state = (dev->kbd_state == DEV_STATE_MAIN_OUT) ? DEV_STATE_MAIN_2 : DEV_STATE_MAIN_IN;
-            break;
-        case DEV_STATE_MAIN_IN:
-            /* Wait for host data. */
-            if (dev->key_wantcmd) {
-                kbd_log("ATkbc: Processing keyboard command %02X parameter %02X...\n", dev->key_command, dev->key_dat);
-                kbc_queue_reset(dev, 4);
-                // dev->out_new = -1;
-                kbd_process_cmd(dev);
-                dev->key_wantcmd    = 0;
-            }
-            break;
-    }
-}
-
-static void
-kbc_poll_aux(atkbd_t *dev)
-{
-    switch (dev->mouse_state) {
-        case DEV_STATE_MAIN_1:
-            /* Process the command if needed and then return to main loop #2. */
-            if (dev->mouse_wantcmd) {
-                kbd_log("ATkbc: Processing mouse command %02X...\n", dev->mouse_dat);
-                kbc_queue_reset(dev, 3);
-                // dev->out_new_mouse = -1;
-                dev->mouse_state = DEV_STATE_MAIN_OUT;
-                mouse_write(dev->mouse_dat, mouse_p);
-                if ((dev->mouse_dat == 0xe8) || (dev->mouse_dat == 0xf3))
-                    dev->mouse_state = DEV_STATE_MAIN_WANT_IN;
-                dev->mouse_wantcmd  = 0;
-            } else
-                dev->mouse_state = DEV_STATE_MAIN_2;
-            break;
-        case DEV_STATE_MAIN_2:
-            /* Output from scan queue if needed and then return to main loop #1. */
-            if (mouse_scan && (dev->out_new_mouse == -1) && (dev->mouse_queue_start != dev->mouse_queue_end)) {
-                kbd_log("ATkbc: %02X (DATA) on channel 2\n", dev->mouse_queue[dev->mouse_queue_start]);
-                dev->out_new_mouse       = dev->mouse_queue[dev->mouse_queue_start];
-                dev->mouse_queue_start   = (dev->mouse_queue_start + 1) & 0xf;
-            }
-            if (!mouse_scan || dev->mouse_wantcmd)
-                dev->mouse_state = DEV_STATE_MAIN_1;
-            break;
-        case DEV_STATE_MAIN_OUT:
-            /* If host wants to send command while we're sending a byte to host, process the command. */
-            if (dev->mouse_wantcmd) {
-                kbd_log("ATkbc: Processing mouse command %02X...\n", dev->mouse_dat);
-                kbc_queue_reset(dev, 3);
-                dev->mouse_state = DEV_STATE_MAIN_OUT;
-                mouse_write(dev->mouse_dat, mouse_p);
-                if ((dev->mouse_dat == 0xe8) || (dev->mouse_dat == 0xf3))
-                    dev->mouse_state = DEV_STATE_MAIN_WANT_IN;
-                dev->mouse_wantcmd  = 0;
-                break;
-            }
-            /* FALLTHROUGH */
-        case DEV_STATE_MAIN_WANT_IN:
-            /* Output command response and then wait for host data. */
-            if ((dev->out_new_mouse == -1) && (dev->mouse_cmd_queue_start != dev->mouse_cmd_queue_end)) {
-                kbd_log("ATkbc: %02X (CMD ) on channel 2\n", dev->mouse_cmd_queue[dev->mouse_cmd_queue_start]);
-                dev->out_new_mouse         = dev->mouse_cmd_queue[dev->mouse_cmd_queue_start];
-                dev->mouse_cmd_queue_start = (dev->mouse_cmd_queue_start + 1) & 0xf;
-            }
-            if (dev->mouse_cmd_queue_start == dev->mouse_cmd_queue_end)
-                dev->mouse_state = (dev->mouse_state == DEV_STATE_MAIN_OUT) ? DEV_STATE_MAIN_2 : DEV_STATE_MAIN_IN;
-            break;
-        case DEV_STATE_MAIN_IN:
-            /* Wait for host data. */
-            if (dev->mouse_wantcmd) {
-                kbd_log("ATkbc: Processing mouse command parameter %02X...\n", dev->mouse_dat);
-                kbc_queue_reset(dev, 3);
-                // dev->out_new_mouse = -1;
-                dev->mouse_state = DEV_STATE_MAIN_OUT;
-                mouse_write(dev->mouse_dat, mouse_p);
-                dev->mouse_wantcmd  = 0;
-            }
-            break;
-    }
-}
-
-/* TODO: State machines for controller, keyboard, and mouse. */
-static void
-kbd_poll(void *priv)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    timer_advance_u64(&dev->send_delay_timer, (100ULL * TIMER_USEC));
-
-    /* TODO: Use a fuction pointer for this (also needed to the AMI KBC mode switching)
-             and implement the password security state. */
-    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF)
-        kbc_poll_ps2(dev);
-    else
-        kbc_poll_at(dev);
-
-    kbc_poll_kbd(dev);
-
-    if (((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) && mouse_write)
-        kbc_poll_aux(dev);
-
-    // if (kbc_ports[0] && kbc_ports[0]>-priv)
-        // kbc_ports[0]>poll(kbc_ports[0]>-priv);
-
-    // if (((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) && kbc_ports[1] && kbc_ports[1]>-priv)
-        // kbc_ports[1]>poll(kbc_ports[1]>-priv);
-}
-
-static void
-add_data_vals(atkbd_t *dev, uint8_t *val, uint8_t len)
+add_data_vals(atkbc_dev_t *dev, uint8_t *val, uint8_t len)
 {
     int i;
 
     for (i = 0; i < len; i++)
-        add_data_kbd_queue(dev, val[i]);
+        kbc_at_dev_queue_add(dev, val[i], 1);
 }
 
 static void
 add_data_kbd(uint16_t val)
 {
-    atkbd_t *dev       = SavedKbd;
+    atkbc_dev_t *dev = SavedKbd;
     uint8_t  fake_shift[4];
     uint8_t  num_lock = 0, shift_states = 0;
 
@@ -1325,10 +525,10 @@ add_data_kbd(uint16_t val)
 
     switch (val) {
         case FAKE_LSHIFT_ON:
-            kbd_log("fake left shift on, scan code: ");
+            keyboard_at_log("%s: Fake left shift on, scan code: ", dev->name);
             if (num_lock) {
                 if (shift_states) {
-                    kbd_log("N/A (one or both shifts on)\n");
+                    keyboard_at_log("N/A (one or both shifts on)\n");
                     break;
                 } else {
                     /* Num lock on and no shifts are pressed, send non-inverted fake shift. */
@@ -1346,7 +546,7 @@ add_data_kbd(uint16_t val)
                             break;
 
                         default:
-                            kbd_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
+                            keyboard_at_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
                             break;
                     }
                 }
@@ -1368,7 +568,7 @@ add_data_kbd(uint16_t val)
                             break;
 
                         default:
-                            kbd_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
+                            keyboard_at_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
                             break;
                     }
                 }
@@ -1389,19 +589,19 @@ add_data_kbd(uint16_t val)
                             break;
 
                         default:
-                            kbd_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
+                            keyboard_at_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
                             break;
                     }
                 }
-                kbd_log(shift_states ? "" : "N/A (both shifts off)\n");
+                keyboard_at_log(shift_states ? "" : "N/A (both shifts off)\n");
             }
             break;
 
         case FAKE_LSHIFT_OFF:
-            kbd_log("fake left shift on, scan code: ");
+            keyboard_at_log("%s: Fake left shift off, scan code: ", dev->name);
             if (num_lock) {
                 if (shift_states) {
-                    kbd_log("N/A (one or both shifts on)\n");
+                    keyboard_at_log("N/A (one or both shifts on)\n");
                     break;
                 } else {
                     /* Num lock on and no shifts are pressed, send non-inverted fake shift. */
@@ -1420,7 +620,7 @@ add_data_kbd(uint16_t val)
                             break;
 
                         default:
-                            kbd_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
+                            keyboard_at_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
                             break;
                     }
                 }
@@ -1441,7 +641,7 @@ add_data_kbd(uint16_t val)
                             break;
 
                         default:
-                            kbd_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
+                            keyboard_at_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
                             break;
                     }
                 }
@@ -1461,1368 +661,321 @@ add_data_kbd(uint16_t val)
                             break;
 
                         default:
-                            kbd_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
+                            keyboard_at_log("N/A (scan code set %i)\n", keyboard_mode & 0x02);
                             break;
                     }
                 }
-                kbd_log(shift_states ? "" : "N/A (both shifts off)\n");
+                keyboard_at_log(shift_states ? "" : "N/A (both shifts off)\n");
             }
             break;
 
         default:
-            add_data_kbd_queue(dev, val);
+            kbc_at_dev_queue_add(dev, val, 1);
             break;
     }
 }
 
-static void
-write_output(atkbd_t *dev, uint8_t val)
-{
-    uint8_t old = dev->output_port;
-    kbd_log("ATkbc: write output port: %02X (old: %02X)\n", val, dev->output_port);
-
-    uint8_t kbc_ven = dev->flags & KBC_VEN_MASK;
-
-#if 0
-    /* PS/2: Handle IRQ's. */
-    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-        /* IRQ 12 */
-        picint_common(1 << 12, 0, val & 0x20);
-
-        /* IRQ 1 */
-        picint_common(1 << 1, 0, val & 0x10);
-    }
-#endif
-
-    /* AT, PS/2: Handle A20. */
-    if ((old ^ val) & 0x02) { /* A20 enable change */
-        mem_a20_key = val & 0x02;
-        mem_a20_recalc();
-        flushmmucache();
-    }
-
-    /* AT, PS/2: Handle reset. */
-    /* 0 holds the CPU in the RESET state, 1 releases it. To simplify this,
-       we just do everything on release. */
-    if ((old ^ val) & 0x01) { /*Reset*/
-        if (!(val & 0x01)) {  /* Pin 0 selected. */
-            /* Pin 0 selected. */
-            kbd_log("write_output(): Pulse reset!\n");
-            if (machines[machine].flags & MACHINE_COREBOOT) {
-                /* The SeaBIOS hard reset code attempts a KBC reset if ACPI RESET_REG
-                   is not available. However, the KBC reset is normally a soft reset, so
-                   SeaBIOS gets caught in a soft reset loop as it tries to hard reset the
-                   machine. Hack around this by making the KBC reset a hard reset only on
-                   coreboot machines. */
-                pc_reset_hard();
-            } else {
-                softresetx86(); /*Pulse reset!*/
-                cpu_set_edx();
-                flushmmucache();
-                if (kbc_ven == KBC_VEN_ALI)
-                    smbase = 0x00030000;
-            }
-        }
-    }
-
-    /* Do this here to avoid an infinite reset loop. */
-    dev->output_port = val;
-}
-
-static void
-write_output_fast_a20(atkbd_t *dev, uint8_t val)
-{
-    uint8_t old = dev->output_port;
-    kbd_log("ATkbc: write output port in fast A20 mode: %02X (old: %02X)\n", val, dev->output_port);
-
-    /* AT, PS/2: Handle A20. */
-    if ((old ^ val) & 0x02) { /* A20 enable change */
-        mem_a20_key = val & 0x02;
-        mem_a20_recalc();
-        flushmmucache();
-    }
-
-    /* Do this here to avoid an infinite reset loop. */
-    dev->output_port = val;
-}
-
-static void
-write_cmd(atkbd_t *dev, uint8_t val)
-{
-    kbd_log("ATkbc: write command byte: %02X (old: %02X)\n", val, dev->mem[0x20]);
-
-    /* PS/2 type 2 keyboard controllers always force the XLAT bit to 0. */
-    if ((dev->flags & KBC_TYPE_MASK) == KBC_TYPE_PS2_2) {
-        val &= ~CCB_TRANSLATE;
-        dev->mem[0x20] &= ~CCB_TRANSLATE;
-    } else if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-        if (val & 0x10)
-            dev->mem[0x2e] = 0x01;
-    }
-
-    kbd_log("ATkbc: keyboard interrupt is now %s\n", (val & 0x01) ? "enabled" : "disabled");
-
-    if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-        /* Update the output port to mirror the IBF and OBF bits, if active. */
-        write_output(dev, (dev->output_port & 0x0f) | ((val & 0x03) << 4) | ((val & 0x20) ? 0xc0 : 0x00));
-    }
-
-    kbd_log("Command byte now: %02X (%02X)\n", dev->mem[0x20], val);
-
-    dev->status = (dev->status & ~STAT_SYSFLAG) | (val & STAT_SYSFLAG);
-}
-
-static void
-pulse_output(atkbd_t *dev, uint8_t mask)
-{
-    if (mask != 0x0f) {
-        dev->old_output_port = dev->output_port & ~(0xf0 | mask);
-        kbd_log("pulse_output(): Output port now: %02X\n", dev->output_port & (0xf0 | mask));
-        write_output(dev, dev->output_port & (0xf0 | mask));
-        timer_set_delay_u64(&dev->pulse_cb, 6ULL * TIMER_USEC);
-    }
-}
-
-static void
-pulse_poll(void *priv)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    kbd_log("pulse_poll(): Output port now: %02X\n", dev->output_port | dev->old_output_port);
-    write_output(dev, dev->output_port | dev->old_output_port);
-}
-
-static uint8_t
-write64_generic(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-    uint8_t  current_drive, fixed_bits;
-    uint8_t  kbc_ven = 0x0;
-    kbc_ven          = dev->flags & KBC_VEN_MASK;
-
-    switch (val) {
-        case 0xa4: /* check if password installed */
-            if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: check if password installed\n");
-                add_to_kbc_queue_front(dev, 0xf1, 0, 0x00);
-                return 0;
-            }
-            break;
-
-        case 0xa7: /* disable mouse port */
-            if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: disable mouse port\n");
-                set_enable_mouse(dev, 0);
-                return 0;
-            }
-            break;
-
-        case 0xa8: /*Enable mouse port*/
-            if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: enable mouse port\n");
-                set_enable_mouse(dev, 1);
-                return 0;
-            }
-            break;
-
-        case 0xa9: /*Test mouse port*/
-            kbd_log("ATkbc: test mouse port\n");
-            if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00); /* no error, this is testing the channel 2 interface */
-                return 0;
-            }
-            break;
-
-        case 0xaf: /* read keyboard version */
-            kbd_log("ATkbc: read keyboard version\n");
-            add_to_kbc_queue_front(dev, 0x42, 0, 0x00);
-            return 0;
-
-        case 0xc0: /* read input port */
-            kbd_log("ATkbc: read input port\n");
-            fixed_bits = 4;
-            /* The SMM handlers of Intel AMI Pentium BIOS'es expect bit 6 to be set. */
-            if (kbc_ven == KBC_VEN_INTEL_AMI)
-                fixed_bits |= 0x40;
-            if (kbc_ven == KBC_VEN_IBM_PS1) {
-                current_drive = fdc_get_current_drive();
-                add_to_kbc_queue_front(dev, dev->input_port | fixed_bits | (fdd_is_525(current_drive) ? 0x40 : 0x00),
-                                       0, 0x00);
-                dev->input_port = ((dev->input_port + 1) & 3) | (dev->input_port & 0xfc) | (fdd_is_525(current_drive) ? 0x40 : 0x00);
-            } else if (kbc_ven == KBC_VEN_NCR) {
-                /* switch settings
-                 * bit 7: keyboard disable
-                 * bit 6: display type (0 color, 1 mono)
-                 * bit 5: power-on default speed (0 high, 1 low)
-                 * bit 4: sense RAM size (0 unsupported, 1 512k on system board)
-                 * bit 3: coprocessor detect
-                 * bit 2: unused
-                 * bit 1: high/auto speed
-                 * bit 0: dma mode
-                 */
-                add_to_kbc_queue_front(dev, (dev->input_port | fixed_bits | (video_is_mda() ? 0x40 : 0x00) | (hasfpu ? 0x08 : 0x00)) & 0xdf,
-                                       0, 0x00);
-                dev->input_port = ((dev->input_port + 1) & 3) | (dev->input_port & 0xfc);
-            } else {
-                if ((kbc_ven == KBC_VEN_TG) || (kbc_ven == KBC_VEN_TG_GREEN)) {
-                    /* Bit 3, 2:
-                           1, 1: TriGem logo;
-                           1, 0: Garbled logo;
-                           0, 1: Epson logo;
-                           0, 0: Generic AMI logo. */
-                    if (dev->pci)
-                        fixed_bits |= 8;
-                    add_to_kbc_queue_front(dev, dev->input_port | fixed_bits, 0, 0x00);
-                } else if (((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) && ((dev->flags & KBC_VEN_MASK) != KBC_VEN_INTEL_AMI))
-#if 0
-                    add_to_kbc_queue_front(dev, (dev->input_port | fixed_bits) &
-                                          (((dev->flags & KBC_VEN_MASK) == KBC_VEN_ACER) ? 0xeb : 0xef), 0, 0x00);
-#else
-                    add_to_kbc_queue_front(dev, ((dev->input_port | fixed_bits) & 0xf0) | (((dev->flags & KBC_VEN_MASK) == KBC_VEN_ACER) ? 0x08 : 0x0c), 0, 0x00);
-#endif
-                else
-                    add_to_kbc_queue_front(dev, dev->input_port | fixed_bits, 0, 0x00);
-                dev->input_port = ((dev->input_port + 1) & 3) | (dev->input_port & 0xfc);
-            }
-            return 0;
-
-        case 0xd3: /* write mouse output buffer */
-            if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: write mouse output buffer\n");
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                return 0;
-            }
-            break;
-
-        case 0xd4: /* write to mouse */
-            kbd_log("ATkbc: write to mouse\n");
-            dev->want60 = 1;
-            dev->kbc_state = KBC_STATE_KBC_PARAM;
-            return 0;
-
-        case 0xf0 ... 0xff:
-            kbd_log("ATkbc: pulse %01X\n", val & 0x0f);
-            pulse_output(dev, val & 0x0f);
-            return 0;
-    }
-
-    kbd_log("ATkbc: bad command %02X\n", val);
-    return 1;
-}
-
-static uint8_t
-write60_ami(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (dev->command) {
-        /* 0x40 - 0x5F are aliases for 0x60-0x7F */
-        case 0x40 ... 0x5f:
-            kbd_log("ATkbc: AMI - alias write to %08X\n", dev->command);
-            dev->mem[(dev->command & 0x1f) + 0x20] = val;
-            if (dev->command == 0x60)
-                write_cmd(dev, val);
-            return 0;
-
-        case 0xa5: /* get extended controller RAM */
-            kbd_log("ATkbc: AMI - get extended controller RAM\n");
-            add_to_kbc_queue_front(dev, dev->mem[val], 0, 0x00);
-            return 0;
-
-        case 0xaf: /* set extended controller RAM */
-            kbd_log("ATkbc: AMI - set extended controller RAM\n");
-            if (dev->secr_phase == 1) {
-                dev->mem_addr   = val;
-                dev->want60     = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                dev->secr_phase = 2;
-            } else if (dev->secr_phase == 2) {
-                dev->mem[dev->mem_addr] = val;
-                dev->secr_phase         = 0;
-            }
-            return 0;
-
-        case 0xc1:
-            kbd_log("ATkbc: AMI MegaKey - write %02X to input port\n", val);
-            dev->input_port = val;
-            return 0;
-
-        case 0xcb: /* set keyboard mode */
-            kbd_log("ATkbc: AMI - set keyboard mode\n");
-            dev->ami_flags = val;
-            return 0;
-    }
-
-    return 1;
-}
-
-static uint8_t
-write64_ami(void *priv, uint8_t val)
-{
-    atkbd_t *dev     = (atkbd_t *) priv;
-    uint8_t  kbc_ven = dev->flags & KBC_VEN_MASK;
-
-    switch (val) {
-        case 0x00 ... 0x1f:
-            kbd_log("ATkbc: AMI - alias read from %08X\n", val);
-            add_to_kbc_queue_front(dev, dev->mem[val + 0x20], 0, 0x00);
-            return 0;
-
-        case 0x40 ... 0x5f:
-            kbd_log("ATkbc: AMI - alias write to %08X\n", dev->command);
-            dev->want60 = 1;
-            dev->kbc_state = KBC_STATE_KBC_PARAM;
-            return 0;
-
-        case 0xa0: /* copyright message */
-            kbc_queue_add(dev, 0x28, 0);
-            kbc_queue_add(dev, 0x00, 0);
-            dev->kbc_state = KBC_STATE_KBC_OUT;
-            break;
-
-        case 0xa1: /* get controller version */
-            kbd_log("ATkbc: AMI - get controller version\n");
-            if ((kbc_ven == KBC_VEN_TG) || (kbc_ven == KBC_VEN_TG_GREEN))
-                    add_to_kbc_queue_front(dev, 'Z', 0, 0x00);
-            else if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                if (kbc_ven == KBC_VEN_ALI)
-                    add_to_kbc_queue_front(dev, 'F', 0, 0x00);
-                else if ((dev->flags & KBC_VEN_MASK) == KBC_VEN_INTEL_AMI)
-                    add_to_kbc_queue_front(dev, '5', 0, 0x00);
-                else if (cpu_64bitbus)
-                    add_to_kbc_queue_front(dev, 'R', 0, 0x00);
-                else if (is486)
-                    add_to_kbc_queue_front(dev, 'P', 0, 0x00);
-                else
-                    add_to_kbc_queue_front(dev, 'H', 0, 0x00);
-            } else if (is386 && !is486) {
-                if (cpu_16bitbus)
-                    add_to_kbc_queue_front(dev, 'D', 0, 0x00);
-                else
-                    add_to_kbc_queue_front(dev, 'B', 0, 0x00);
-            } else if (!is386)
-                add_to_kbc_queue_front(dev, '8', 0, 0x00);
-            else
-                add_to_kbc_queue_front(dev, 'F', 0, 0x00);
-            return 0;
-
-        case 0xa2: /* clear keyboard controller lines P22/P23 */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - clear KBC lines P22 and P23\n");
-                write_output(dev, dev->output_port & 0xf3);
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-                return 0;
-            }
-            break;
-
-        case 0xa3: /* set keyboard controller lines P22/P23 */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - set KBC lines P22 and P23\n");
-                write_output(dev, dev->output_port | 0x0c);
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-                return 0;
-            }
-            break;
-
-        case 0xa4: /* write clock = low */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - write clock = low\n");
-                dev->ami_stat &= 0xfe;
-                return 0;
-            }
-            break;
-
-        case 0xa5: /* write clock = high */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - write clock = high\n");
-                dev->ami_stat |= 0x01;
-            } else {
-                kbd_log("ATkbc: get extended controller RAM\n");
-                dev->want60     = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-            }
-            return 0;
-
-        case 0xa6: /* read clock */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - read clock\n");
-                add_to_kbc_queue_front(dev, (dev->ami_stat & 1) ? 0xff : 0x00, 0, 0x00);
-                return 0;
-            }
-            break;
-
-        case 0xa7: /* write cache bad */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - write cache bad\n");
-                dev->ami_stat &= 0xfd;
-                return 0;
-            }
-            break;
-
-        case 0xa8: /* write cache good */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - write cache good\n");
-                dev->ami_stat |= 0x02;
-                return 0;
-            }
-            break;
-
-        case 0xa9: /* read cache */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) {
-                kbd_log("ATkbc: AMI - read cache\n");
-                add_to_kbc_queue_front(dev, (dev->ami_stat & 2) ? 0xff : 0x00, 0, 0x00);
-                return 0;
-            }
-            break;
-
-        case 0xaf: /* set extended controller RAM */
-            if (kbc_ven == KBC_VEN_ALI) {
-                kbd_log("ATkbc: Award/ALi/VIA keyboard controller revision\n");
-                add_to_kbc_queue_front(dev, 0x43, 0, 0x00);
-            } else {
-                kbd_log("ATkbc: set extended controller RAM\n");
-                dev->want60     = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                dev->secr_phase = 1;
-            }
-            return 0;
-
-        case 0xb0 ... 0xb3:
-            /* set KBC lines P10-P13 (input port bits 0-3) low */
-            kbd_log("ATkbc: set KBC lines P10-P13 (input port bits 0-3) low\n");
-            if (!(dev->flags & DEVICE_PCI) || (val > 0xb1))
-                dev->input_port &= ~(1 << (val & 0x03));
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-
-        case 0xb4: case 0xb5:
-            /* set KBC lines P22-P23 (output port bits 2-3) low */
-            kbd_log("ATkbc: set KBC lines P22-P23 (output port bits 2-3) low\n");
-            if (!(dev->flags & DEVICE_PCI))
-                write_output(dev, dev->output_port & ~(4 << (val & 0x01)));
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-
-        case 0xb8 ... 0xbb:
-            /* set KBC lines P10-P13 (input port bits 0-3) high */
-            kbd_log("ATkbc: set KBC lines P10-P13 (input port bits 0-3) high\n");
-            if (!(dev->flags & DEVICE_PCI) || (val > 0xb9)) {
-                dev->input_port |= (1 << (val & 0x03));
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            }
-            return 0;
-
-        case 0xbc: case 0xbd:
-            /* set KBC lines P22-P23 (output port bits 2-3) high */
-            kbd_log("ATkbc: set KBC lines P22-P23 (output port bits 2-3) high\n");
-            if (!(dev->flags & DEVICE_PCI))
-                write_output(dev, dev->output_port | (4 << (val & 0x01)));
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-
-        case 0xc1: /* write input port */
-            kbd_log("ATkbc: AMI MegaKey - write input port\n");
-            dev->want60 = 1;
-            dev->kbc_state = KBC_STATE_KBC_PARAM;
-            return 0;
-
-        case 0xc4:
-            /* set KBC line P14 low */
-            kbd_log("ATkbc: set KBC line P14 (input port bit 4) low\n");
-            dev->input_port &= 0xef;
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-        case 0xc5:
-            /* set KBC line P15 low */
-            kbd_log("ATkbc: set KBC line P15 (input port bit 5) low\n");
-            dev->input_port &= 0xdf;
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-
-        case 0xc8:
-            /*
-             * unblock KBC lines P22/P23
-             * (allow command D1 to change bits 2/3 of the output port)
-             */
-            kbd_log("ATkbc: AMI - unblock KBC lines P22 and P23\n");
-            dev->ami_flags &= 0xfb;
-            return 0;
-
-        case 0xc9:
-            /*
-             * block KBC lines P22/P23
-             * (disallow command D1 from changing bits 2/3 of the port)
-             */
-            kbd_log("ATkbc: AMI - block KBC lines P22 and P23\n");
-            dev->ami_flags |= 0x04;
-            return 0;
-
-        case 0xcc:
-            /* set KBC line P14 high */
-            kbd_log("ATkbc: set KBC line P14 (input port bit 4) high\n");
-            dev->input_port |= 0x10;
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-        case 0xcd:
-            /* set KBC line P15 high */
-            kbd_log("ATkbc: set KBC line P15 (input port bit 5) high\n");
-            dev->input_port |= 0x20;
-            add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-
-        case 0xef: /* ??? - sent by AMI486 */
-            kbd_log("ATkbc: ??? - sent by AMI486\n");
-            return 0;
-    }
-
-    return write64_generic(dev, val);
-}
-
-static uint8_t
-write64_ibm_mca(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (val) {
-        case 0xc1: /*Copy bits 0 to 3 of input port to status bits 4 to 7*/
-            kbd_log("ATkbc: copy bits 0 to 3 of input port to status bits 4 to 7\n");
-            dev->status &= 0x0f;
-            dev->status |= ((((dev->input_port & 0xfc) | 0x84) & 0x0f) << 4);
-            return 0;
-
-        case 0xc2: /*Copy bits 4 to 7 of input port to status bits 4 to 7*/
-            kbd_log("ATkbc: copy bits 4 to 7 of input port to status bits 4 to 7\n");
-            dev->status &= 0x0f;
-            dev->status |= (((dev->input_port & 0xfc) | 0x84) & 0xf0);
-            return 0;
-
-        case 0xaf:
-            kbd_log("ATkbc: bad KBC command AF\n");
-            return 1;
-
-        case 0xf0 ... 0xff:
-            kbd_log("ATkbc: pulse: %01X\n", (val & 0x03) | 0x0c);
-            pulse_output(dev, (val & 0x03) | 0x0c);
-            return 0;
-    }
-
-    return write64_generic(dev, val);
-}
-
-static uint8_t
-write60_quadtel(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (dev->command) {
-        case 0xcf: /*??? - sent by MegaPC BIOS*/
-            kbd_log("ATkbc: ??? - sent by MegaPC BIOS\n");
-            return 0;
-    }
-
-    return 1;
-}
-
-static uint8_t
-write64_olivetti(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (val) {
-        case 0x80: /* Olivetti-specific command */
-            /*
-             * bit 7: bus expansion board present (M300) / keyboard unlocked (M290)
-             * bits 4-6: ???
-             * bit 3: fast ram check (if inactive keyboard works erratically)
-             * bit 2: keyboard fuse present
-             * bits 0-1: ???
-             */
-            add_to_kbc_queue_front(dev, (0x0c | ((is386) ? 0x00 : 0x80)) & 0xdf, 0, 0x00);
-            dev->input_port = ((dev->input_port + 1) & 3) | (dev->input_port & 0xfc);
-            return 0;
-    }
-
-    return write64_generic(dev, val);
-}
-
-static uint8_t
-write64_quadtel(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (val) {
-        case 0xaf:
-            kbd_log("ATkbc: bad KBC command AF\n");
-            return 1;
-
-        case 0xcf: /*??? - sent by MegaPC BIOS*/
-            kbd_log("ATkbc: ??? - sent by MegaPC BIOS\n");
-            dev->want60 = 1;
-            dev->kbc_state = KBC_STATE_KBC_PARAM;
-            return 0;
-    }
-
-    return write64_generic(dev, val);
-}
-
-static uint8_t
-write60_toshiba(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (dev->command) {
-        case 0xb6: /* T3100e - set color/mono switch */
-            kbd_log("ATkbc: T3100e - set color/mono switch\n");
-            t3100e_mono_set(val);
-            return 0;
-    }
-
-    return 1;
-}
-
-static uint8_t
-write64_toshiba(void *priv, uint8_t val)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-
-    switch (val) {
-        case 0xaf:
-            kbd_log("ATkbc: bad KBC command AF\n");
-            return 1;
-
-        case 0xb0: /* T3100e: Turbo on */
-            kbd_log("ATkbc: T3100e: Turbo on\n");
-            t3100e_turbo_set(1);
-            return 0;
-
-        case 0xb1: /* T3100e: Turbo off */
-            kbd_log("ATkbc: T3100e: Turbo off\n");
-            t3100e_turbo_set(0);
-            return 0;
-
-        case 0xb2: /* T3100e: Select external display */
-            kbd_log("ATkbc: T3100e: Select external display\n");
-            t3100e_display_set(0x00);
-            return 0;
-
-        case 0xb3: /* T3100e: Select internal display */
-            kbd_log("ATkbc: T3100e: Select internal display\n");
-            t3100e_display_set(0x01);
-            return 0;
-
-        case 0xb4: /* T3100e: Get configuration / status */
-            kbd_log("ATkbc: T3100e: Get configuration / status\n");
-            add_to_kbc_queue_front(dev, t3100e_config_get(), 0, 0x00);
-            return 0;
-
-        case 0xb5: /* T3100e: Get colour / mono byte */
-            kbd_log("ATkbc: T3100e: Get colour / mono byte\n");
-            add_to_kbc_queue_front(dev, t3100e_mono_get(), 0, 0x00);
-            return 0;
-
-        case 0xb6: /* T3100e: Set colour / mono byte */
-            kbd_log("ATkbc: T3100e: Set colour / mono byte\n");
-            dev->want60 = 1;
-            dev->kbc_state = KBC_STATE_KBC_PARAM;
-            return 0;
-
-        case 0xb7: /* T3100e: Emulate PS/2 keyboard */
-        case 0xb8: /* T3100e: Emulate AT keyboard */
-            dev->flags &= ~KBC_TYPE_MASK;
-            if (val == 0xb7) {
-                kbd_log("ATkbc: T3100e: Emulate PS/2 keyboard\n");
-                dev->flags |= KBC_TYPE_PS2_NOREF;
-            } else {
-                kbd_log("ATkbc: T3100e: Emulate AT keyboard\n");
-                dev->flags |= KBC_TYPE_ISA;
-            }
-            return 0;
-
-        case 0xbb: /* T3100e: Read 'Fn' key.
-                      Return it for right Ctrl and right Alt; on the real
-                      T3100e, these keystrokes could only be generated
-                      using 'Fn'. */
-            kbd_log("ATkbc: T3100e: Read 'Fn' key\n");
-            if (keyboard_recv(0xb8) || /* Right Alt */
-                keyboard_recv(0x9d))   /* Right Ctrl */
-                add_to_kbc_queue_front(dev, 0x04, 0, 0x00);
-            else
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
-            return 0;
-
-        case 0xbc: /* T3100e: Reset Fn+Key notification */
-            kbd_log("ATkbc: T3100e: Reset Fn+Key notification\n");
-            t3100e_notify_set(0x00);
-            return 0;
-
-        case 0xc0: /*Read input port*/
-            kbd_log("ATkbc: read input port\n");
-
-            /* The T3100e returns all bits set except bit 6 which
-             * is set by t3100e_mono_set() */
-            dev->input_port = (t3100e_mono_get() & 1) ? 0xff : 0xbf;
-            add_to_kbc_queue_front(dev, dev->input_port, 0, 0x00);
-            return 0;
-    }
-
-    return write64_generic(dev, val);
-}
-
-static void
-kbd_key_reset(atkbd_t *dev, int do_fa)
-{
-    dev->out_new = -1;
-    kbc_queue_reset(dev, 1);
-
-    dev->kbd_last_scan_code = 0x00;
-
-    /* Set scan code set to 2. */
-    keyboard_mode = 0x02;
-    set_scancode_map(dev);
-
-    /* The BAT enables scanning. */
-    keyboard_scan = 1;
-
-    dev->sc_or = 0;
-
-    if (do_fa)
-        add_data_kbd_front(dev, 0xfa);
-    add_data_kbd_front(dev, 0xaa);
-
-    if (!do_fa)
-        dev->kbd_state = DEV_STATE_MAIN_OUT;
-}
-
-static void
-kbd_aux_reset(atkbd_t *dev, int do_fa)
-{
-    dev->out_new_mouse = -1;
-    kbc_queue_reset(dev, 2);
-
-    /* The BAT enables scanning. */
-    mouse_scan = 1;
-
-    if (!do_fa) {
-        add_data_kbd_front(dev, 0xaa);
-        add_data_kbd_front(dev, 0x00);
-
-        dev->mouse_state = DEV_STATE_MAIN_OUT;
-    }
-}
-
 void
-keyboard_at_mouse_reset(void)
+keyboard_at_clear_data(void *priv)
 {
-    atkbd_t *dev = SavedKbd;
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
 
-    kbd_aux_reset(dev, 1);
+    dev->flags &= ~FLAG_CTRLDAT;
 }
 
 static void
-kbd_process_cmd(void *priv)
+keyboard_at_set_defaults(atkbc_dev_t *dev)
 {
-    atkbd_t *dev = (atkbd_t *) priv;
+    dev->rate = 1;
 
-    dev->kbd_state = DEV_STATE_MAIN_OUT;
+    keyboard_set3_all_break  = 0;
+    keyboard_set3_all_repeat = 0;
+    memset(keyboard_set3_flags, 0, 512);
 
-    if (dev->key_wantdata) {
-        dev->key_wantdata = 0;
+    keyboard_mode = 0x02;
+    keyboard_at_set_scancode_set();
+}
 
-        /*
-         * Several system BIOSes and OS device drivers
-         * mess up with this, and repeat the command
-         * code many times.  Fun!
-         */
-        if (dev->key_dat == dev->key_command) {
-            /* Respond NAK and ignore it. */
-            add_data_kbd_front(dev, 0xfe);
-            dev->key_command = 0x00;
-            return;
-        }
+static void
+keyboard_at_bat(void *priv)
+{
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
 
-        switch (dev->key_command) {
-            case 0xed: /* set/reset LEDs */
-                add_data_kbd_front(dev, 0xfa);
-                kbd_log("ATkbd: set LEDs [%02x]\n", dev->key_dat);
+    keyboard_at_set_defaults(dev);
+
+    keyboard_scan = 1;
+
+    kbc_at_dev_queue_add(dev, 0xaa, 0);
+}
+
+static void
+keyboard_at_invalid_cmd(atkbc_dev_t *dev)
+{
+    keyboard_at_log("%s: Invalid command [%02X]\n", dev->name, dev->port->dat);
+    kbc_at_dev_queue_add(dev, inv_cmd_response, 0);
+}
+
+
+static void
+keyboard_at_write(void *priv)
+{
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
+    uint8_t  i, val;
+
+    if (dev->port == NULL)
+        return;
+
+    val = dev->port->dat;
+
+    dev->state = DEV_STATE_MAIN_OUT;
+
+    if ((val < 0xed) && (dev->flags & FLAG_CTRLDAT)) {
+        dev->flags &= ~FLAG_CTRLDAT;
+
+        switch (dev->command) {
+            case 0xed: /* Set/reset LEDs */
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
+                keyboard_at_log("%s: Set/reset LEDs [%02X]\n", dev->name, val);
                 break;
 
-            case 0xf0: /* get/set scancode set */
-                add_data_kbd_front(dev, 0xfa);
-                if (dev->key_dat == 0) {
-                    kbd_log("Get scan code set: %02X\n", keyboard_mode);
-                    add_data_kbd_front(dev, keyboard_mode);
-                } else {
-                    if (dev->key_dat <= 3) {
-                        keyboard_mode = dev->key_dat;
-                        kbd_log("Scan code set now: %02X\n", keyboard_mode);
-                    }
-                    set_scancode_map(dev);
+            case 0xf0: /* Get/set scancode set */
+                kbc_at_dev_queue_add(dev, (val > 3) ? 0xfe : 0xfa, 0);
+                switch (val) {
+                    case 0x00:
+                        keyboard_at_log("%s: Get scan code set [%02X]\n", dev->name, keyboard_mode);
+                        kbc_at_dev_queue_add(dev, keyboard_mode, 0);
+                        break;
+                    case 0x01 ... 0x03:
+                        keyboard_mode = val;
+                        keyboard_at_log("%s: Set scan code set [%02X]\n", dev->name, keyboard_mode);
+                        keyboard_at_set_scancode_set();
+                        break;
+                    default:
+                        /* Fatal so any instance of anything attempting to set scan code > 3 can be reported to us. */
+                        fatal("%s: Scan code set [%02X] invalid, resend\n", dev->name, val);
+                        dev->flags |= FLAG_CTRLDAT;
+                        dev->state = DEV_STATE_MAIN_WANT_IN;
+                        break;
                 }
                 break;
 
             case 0xf3: /* set typematic rate/delay */
-                add_data_kbd_front(dev, 0xfa);
+                if (val & 0x80) {
+                    keyboard_at_log("%s: Set typematic rate/delay [%02X] has bit 7 set - invalid\n", dev->name, val);
+                    dev->flags |= FLAG_CTRLDAT;      /* Resend = keep waiting for parameter. */
+                    kbc_at_dev_queue_add(dev, 0xfe, 0); /* Command response */
+                    dev->state = DEV_STATE_MAIN_WANT_IN;
+                } else {
+                    dev->rate = val;
+                    kbc_at_dev_queue_add(dev, 0xfa, 0); /* Command response */
+                    keyboard_at_log("%s: Set typematic rate/delay [%02X]\n", dev->name, val);
+                }
                 break;
 
             default:
-                kbd_log("ATkbd: bad keyboard 0060 write %02X command %02X\n", dev->key_dat, dev->key_command);
-                add_data_kbd_front(dev, 0xfe);
-                break;
+                fatal("%s: Parameter [%02X] for invalid command [%02X] - possibly memory corruption!\n", dev->name, val, dev->command);
+                kbc_at_dev_queue_add(dev, 0xfe, 0);
+        }
+    } else {
+        if (dev->flags & FLAG_CTRLDAT) {
+            /* Special case - another command during another command that wants input - proceed
+               as normal but do not cancel the command (so keep waiting for input), unless the
+               command in progress is ED (Set/reset LEDs). */
+            if (val == 0xed) {
+                keyboard_scan = 1;
+                dev->flags &= ~FLAG_CTRLDAT;
+            } else
+                dev->state = DEV_STATE_MAIN_WANT_IN;
         }
 
-        /* Keyboard command is now done. */
-        dev->key_command = 0x00;
-    } else {
-        /* No keyboard command in progress. */
-        dev->key_command = 0x00;
-
-        switch (dev->key_dat) {
-            case 0x00 ... 0x7f:
-                kbd_log("ATkbd: invalid command %02X\n", dev->key_dat);
-                add_data_kbd_front(dev, 0xfe);
-                break;
-
+        switch (val) {
             case 0xed: /* set/reset LEDs */
-                kbd_log("ATkbd: set/reset leds\n");
-                add_data_kbd_front(dev, 0xfa);
-
-                dev->key_wantdata = 1;
-                dev->kbd_state = DEV_STATE_MAIN_WANT_IN;
+                dev->command = val;
+                keyboard_at_log("%s: set/reset LEDs\n", dev->name);
+                dev->flags |= FLAG_CTRLDAT;
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK for command byte */
+                dev->state = DEV_STATE_MAIN_WANT_IN;
                 break;
 
             case 0xee: /* diagnostic echo */
-                kbd_log("ATkbd: ECHO\n");
-                add_data_kbd_front(dev, 0xee);
+                keyboard_at_log("%s: ECHO\n", dev->name);
+                kbc_at_dev_queue_add(dev, 0xee, 0);
                 break;
 
-            case 0xef: /* NOP (reserved for future use) */
-                kbd_log("ATkbd: NOP\n");
+            case 0xef: /* Invalid command */
+            case 0xf1: /* Invalid command */
+                keyboard_at_log("%s: Invalid command [%02X]\n", dev->name, dev->port->dat);
+                kbc_at_dev_queue_add(dev, inv_cmd_response, 0);
                 break;
 
             case 0xf0: /* get/set scan code set */
-                kbd_log("ATkbd: scan code set\n");
-                add_data_kbd_front(dev, 0xfa);
-                dev->key_wantdata = 1;
-                dev->kbd_state = DEV_STATE_MAIN_WANT_IN;
+                if (dev->type & FLAG_PS2) {
+                    dev->command = val;
+                    keyboard_at_log("%s: scan code set\n", dev->name);
+                    dev->flags |= FLAG_CTRLDAT;
+                    kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK for command byte */
+                    dev->state = DEV_STATE_MAIN_WANT_IN;
+                } else
+                    keyboard_at_invalid_cmd(dev);
                 break;
 
             case 0xf2: /* read ID */
-                kbd_log("ATkbd: read keyboard id\n");
+                keyboard_at_log("%s: read keyboard id\n", dev->name);
                 /* TODO: After keyboard type selection is implemented, make this
                          return the correct keyboard ID for the selected type. */
-                add_data_kbd_front(dev, 0xfa);
-                add_data_kbd_front(dev, 0xab);
-                add_data_kbd_front(dev, 0x83);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
+                for (i = 0; i < 4; i++) {
+                    if (id_bytes[dev->type][i] == 0)
+                        break;
+
+                    kbc_at_dev_queue_add(dev, id_bytes[dev->type][i], 0);
+                }
                 break;
 
-            case 0xf3: /* set typematic rate/delay */
-                kbd_log("ATkbd: set typematic rate/delay\n");
-                add_data_kbd_front(dev, 0xfa);
-                dev->key_wantdata = 1;
-                dev->kbd_state = DEV_STATE_MAIN_WANT_IN;
+            case 0xf3: /* set typematic rate/delay */
+                dev->command = val; /* remembered so the parameter byte reaches the F3 parameter handler */
+                keyboard_at_log("%s: set typematic rate/delay\n", dev->name);
+                dev->flags |= FLAG_CTRLDAT; /* next byte below 0xED is taken as this command's parameter */
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK for command byte */
+                dev->state = DEV_STATE_MAIN_WANT_IN;
                 break;
 
-            case 0xf4: /* enable keyboard */
-                kbd_log("ATkbd: enable keyboard\n");
-                add_data_kbd_front(dev, 0xfa);
+            case 0xf4: /* enable */
+                keyboard_at_log("%s: enable keyboard\n", dev->name);
                 keyboard_scan = 1;
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
                 break;
 
             case 0xf5: /* set defaults and disable keyboard */
             case 0xf6: /* set defaults */
-                kbd_log("ATkbd: set defaults%s\n", (dev->key_dat == 0xf6) ? "" : " and disable keyboard");
-                keyboard_scan = (dev->key_dat == 0xf6);
-                kbd_log("dev->key_dat = %02X, keyboard_scan = %i, dev->mem[0x20] = %02X\n",
-                        dev->key_dat, keyboard_scan, dev->mem[0]);
-                add_data_kbd_front(dev, 0xfa);
+                keyboard_at_log("%s: set defaults%s\n", dev->name, (val == 0xf6) ? "" : " and disable keyboard");
+                keyboard_scan = !(val & 0x01); /* F5 (bit 0 set) stops scanning, F6 (bit 0 clear) leaves it on */
+                keyboard_at_log("%s: val = %02X, keyboard_scan = %i\n",
+                                dev->name, val, keyboard_scan);
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK */
 
                 keyboard_set3_all_break  = 0;
                 keyboard_set3_all_repeat = 0;
                 memset(keyboard_set3_flags, 0, 512);
+
                 keyboard_mode = 0x02;
-                set_scancode_map(dev);
+                keyboard_at_set_scancode_set();
                 break;
 
             case 0xf7: /* set all keys to repeat */
-                kbd_log("ATkbd: set all keys to repeat\n");
-                add_data_kbd_front(dev, 0xfa);
-                keyboard_set3_all_break = 1;
+                if (dev->type & FLAG_PS2) {
+                    keyboard_at_log("%s: set all keys to repeat\n", dev->name);
+                    kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK */
+                    keyboard_set3_all_repeat = 1; /* F7 turns on repeat; the break flag belongs to F8/FA */
+                } else
+                    keyboard_at_invalid_cmd(dev);
                 break;
 
             case 0xf8: /* set all keys to give make/break codes */
-                kbd_log("ATkbd: set all keys to give make/break codes\n");
-                add_data_kbd_front(dev, 0xfa);
-                keyboard_set3_all_break = 1;
+                if (dev->type & FLAG_PS2) {
+                    keyboard_at_log("%s: set all keys to give make/break codes\n", dev->name);
+                    kbc_at_dev_queue_add(dev, 0xfa, 0);
+                    keyboard_set3_all_break = 1;
+                } else
+                    keyboard_at_invalid_cmd(dev);
                 break;
 
             case 0xf9: /* set all keys to give make codes only */
-                kbd_log("ATkbd: set all keys to give make codes only\n");
-                add_data_kbd_front(dev, 0xfa);
-                keyboard_set3_all_break = 0;
+                if (dev->type & FLAG_PS2) {
+                    keyboard_at_log("%s: set all keys to give make codes only\n", dev->name);
+                    kbc_at_dev_queue_add(dev, 0xfa, 0);
+                    keyboard_set3_all_break = 0;
+                } else
+                    keyboard_at_invalid_cmd(dev);
                 break;
 
             case 0xfa: /* set all keys to repeat and give make/break codes */
-                kbd_log("ATkbd: set all keys to repeat and give make/break codes\n");
-                add_data_kbd_front(dev, 0xfa);
-                keyboard_set3_all_repeat = 1;
-                keyboard_set3_all_break  = 1;
+                if (dev->type & FLAG_PS2) {
+                    keyboard_at_log("%s: set all keys to repeat and give make/break codes\n", dev->name);
+                    kbc_at_dev_queue_add(dev, 0xfa, 0);
+                    keyboard_set3_all_repeat = 1;
+                    keyboard_set3_all_break  = 1;
+                } else
+                    keyboard_at_invalid_cmd(dev);
                 break;
 
+            /* TODO: Actually implement these commands. */
+            case 0xfb: /* set some keys to repeat */
+                keyboard_at_log("%s: set some keys to repeat\n", dev->name);
+                kbc_at_dev_queue_add(dev, inv_cmd_response, 0);
+                break;
+
+            case 0xfc: /* set some keys to give make/break codes */
+                keyboard_at_log("%s: set some keys to give make/break codes\n", dev->name);
+                kbc_at_dev_queue_add(dev, inv_cmd_response, 0);
+                break;
+
+            case 0xfd: /* set some keys to give make codes only */
+                keyboard_at_log("%s: set some keys to give make codes only\n", dev->name);
+                kbc_at_dev_queue_add(dev, inv_cmd_response, 0);
+                break;
+
+            /* TODO: This is supposed to resend multiple bytes after some commands. */
             case 0xfe: /* resend last scan code */
-                kbd_log("ATkbd: resend last scan code\n");
-                add_data_kbd_front(dev, dev->kbd_last_scan_code);
+                keyboard_at_log("%s: resend last scan code\n", dev->name);
+                kbc_at_dev_queue_add(dev, dev->last_scan_code, 0);
                 break;
 
             case 0xff: /* reset */
-                kbd_log("ATkbd: kbd reset\n");
-                kbd_key_reset(dev, 1);
-                break;
-
-           default:
-                kbd_log("ATkbd: bad keyboard command %02X\n", dev->key_dat);
-                add_data_kbd_front(dev, 0xfe);
-        }
-
-        /* If command needs data, remember command. */
-        if (dev->key_wantdata == 1)
-            dev->key_command = dev->key_dat;
-    }
-}
-
-static void
-kbc_process_cmd(void *priv)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-    int      i = 0, bad    = 1;
-    uint8_t  mask, kbc_ven = dev->flags & KBC_VEN_MASK;
-    uint8_t  cmd_ac_conv[16] = { 0x0b, 2, 3, 4, 5, 6, 7, 8, 9, 0x0a, 0x1e, 0x30, 0x2e, 0x20, 0x12, 0x21 };
-
-    if (dev->status & STAT_CD) {
-        /* Controller command. */
-        dev->want60 = 0;
-        dev->kbc_state = KBC_STATE_MAIN_IBF;
-
-        /* Clear the keyboard controller queue. */
-        kbc_queue_reset(dev, 0);
-
-        switch (dev->ib) {
-            /* Read data from KBC memory. */
-            case 0x20 ... 0x3f:
-                add_to_kbc_queue_front(dev, dev->mem[dev->ib], 0, 0x00);
-                break;
-
-            /* Write data to KBC memory. */
-            case 0x60 ... 0x7f:
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                break;
-
-            case 0xaa: /* self-test */
-                kbd_log("ATkbc: self-test\n");
-
-                if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                    if (dev->kbc_state != KBC_STATE_RESET) {
-                        kbd_log("ATkbc: self-test reinitialization\n");
-                        /* Yes, the firmware has an OR, but we need to make sure to keep any forcibly lowered bytes lowered. */
-                        /* TODO: Proper P1 implementation, with OR and AND flags in the machine table. */
-                        dev->input_port = dev->input_port & 0xff;
-                        write_output(dev, 0x4b);
-                    }
-
-                    dev->status = (dev->status & 0x0f) | 0x60;
-
-                    dev->mem[0x20] = 0x30;
-                    dev->mem[0x21] = 0x01;
-                    dev->mem[0x22] = 0x0b;
-                    dev->mem[0x25] = 0x02;
-                    dev->mem[0x27] = 0xf8;
-                    dev->mem[0x28] = 0xce;
-                    dev->mem[0x29] = 0x0b;
-                    dev->mem[0x2a] = 0x10;
-                    dev->mem[0x2b] = 0x20;
-                    dev->mem[0x2c] = 0x15;
-                    dev->mem[0x30] = 0x0b;
-                } else {
-                    if (dev->kbc_state != KBC_STATE_RESET) {
-                        kbd_log("ATkbc: self-test reinitialization\n");
-                        /* Yes, the firmware has an OR, but we need to make sure to keep any forcibly lowered bytes lowered. */
-                        /* TODO: Proper P1 implementation, with OR and AND flags in the machine table. */
-                        dev->input_port = dev->input_port & 0xff;
-                        write_output(dev, 0xcf);
-                    }
-
-                    dev->status = (dev->status & 0x0f) | 0x60;
-
-                    dev->mem[0x20] = 0x10;
-                    dev->mem[0x21] = 0x01;
-                    dev->mem[0x22] = 0x06;
-                    dev->mem[0x25] = 0x01;
-                    dev->mem[0x27] = 0xfb;
-                    dev->mem[0x28] = 0xe0;
-                    dev->mem[0x29] = 0x06;
-                    dev->mem[0x2a] = 0x10;
-                    dev->mem[0x2b] = 0x20;
-                    dev->mem[0x2c] = 0x15;
-                }
-
-                dev->out_new = dev->out_new_mouse = -1;
-                kbc_queue_reset(dev, 0);
-
-                // dev->kbc_state = KBC_STATE_MAIN_IBF;
-                dev->kbc_state = KBC_STATE_KBC_OUT;
-
-                // add_to_kbc_queue_front(dev, 0x55, 0, 0x00);
-                kbc_queue_add(dev, 0x55, 0);
-                break;
-
-            case 0xab: /* interface test */
-                kbd_log("ATkbc: interface test\n");
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00); /*no error*/
-                break;
-
-            case 0xac: /* diagnostic dump */
-                if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                    kbd_log("ATkbc: diagnostic dump\n");
-                   dev->mem[0x30] = (dev->input_port & 0xf0) | 0x80;
-                   dev->mem[0x31] = dev->output_port;
-                   dev->mem[0x32] = 0x00;    /* T0 and T1. */
-                   dev->mem[0x33] = 0x00;    /* PSW - Program Status Word - always return 0x00 because we do not emulate this byte. */
-                   /* 20 bytes in high nibble in set 1, low nibble in set 1, set 1 space format = 60 bytes. */
-                   for (i = 0; i < 20; i++) {
-                       kbc_queue_add(dev, cmd_ac_conv[dev->mem[i + 0x20] >> 4], 0);
-                       kbc_queue_add(dev, cmd_ac_conv[dev->mem[i + 0x20] & 0x0f], 0);
-                       kbc_queue_add(dev, 0x39, 0);
-                   }
-                   dev->kbc_state = KBC_STATE_KBC_OUT;
-                }
-                break;
-
-            case 0xad: /* disable keyboard */
-                kbd_log("ATkbc: disable keyboard\n");
-                set_enable_kbd(dev, 0);
-                break;
-
-            case 0xae: /* enable keyboard */
-                kbd_log("ATkbc: enable keyboard\n");
-                set_enable_kbd(dev, 1);
-                break;
-
-            case 0xc7: /* set port1 bits */
-                kbd_log("ATkbc: Phoenix - set port1 bits\n");
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                break;
-
-            case 0xca: /* read keyboard mode */
-                kbd_log("ATkbc: AMI - read keyboard mode\n");
-                add_to_kbc_queue_front(dev, dev->ami_flags, 0, 0x00);
-                break;
-
-            case 0xcb: /* set keyboard mode */
-                kbd_log("ATkbc: AMI - set keyboard mode\n");
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                break;
-
-            case 0xd0: /* read output port */
-                kbd_log("ATkbc: read output port\n");
-                 mask = 0xff;
-                if ((kbc_ven != KBC_VEN_OLIVETTI) && ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF) && (dev->mem[0x20] & 0x10))
-                    mask &= 0xbf;
-                add_to_kbc_queue_front(dev, dev->output_port & mask, 0, 0x00);
-                break;
-
-            case 0xd1: /* write output port */
-                kbd_log("ATkbc: write output port\n");
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                break;
-
-            case 0xd2: /* write keyboard output buffer */
-                kbd_log("ATkbc: write keyboard output buffer\n");
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                break;
-
-            case 0xdd: /* disable A20 address line */
-            case 0xdf: /* enable A20 address line */
-                kbd_log("ATkbc: %sable A20\n", (dev->ib == 0xdd) ? "dis" : "en");
-                write_output_fast_a20(dev, (dev->output_port & 0xfd) | (dev->ib & 0x02));
-                break;
-
-            case 0xe0: /* read test inputs */
-                kbd_log("ATkbc: read test inputs\n");
-                add_to_kbc_queue_front(dev, 0x00, 0, 0x00);
+                kbc_at_dev_reset(dev, 1);
                 break;
 
             default:
-                /*
-                 * Unrecognized controller command.
-                 *
-                 * If we have a vendor-specific handler, run
-                 * that. Otherwise, or if that handler fails,
-                 * log a bad command.
-                 */
-                if (dev->write64_ven)
-                    bad = dev->write64_ven(dev, dev->ib);
-
-                kbd_log(bad ? "ATkbc: bad controller command %02X\n" : "", dev->ib);
-        }
-
-        /* If the command needs data, remember the command. */
-        if (dev->want60)
-            dev->command = dev->ib;
-    } else if (dev->want60) {
-        /* Write data to controller. */
-        dev->want60 = 0;
-        dev->kbc_state = KBC_STATE_MAIN_IBF;
-
-        switch (dev->command) {
-            case 0x60 ... 0x7f:
-                dev->mem[(dev->command & 0x1f) + 0x20] = dev->ib;
-                if (dev->command == 0x60)
-                    write_cmd(dev, dev->ib);
-                break;
-
-            case 0xc7: /* set port1 bits */
-                kbd_log("ATkbc: Phoenix - set port1 bits\n");
-                dev->input_port |= dev->ib;
-                break;
-
-            case 0xd1: /* write output port */
-                kbd_log("ATkbc: write output port\n");
-                /* Bit 2 of AMI flags is P22-P23 blocked (1 = yes, 0 = no),
-                   discovered by reverse-engineering the AOpen Vi15G BIOS. */
-                if (dev->ami_flags & 0x04) {
-                    /* If keyboard controller lines P22-P23 are blocked,
-                       we force them to remain unchanged. */
-                    dev->ib &= ~0x0c;
-                    dev->ib |= (dev->output_port & 0x0c);
-                }
-                write_output(dev, dev->ib | 0x01);
-                break;
-
-            case 0xd2: /* write to keyboard output buffer */
-                kbd_log("ATkbc: write to keyboard output buffer\n");
-                add_to_kbc_queue_front(dev, dev->ib, 0, 0x00);
-                break;
-
-            case 0xd3: /* write to mouse output buffer */
-                kbd_log("ATkbc: write to mouse output buffer\n");
-                if (mouse_write && ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF))
-                    keyboard_at_adddata_mouse(dev->ib);
-                break;
-
-            case 0xd4: /* write to mouse */
-                kbd_log("ATkbc: write to mouse (%02X)\n", dev->ib);
-
-                if (dev->ib == 0xbb)
-                    break;
-
-                if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-                    set_enable_mouse(dev, 1);
-                    if (mouse_write) {
-                        dev->mouse_wantcmd = 1;
-                        dev->mouse_dat = dev->ib;
-                        dev->kbc_state = KBC_STATE_SEND_MOUSE;
-                    } else
-                        add_to_kbc_queue_front(dev, 0xfe, 2, 0x40);
-                }
-                break;
-
-            default:
-                /*
-                 * Run the vendor-specific handler
-                 * if we have one. Otherwise, or if
-                 * it returns an error, log a bad
-                 * controller command.
-                 */
-                if (dev->write60_ven)
-                    bad = dev->write60_ven(dev, dev->ib);
-
-                if (bad) {
-                    kbd_log("ATkbc: bad controller command %02x data %02x\n", dev->command, dev->ib);
-                }
+                kbc_at_dev_queue_add(dev, 0xfe, 0);
         }
     }
 }
 
-static void
-kbd_write(uint16_t port, uint8_t val, void *priv)
+/*
+ * Initialize the device for use by the user.
+ *
+ * We also get called from the various machines.
+ */
+void *
+keyboard_at_init(const device_t *info)
 {
-    atkbd_t *dev = (atkbd_t *) priv;
+    atkbc_dev_t *dev = kbc_at_dev_init(DEV_KBD);
 
-    kbd_log((port == 0x61) ? "" : "[%04X:%08X] ATkbc: write(%04X) = %02X\n", CS, cpu_state.pc, port, val);
+    dev->name = info->name;
+    /* Key 14  = Japanese key next to backspace, scan code:                13 (Yen 7D);
+       Key 29  = US backslash, scan code:                                  5C (Backslash 2B);
+       Key 42  = European backslash, scan code:                            53 (Backslash 2B);
+       Key 45  = European key next to left shift, scan code:               13 (Key 56);
+       Key 56  = Japanese key next to right shift, scan code:              5C (Backslash 73);
+       Key 59  = Japanese key between left Ctrl and left Alt, scan code:   85 (Muhenkan 7B);
+       Key 63  = Japanese key between right Ctrl and right Alt, scan code: 86 (Henkan/Zenkouho 79);
+       Key 65? = Japanese key between right Ctrl and right Alt, scan code: 87 (Hiragana/Katakana 70).
+     */
+    dev->type = FLAG_PS2 | KBD_102_KEY /* device_get_config_int("type") */;
 
-    switch (port) {
-        case 0x60:
-            dev->status &= ~STAT_CD;
-            if (dev->want60 && (dev->command == 0xd1)) {
-                kbd_log("ATkbc: write output port\n");
+    keyboard_at_log("%s: type=%d\n", dev->name, dev->type);
 
-                /* Fast A20 - ignore all other bits. */
-                val = (val & 0x02) | (dev->output_port & 0xfd);
+    dev->process_cmd = keyboard_at_write;
+    dev->execute_bat = keyboard_at_bat;
 
-                /* Bit 2 of AMI flags is P22-P23 blocked (1 = yes, 0 = no),
-                   discovered by reverse-engineering the AOpeN Vi15G BIOS. */
-                if (dev->ami_flags & 0x04) {
-                    /* If keyboard controller lines P22-P23 are blocked,
-                       we force them to remain unchanged. */
-                    val &= ~0x0c;
-                    val |= (dev->output_port & 0x0c);
-                }
+    dev->scan        = &keyboard_scan;
 
-                write_output_fast_a20(dev, val | 0x01);
+    if (dev->port != NULL)
+        kbc_at_dev_reset(dev, 0);
 
-                dev->want60 = 0;                
-                dev->kbc_state = KBC_STATE_MAIN_IBF;
-                return;
-            }
-            break;
+    keyboard_send = add_data_kbd;
+    SavedKbd = dev;
 
-        case 0x64:
-            dev->status |= STAT_CD;
-            if (val == 0xd1) {
-                kbd_log("ATkbc: write output port\n");
-                dev->want60 = 1;
-                dev->kbc_state = KBC_STATE_KBC_PARAM;
-                dev->command = 0xd1;
-                return;
-            }
-            break;
-    }
+    inv_cmd_response = (dev->type & FLAG_PS2) ? 0xfe : 0xfa;
 
-    dev->ib = val;
-    dev->status |= STAT_IFULL;
-}
-
-static uint8_t
-kbd_read(uint16_t port, void *priv)
-{
-    atkbd_t *dev     = (atkbd_t *) priv;
-    uint8_t  ret     = 0xff;
-
-    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF)
-        cycles -= ISA_CYCLES(8);
-
-    switch (port) {
-        case 0x60:
-            ret = dev->out;
-            dev->status &= ~STAT_OFULL;
-            /* TODO: IRQ is only tied to OBF on the AT KBC, on the PS/2 KBC, it is controlled by a bit the
-                     output port (P2).
-                     This also means that in AT mode, the IRQ is level-triggered. */
-            if ((dev->flags & KBC_TYPE_MASK) < KBC_TYPE_PS2_NOREF)
-                picintc(1 << 1);
-            break;
-
-        case 0x64:
-            ret = dev->status;
-            break;
-
-        default:
-            kbd_log("ATkbc: read(%04x) invalid!\n",port);
-            break;
-    }
-
-    kbd_log((port == 0x61) ? "" : "[%04X:%08X] ATkbc: read (%04X) = %02X\n",  CS, cpu_state.pc, port, ret);
-
-    return (ret);
+    /* Return our private data to the I/O layer. */
+    return (dev);
 }
 
 static void
-kbd_reset(void *priv)
+keyboard_at_close(void *priv)
 {
-    atkbd_t *dev = (atkbd_t *) priv;
-    int      i;
-    uint8_t  kbc_ven = dev->flags & KBC_VEN_MASK;
-
-    dev->status = STAT_UNLOCKED;
-    dev->mem[0x20] = 0x01;
-    dev->mem[0x20] |= CCB_TRANSLATE;
-    dev->secr_phase                   = 0;
-    dev->key_wantdata                 = 0;
-
-    /* Set up the correct Video Type bits. */
-    if (!is286 || (kbc_ven == KBC_VEN_ACER))
-        dev->input_port = video_is_mda() ? 0xb0 : 0xf0;
-    else
-        dev->input_port = video_is_mda() ? 0xf0 : 0xb0;
-    kbd_log("ATkbc: input port = %02x\n", dev->input_port);
-
-    /* Enable keyboard, disable mouse. */
-    set_enable_kbd(dev, 0);
-    keyboard_scan = 0;
-    set_enable_mouse(dev, 0);
-    mouse_scan = 0;
-
-    dev->out_new = dev->out_new_mouse = -1;
-    for (i = 0; i < 3; i++)
-        kbc_queue_reset(dev, i);
-    dev->kbd_last_scan_code = 0;
-
-    dev->sc_or = 0;
-
-    keyboard_mode = 0x02;
-
-    memset(keyboard_set3_flags, 0, 512);
-
-    set_scancode_map(dev);
-
-    dev->ami_flags = ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) ? 0x01 : 0x00;
-    dev->ami_stat |= 0x02;
-
-    dev->output_port = 0xcd;
-    if ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-        write_output(dev, 0x4b);
-    } else {
-        /* The real thing writes CF and then AND's it with BF. */
-        write_output(dev, 0x8f);
-    }
-
-    /* Stage 1. */
-    dev->status = (dev->status & 0x0f) | (dev->input_port & 0xf0);
-
-    /* Reset the keyboard. */
-    kbd_key_reset(dev, 0);
-
-    /* Reset the mouse. */
-    kbd_aux_reset(dev, 0);
-}
-
-/* Reset the AT keyboard - this is needed for the PCI TRC and is done
-   until a better solution is found. */
-void
-keyboard_at_reset(void)
-{
-    kbd_reset(SavedKbd);
-}
-
-void
-kbc_at_a20_reset(void)
-{
-    if (SavedKbd) {
-        SavedKbd->output_port = 0xcd;
-        if ((SavedKbd->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) {
-            write_output(SavedKbd, 0x4b);
-        } else {
-            /* The real thing writes CF and then AND's it with BF. */
-            write_output(SavedKbd, 0x8f);
-        }
-    }
-}
-
-static void
-kbd_close(void *priv)
-{
-    atkbd_t *dev = (atkbd_t *) priv;
-    int i, max_ports = ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) ? 2 : 1;
-
-    kbd_reset(dev);
-
-    /* Stop timers. */
-    timer_disable(&dev->send_delay_timer);
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
 
     keyboard_scan = 0;
     keyboard_send = NULL;
@@ -2832,427 +985,47 @@ kbd_close(void *priv)
 
     SavedKbd = NULL;
 
-    for (i = 0; i < max_ports; i++) {
-        if (kbc_ports[i] != NULL) {
-            free(kbc_ports[i]);
-            kbc_ports[i] = NULL;
-        }
-    }
-
     free(dev);
 }
 
-static void *
-kbd_init(const device_t *info)
-{
-    atkbd_t *dev;
-    int i, max_ports;
-
-    dev = (atkbd_t *) malloc(sizeof(atkbd_t));
-    memset(dev, 0x00, sizeof(atkbd_t));
-
-    dev->flags = info->local;
-    dev->pci = !!(info->flags & DEVICE_PCI);
-
-    /* We need this, sadly. */
-    SavedKbd = dev;
-
-    video_reset(gfxcard[0]);
-    kbd_reset(dev);
-
-    io_sethandler(0x0060, 1, kbd_read, NULL, NULL, kbd_write, NULL, NULL, dev);
-    io_sethandler(0x0064, 1, kbd_read, NULL, NULL, kbd_write, NULL, NULL, dev);
-    keyboard_send = add_data_kbd;
-
-    timer_add(&dev->send_delay_timer, kbd_poll, dev, 1);
-    timer_add(&dev->pulse_cb, pulse_poll, dev, 0);
-
-    dev->write60_ven = NULL;
-    dev->write64_ven = NULL;
-
-    switch (dev->flags & KBC_VEN_MASK) {
-        case KBC_VEN_ACER:
-        case KBC_VEN_GENERIC:
-        case KBC_VEN_NCR:
-        case KBC_VEN_IBM_PS1:
-            dev->write64_ven = write64_generic;
-            break;
-
-        case KBC_VEN_OLIVETTI:
-            dev->write64_ven = write64_olivetti;
-            break;
-
-        case KBC_VEN_AMI:
-        case KBC_VEN_INTEL_AMI:
-        case KBC_VEN_ALI:
-        case KBC_VEN_TG:
-        case KBC_VEN_TG_GREEN:
-            dev->write60_ven = write60_ami;
-            dev->write64_ven = write64_ami;
-            break;
-
-        case KBC_VEN_IBM_MCA:
-            dev->write64_ven = write64_ibm_mca;
-            break;
-
-        case KBC_VEN_QUADTEL:
-            dev->write60_ven = write60_quadtel;
-            dev->write64_ven = write64_quadtel;
-            break;
-
-        case KBC_VEN_TOSHIBA:
-            dev->write60_ven = write60_toshiba;
-            dev->write64_ven = write64_toshiba;
-            break;
+static const device_config_t keyboard_at_config[] = {
+  // clang-format off
+    {
+        .name = "type",
+        .description = "Type",
+        .type = CONFIG_SELECTION,
+        .default_string = "",
+        .default_int = 2,
+        .file_filter = "",
+        .spinner = { 0 },
+        .selection = {
+            { .description = "AT 84-key",          .value = FLAG_AT  | KBD_84_KEY  },
+            { .description = "AT 101/102/106-key", .value = FLAG_AT  | KBD_101_KEY },
+            { .description = "AT Korean",          .value = FLAG_AT  | KBD_KOREAN  },
+            { .description = "PS/2 101-key",       .value = FLAG_PS2 | KBD_101_KEY },
+            { .description = "PS/2 102-key",       .value = FLAG_PS2 | KBD_102_KEY },
+            { .description = "PS/2 106-key JIS",   .value = FLAG_PS2 | KBD_JIS     },
+            { .description = "PS/2 Korean",        .value = FLAG_PS2 | KBD_KOREAN  },
+            { .description = ""                                                    }
+        }
+    },
+    {
+        .name = "", .description = "", .type = CONFIG_END
     }
-
-    max_ports = ((dev->flags & KBC_TYPE_MASK) >= KBC_TYPE_PS2_NOREF) ? 2 : 1;
-
-    for (i = 0; i < max_ports; i++) {
-        kbc_ports[i] = (kbc_port_t *) malloc(sizeof(kbc_port_t));
-        memset(kbc_ports[i], 0x00, sizeof(kbc_port_t));
-    }
-
-    return (dev);
-}
-
-const device_t keyboard_at_device = {
-    .name          = "PC/AT Keyboard",
-    .internal_name = "keyboard_at",
-    .flags         = 0,
-    .local         = KBC_TYPE_ISA | KBC_VEN_GENERIC,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
+  // clang-format on
 };
 
-const device_t keyboard_at_ami_device = {
-    .name          = "PC/AT Keyboard (AMI)",
-    .internal_name = "keyboard_at_ami",
-    .flags         = 0,
-    .local         = KBC_TYPE_ISA | KBC_VEN_AMI,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
+/* TODO: Add more keyboard types. */
+const device_t keyboard_at_generic_device = {
+    .name          = "Standard AT or PS/2 Keyboard",
+    .internal_name = "ps2",
+    .flags         = DEVICE_PS2,
+    .local         = 0,
+    .init          = keyboard_at_init,
+    .close         = keyboard_at_close,
+    .reset         = NULL,
+    { .poll = NULL },
     .speed_changed = NULL,
     .force_redraw  = NULL,
-    .config        = NULL
+    .config        = keyboard_at_config
 };
-
-const device_t keyboard_at_tg_ami_device = {
-    .name          = "PC/AT Keyboard (TriGem AMI)",
-    .internal_name = "keyboard_at_tg_ami",
-    .flags         = 0,
-    .local         = KBC_TYPE_ISA | KBC_VEN_TG,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_at_toshiba_device = {
-    .name          = "PC/AT Keyboard (Toshiba)",
-    .internal_name = "keyboard_at_toshiba",
-    .flags         = 0,
-    .local         = KBC_TYPE_ISA | KBC_VEN_TOSHIBA,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_at_olivetti_device = {
-    .name          = "PC/AT Keyboard (Olivetti)",
-    .internal_name = "keyboard_at_olivetti",
-    .flags         = 0,
-    .local         = KBC_TYPE_ISA | KBC_VEN_OLIVETTI,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_at_ncr_device = {
-    .name          = "PC/AT Keyboard (NCR)",
-    .internal_name = "keyboard_at_ncr",
-    .flags         = 0,
-    .local         = KBC_TYPE_ISA | KBC_VEN_NCR,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_device = {
-    .name          = "PS/2 Keyboard",
-    .internal_name = "keyboard_ps2",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_GENERIC,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_ps1_device = {
-    .name          = "PS/2 Keyboard (IBM PS/1)",
-    .internal_name = "keyboard_ps2_ps1",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_IBM_PS1,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_ps1_pci_device = {
-    .name          = "PS/2 Keyboard (IBM PS/1)",
-    .internal_name = "keyboard_ps2_ps1_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_IBM_PS1,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_xi8088_device = {
-    .name          = "PS/2 Keyboard (Xi8088)",
-    .internal_name = "keyboard_ps2_xi8088",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_1 | KBC_VEN_GENERIC,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_ami_device = {
-    .name          = "PS/2 Keyboard (AMI)",
-    .internal_name = "keyboard_ps2_ami",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_AMI,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_tg_ami_device = {
-    .name          = "PS/2 Keyboard (TriGem AMI)",
-    .internal_name = "keyboard_ps2_tg_ami",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_TG,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_mca_device = {
-    .name          = "PS/2 Keyboard",
-    .internal_name = "keyboard_ps2_mca",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_1 | KBC_VEN_IBM_MCA,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_mca_2_device = {
-    .name          = "PS/2 Keyboard",
-    .internal_name = "keyboard_ps2_mca_2",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_2 | KBC_VEN_IBM_MCA,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_quadtel_device = {
-    .name          = "PS/2 Keyboard (Quadtel/MegaPC)",
-    .internal_name = "keyboard_ps2_quadtel",
-    .flags         = 0,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_QUADTEL,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_pci_device = {
-    .name          = "PS/2 Keyboard",
-    .internal_name = "keyboard_ps2_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_GENERIC,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_ami_pci_device = {
-    .name          = "PS/2 Keyboard (AMI)",
-    .internal_name = "keyboard_ps2_ami_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_AMI,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_ali_pci_device = {
-    .name          = "PS/2 Keyboard (ALi M5123/M1543C)",
-    .internal_name = "keyboard_ps2_ali_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_ALI,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_intel_ami_pci_device = {
-    .name          = "PS/2 Keyboard (AMI)",
-    .internal_name = "keyboard_ps2_intel_ami_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_INTEL_AMI,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_tg_ami_pci_device = {
-    .name          = "PS/2 Keyboard (TriGem AMI)",
-    .internal_name = "keyboard_ps2_tg_ami_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_TG,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-const device_t keyboard_ps2_acer_pci_device = {
-    .name          = "PS/2 Keyboard (Acer 90M002A)",
-    .internal_name = "keyboard_ps2_acer_pci",
-    .flags         = DEVICE_PCI,
-    .local         = KBC_TYPE_PS2_NOREF | KBC_VEN_ACER,
-    .init          = kbd_init,
-    .close         = kbd_close,
-    .reset         = kbd_reset,
-    { .available = NULL },
-    .speed_changed = NULL,
-    .force_redraw  = NULL,
-    .config        = NULL
-};
-
-void
-keyboard_at_set_mouse(void (*func)(uint8_t val, void *priv), void *priv)
-{
-    mouse_write = func;
-    mouse_p     = priv;
-}
-
-void
-keyboard_at_adddata_mouse(uint8_t val)
-{
-    atkbd_t *dev = SavedKbd;
-
-    if (!mouse_scan || (dev->mouse_queue_end >= 16)) {
-        kbd_log("ATkbc: Unable to add to queue, conditions: %i, %i\n", !mouse_scan, (dev->mouse_queue_end >= 16));
-        return;
-    }
-    kbc_queue_add(dev, val, 2);
-}
-
-void
-keyboard_at_adddata_mouse_cmd(uint8_t val)
-{
-    atkbd_t *dev = SavedKbd;
-
-    if (dev->mouse_cmd_queue_end >= 16) {
-        kbd_log("ATkbc: Unable to add to queue, dev->mouse_cmd_queue_end >= 16\n");
-        return;
-    }
-    kbc_queue_add(dev, val, 3);
-}
-
-uint8_t
-keyboard_at_mouse_pos(void)
-{
-    atkbd_t *dev = SavedKbd;
-
-    return ((dev->mouse_queue_end - dev->mouse_queue_start) & 0xf);
-}
-
-void
-keyboard_at_set_a20_key(int state)
-{
-    atkbd_t *dev = SavedKbd;
-
-    write_output(dev, (dev->output_port & 0xfd) | ((!!state) << 1));
-}
diff --git a/src/device/mouse.c b/src/device/mouse.c
index 13d9999c7..0ee714377 100644
--- a/src/device/mouse.c
+++ b/src/device/mouse.c
@@ -102,6 +102,8 @@ static int             mouse_nbut;
 static int (*mouse_dev_poll)(int x, int y, int z, int b, void *priv);
 static void (*mouse_poll_ex)(void) = NULL;
 
+static double          sample_rate = 200.0;
+
 #ifdef ENABLE_MOUSE_LOG
 int mouse_do_log = ENABLE_MOUSE_LOG;
 
@@ -153,7 +155,7 @@ static void
 mouse_timer_poll(void *priv)
 {
     /* Poll at 255 Hz, maximum supported by PS/2 mic. */
-    timer_on_auto(&mouse_timer, 1000000.0 / 255.0);
+    timer_on_auto(&mouse_timer, 1000000.0 / sample_rate);
 
 #ifdef USE_GDBSTUB /* avoid a KBC FIFO overflow when CPU emulation is stalled */
     if (gdbstub_step == GDBSTUB_EXEC)
@@ -161,6 +163,15 @@ mouse_timer_poll(void *priv)
         mouse_process();
 }
 
+void
+mouse_set_sample_rate(double new_rate)
+{
+    timer_stop(&mouse_timer);
+    /* Re-arm the poll timer at new_rate Hz; a non-positive rate (e.g. a guest 0xF3 data byte of 0) would divide by zero, so keep the old rate. */
+    sample_rate = (new_rate > 0.0) ? new_rate : sample_rate;
+    timer_on_auto(&mouse_timer, 1000000.0 / sample_rate);
+}
+
 void
 mouse_reset(void)
 {
@@ -179,15 +190,16 @@ mouse_reset(void)
     if (mouse_type == 0)
         return;
 
+    timer_add(&mouse_timer, mouse_timer_poll, NULL, 0);
+
+    /* Poll at 100 Hz, the default of a PS/2 mouse. */
+    sample_rate = 100.0;
+    timer_on_auto(&mouse_timer, 1000000.0 / sample_rate);
+
     mouse_curr = mouse_devices[mouse_type].device;
 
     if (mouse_curr != NULL)
         mouse_priv = device_add(mouse_curr);
-
-    timer_add(&mouse_timer, mouse_timer_poll, NULL, 0);
-
-    /* Poll at 255 Hz, maximum supported by PS/2 mic. */
-    timer_on_auto(&mouse_timer, 1000000.0 / 255.0);
 }
 
 /* Callback from the hardware driver. */
diff --git a/src/device/mouse_ps2.c b/src/device/mouse_ps2.c
index 1c8e0334d..7d9730f28 100644
--- a/src/device/mouse_ps2.c
+++ b/src/device/mouse_ps2.c
@@ -30,22 +30,7 @@ enum {
     MODE_ECHO
 };
 
-typedef struct {
-    const char *name; /* name of this device */
-    int8_t      type; /* type of this device */
-
-    int mode;
-
-    uint16_t flags;
-    uint8_t  resolution;
-    uint8_t  sample_rate;
-
-    uint8_t command;
-
-    int x, y, z, b;
-
-    uint8_t last_data[6];
-} mouse_t;
+#define FLAG_EXPLORER 0x200 /* Has 5 buttons */
 #define FLAG_5BTN    0x100 /* using Intellimouse Optical mode */
 #define FLAG_INTELLI 0x80  /* device is IntelliMouse */
 #define FLAG_INTMODE 0x40  /* using Intellimouse mode */
@@ -76,13 +61,13 @@ mouse_ps2_log(const char *fmt, ...)
 void
 mouse_clear_data(void *priv)
 {
-    mouse_t *dev = (mouse_t *) priv;
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
 
     dev->flags &= ~FLAG_CTRLDAT;
 }
 
 static void
-ps2_report_coordinates(mouse_t *dev, int cmd)
+ps2_report_coordinates(atkbc_dev_t *dev, int main)
 {
     uint8_t buff[3] = { 0x08, 0x00, 0x00 };
     int temp_z;
@@ -123,15 +108,9 @@ ps2_report_coordinates(mouse_t *dev, int cmd)
     buff[1] = (dev->x & 0xff);
     buff[2] = (dev->y & 0xff);
 
-    if (cmd) {
-        keyboard_at_adddata_mouse_cmd(buff[0]);
-        keyboard_at_adddata_mouse_cmd(buff[1]);
-        keyboard_at_adddata_mouse_cmd(buff[2]);
-    } else {
-        keyboard_at_adddata_mouse(buff[0]);
-        keyboard_at_adddata_mouse(buff[1]);
-        keyboard_at_adddata_mouse(buff[2]);
-    }
+    kbc_at_dev_queue_add(dev, buff[0], main);
+    kbc_at_dev_queue_add(dev, buff[1], main);
+    kbc_at_dev_queue_add(dev, buff[2], main);
     if (dev->flags & FLAG_INTMODE) {
         temp_z = dev->z & 0x0f;
         if ((dev->flags & FLAG_5BTN)) {
@@ -144,144 +123,192 @@ ps2_report_coordinates(mouse_t *dev, int cmd)
             if (temp_z & 0x08)
                 temp_z |= 0xf0;
         }
-        if (cmd)
-            keyboard_at_adddata_mouse_cmd(temp_z);
-        else
-            keyboard_at_adddata_mouse(temp_z);
+        kbc_at_dev_queue_add(dev, temp_z, main);
     }
 
     dev->x = dev->y = dev->z = 0;
 }
 
 static void
-ps2_write(uint8_t val, void *priv)
+ps2_set_defaults(atkbc_dev_t *dev)
 {
-    mouse_t *dev = (mouse_t *) priv;
-    uint8_t  temp;
+    dev->mode = MODE_STREAM;
+    dev->rate = 100;
+    mouse_set_sample_rate(100.0);
+    dev->resolution = 2;
+    dev->flags &= 0x88;
+    mouse_scan = 0;
+}
+
+static void
+ps2_bat(void *priv)
+{
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
+    /* Installed as dev->execute_bat by mouse_ps2_init(): first restore the default mode, rate and resolution. */
+    ps2_set_defaults(dev);
+    /* Then queue the two-byte reply 0xaa, 0x00 (third argument 0 presumably selects the non-main/command queue - confirm). */
+    kbc_at_dev_queue_add(dev, 0xaa, 0);
+    kbc_at_dev_queue_add(dev, 0x00, 0);
+}
+
+static void
+ps2_write(void *priv)
+{
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
+    uint8_t  temp, val;
+    static uint8_t last_data[6] = { 0x00 };
+
+    if (dev->port == NULL)
+        return;
+
+    val = dev->port->dat;
+
+    dev->state = DEV_STATE_MAIN_OUT;
 
     if (dev->flags & FLAG_CTRLDAT) {
         dev->flags &= ~FLAG_CTRLDAT;
 
         if (val == 0xff)
-            goto mouse_reset;
-
-        switch (dev->command) {
+            kbc_at_dev_reset(dev, 1);
+        else  switch (dev->command) {
             case 0xe8: /* set mouse resolution */
                 dev->resolution = val;
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
+                mouse_ps2_log("%s: Set mouse resolution [%02X]\n", dev->name, val);
                 break;
 
             case 0xf3: /* set sample rate */
-                dev->sample_rate = val;
-                keyboard_at_adddata_mouse_cmd(0xfa); /* Command response */
+                dev->rate = val;
+                mouse_set_sample_rate((double) val);
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* Command response */
+                mouse_ps2_log("%s: Set sample rate [%02X]\n", dev->name, val);
                 break;
 
             default:
-                keyboard_at_adddata_mouse_cmd(0xfc);
+                kbc_at_dev_queue_add(dev, 0xfc, 0);
         }
     } else {
         dev->command = val;
 
         switch (dev->command) {
             case 0xe6: /* set scaling to 1:1 */
+                mouse_ps2_log("%s: Set scaling to 1:1\n", dev->name);
                 dev->flags &= ~FLAG_SCALED;
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
                 break;
 
             case 0xe7: /* set scaling to 2:1 */
+                mouse_ps2_log("%s: Set scaling to 2:1\n", dev->name);
                 dev->flags |= FLAG_SCALED;
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
                 break;
 
             case 0xe8: /* set mouse resolution */
+                mouse_ps2_log("%s: Set mouse resolution\n", dev->name);
                 dev->flags |= FLAG_CTRLDAT;
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
+                dev->state = DEV_STATE_MAIN_WANT_IN;
                 break;
 
             case 0xe9: /* status request */
-                keyboard_at_adddata_mouse_cmd(0xfa);
-                temp = (dev->flags & 0x30);
+                mouse_ps2_log("%s: Status request\n", dev->name);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
+                temp = (dev->flags & 0x20);
+                if (mouse_scan)
+                    temp |= FLAG_ENABLED;
                 if (mouse_buttons & 1)
                     temp |= 4;
                 if (mouse_buttons & 2)
                     temp |= 1;
                 if ((mouse_buttons & 4) && (dev->flags & FLAG_INTELLI))
                     temp |= 2;
-                keyboard_at_adddata_mouse_cmd(temp);
-                keyboard_at_adddata_mouse_cmd(dev->resolution);
-                keyboard_at_adddata_mouse_cmd(dev->sample_rate);
+                kbc_at_dev_queue_add(dev, temp, 0);
+                kbc_at_dev_queue_add(dev, dev->resolution, 0);
+                kbc_at_dev_queue_add(dev, dev->rate, 0);
                 break;
 
             case 0xea: /* set stream */
+                mouse_ps2_log("%s: Set stream\n", dev->name);
                 dev->flags &= ~FLAG_CTRLDAT;
+                dev->mode = MODE_STREAM;
                 mouse_scan = 1;
-                keyboard_at_adddata_mouse_cmd(0xfa); /* ACK for command byte */
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK for command byte */
                 break;
 
             case 0xeb: /* Get mouse data */
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                mouse_ps2_log("%s: Get mouse data\n", dev->name);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
 
-                ps2_report_coordinates(dev, 1);
+                ps2_report_coordinates(dev, 0);
+                break;
+
+            case 0xf0: /* set remote */
+                mouse_ps2_log("%s: Set remote\n", dev->name);
+                dev->flags &= ~FLAG_CTRLDAT;
+                dev->mode = MODE_REMOTE;
+                mouse_scan = 1;
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK for command byte */
                 break;
 
             case 0xf2: /* read ID */
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                mouse_ps2_log("%s: Read ID\n", dev->name);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
                 if (dev->flags & FLAG_INTMODE)
-                    keyboard_at_adddata_mouse_cmd((dev->flags & FLAG_5BTN) ? 0x04 : 0x03);
+                    kbc_at_dev_queue_add(dev, (dev->flags & FLAG_5BTN) ? 0x04 : 0x03, 0);
                 else
-                    keyboard_at_adddata_mouse_cmd(0x00);
+                    kbc_at_dev_queue_add(dev, 0x00, 0);
                 break;
 
-            case 0xf3: /* set command mode */
+            case 0xf3: /* set sample rate */
+                mouse_ps2_log("%s: Set sample rate\n", dev->name);
                 dev->flags |= FLAG_CTRLDAT;
-                keyboard_at_adddata_mouse_cmd(0xfa); /* ACK for command byte */
+                kbc_at_dev_queue_add(dev, 0xfa, 0); /* ACK for command byte */
+                dev->state = DEV_STATE_MAIN_WANT_IN;
                 break;
 
             case 0xf4: /* enable */
-                dev->flags |= FLAG_ENABLED;
+                mouse_ps2_log("%s: Enable\n", dev->name);
                 mouse_scan = 1;
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
                 break;
 
             case 0xf5: /* disable */
-                dev->flags &= ~FLAG_ENABLED;
+                mouse_ps2_log("%s: Disable\n", dev->name);
                 mouse_scan = 0;
-                keyboard_at_adddata_mouse_cmd(0xfa);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
                 break;
 
             case 0xf6: /* set defaults */
+                mouse_ps2_log("%s: Set defaults\n", dev->name);
+                ps2_set_defaults(dev);
+                kbc_at_dev_queue_add(dev, 0xfa, 0);
+                break;
+
             case 0xff: /* reset */
-mouse_reset:
-                dev->mode = MODE_STREAM;
-                dev->flags &= 0x88;
-                mouse_scan = 1;
-                keyboard_at_mouse_reset();
-                keyboard_at_adddata_mouse_cmd(0xfa);
-                if (dev->command == 0xff) {
-                    keyboard_at_adddata_mouse_cmd(0xaa);
-                    keyboard_at_adddata_mouse_cmd(0x00);
-                }
+                mouse_ps2_log("%s: Reset\n", dev->name);
+                kbc_at_dev_reset(dev, 1);
                 break;
 
             default:
-                keyboard_at_adddata_mouse_cmd(0xfe);
+                kbc_at_dev_queue_add(dev, 0xfe, 0);
         }
     }
 
     if (dev->flags & FLAG_INTELLI) {
         for (temp = 0; temp < 5; temp++)
-            dev->last_data[temp] = dev->last_data[temp + 1];
+            last_data[temp] = last_data[temp + 1];
 
-        dev->last_data[5] = val;
+        last_data[5] = val;
 
-        if ((dev->last_data[0] == 0xf3) && (dev->last_data[1] == 0xc8) &&
-            (dev->last_data[2] == 0xf3) && (dev->last_data[3] == 0x64) &&
-            (dev->last_data[4] == 0xf3) && (dev->last_data[5] == 0x50))
+        if ((last_data[0] == 0xf3) && (last_data[1] == 0xc8) &&
+            (last_data[2] == 0xf3) && (last_data[3] == 0x64) &&
+            (last_data[4] == 0xf3) && (last_data[5] == 0x50))
             dev->flags |= FLAG_INTMODE;
 
-        if ((dev->flags & FLAG_INTMODE) && (dev->last_data[0] == 0xf3) && (dev->last_data[1] == 0xc8) &&
-            (dev->last_data[2] == 0xf3) && (dev->last_data[3] == 0xc8) &&
-            (dev->last_data[4] == 0xf3) && (dev->last_data[5] == 0x50))
+        if ((dev->flags & FLAG_EXPLORER) && (dev->flags & FLAG_INTMODE) &&
+            (last_data[0] == 0xf3) && (last_data[1] == 0xc8) &&
+            (last_data[2] == 0xf3) && (last_data[3] == 0xc8) &&
+            (last_data[4] == 0xf3) && (last_data[5] == 0x50))
             dev->flags |= FLAG_5BTN;
     }
 }
@@ -289,30 +316,18 @@ mouse_reset:
 static int
 ps2_poll(int x, int y, int z, int b, double abs_x, double abs_y, void *priv)
 {
-    mouse_t *dev = (mouse_t *) priv;
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
 
-    if (!x && !y && !z && (b == dev->b))
-        return (0xff);
-
-#if 0
-    if (!(dev->flags & FLAG_ENABLED))
-        return(0xff);
-#endif
-
-    if (!mouse_scan)
+    if (!mouse_scan || (!x && !y && !z && (b == dev->b)))
         return (0xff);
 
     dev->x += x;
     dev->y -= y;
     dev->z -= z;
-#if 0
-    if ((dev->mode == MODE_STREAM) && (dev->flags & FLAG_ENABLED) && (keyboard_at_mouse_pos() < 13)) {
-#else
-    if ((dev->mode == MODE_STREAM) && (keyboard_at_mouse_pos() < 13)) {
-#endif
+    if ((dev->mode == MODE_STREAM) && (kbc_at_dev_queue_pos(dev, 1) < 13)) {
         dev->b = b;
 
-        ps2_report_coordinates(dev, 0);
+        ps2_report_coordinates(dev, 1);
     }
 
     return (0);
@@ -326,11 +341,9 @@ ps2_poll(int x, int y, int z, int b, double abs_x, double abs_y, void *priv)
 void *
 mouse_ps2_init(const device_t *info)
 {
-    mouse_t *dev;
+    atkbc_dev_t *dev = kbc_at_dev_init(DEV_AUX);
     int      i;
 
-    dev = (mouse_t *) malloc(sizeof(mouse_t));
-    memset(dev, 0x00, sizeof(mouse_t));
     dev->name = info->name;
     dev->type = info->local;
 
@@ -338,18 +351,25 @@ mouse_ps2_init(const device_t *info)
     i         = device_get_config_int("buttons");
     if (i > 2)
         dev->flags |= FLAG_INTELLI;
+    if (i > 4)
+        dev->flags |= FLAG_EXPLORER;
 
-    if (i == 4)
+    if (i >= 4)
         i = 3;
 
-    /* Hook into the general AT Keyboard driver. */
-    keyboard_at_set_mouse(ps2_write, dev);
-
     mouse_ps2_log("%s: buttons=%d\n", dev->name, i);
 
     /* Tell them how many buttons we have. */
     mouse_set_buttons(i);
 
+    dev->process_cmd = ps2_write;
+    dev->execute_bat = ps2_bat;
+
+    dev->scan        = &mouse_scan;
+
+    if (dev->port != NULL)
+        kbc_at_dev_reset(dev, 0);
+
     /* Return our private data to the I/O layer. */
     return (dev);
 }
@@ -357,10 +377,7 @@ mouse_ps2_init(const device_t *info)
 static void
 ps2_close(void *priv)
 {
-    mouse_t *dev = (mouse_t *) priv;
-
-    /* Unhook from the general AT Keyboard driver. */
-    keyboard_at_set_mouse(NULL, NULL);
+    atkbc_dev_t *dev = (atkbc_dev_t *) priv;
 
     free(dev);
 }
diff --git a/src/device/mouse_wacom_tablet.c b/src/device/mouse_wacom_tablet.c
index 09d98083e..2fa909286 100644
--- a/src/device/mouse_wacom_tablet.c
+++ b/src/device/mouse_wacom_tablet.c
@@ -356,7 +356,7 @@ wacom_write(struct serial_s *serial, void *priv, uint8_t data)
             uint8_t out_of_range_data = wacom->settings_bits.out_of_range_data;
             wacom->settings_bits.out_of_range_data = !!out_of_range_data;
         } else if (!memcmp(wacom->data_rec, "RQ", 2)) {
-            uint8_t remote_mode = 0;
+            int remote_mode = 0;
             sscanf((const char *) wacom->data_rec, "RQ%d", &remote_mode);
             wacom->settings_bits.remote_mode = !!remote_mode;
             if (wacom->settings_bits.remote_mode)
@@ -372,27 +372,27 @@ wacom_write(struct serial_s *serial, void *priv, uint8_t data)
         } else if (wacom->tablet_type->type == WACOM_TYPE_IV && wacom->data_rec[0] == '~') {
             if (!memcmp(wacom->data_rec, "~*", 2)) {
                 uint32_t settings_dword = wacom->settings;
-                if (strstr(wacom->data_rec, ",")) {
+                if (strstr((const char *) wacom->data_rec, ",")) {
                     uint32_t x_res = wacom->x_res, y_res = wacom->y_res;
                     uint32_t increment = wacom->increment;
                     uint32_t interval = wacom->interval;
 
-                    sscanf("~*%08X,%d,%d,%d,%d", wacom->data_rec, &settings_dword, &increment, &interval, &x_res, &y_res);
+                    sscanf((const char *) wacom->data_rec, "~*%08X,%d,%d,%d,%d", &settings_dword, &increment, &interval, &x_res, &y_res);
                     
                     wacom->interval = interval;
                     wacom->increment = increment;
                     wacom->x_res = x_res;
                     wacom->y_res = y_res;
                 } else {
-                    sscanf("~*%X", wacom->data_rec, &settings_dword);
+                    sscanf((const char *) wacom->data_rec, "~*%X", &settings_dword);
                 }
                 wacom_process_settings_dword(wacom, settings_dword);
             } else if (!memcmp(wacom->data_rec, "~C", 2)) {
-                fifo8_push_all(&wacom->data, "~C5039,3779\r", sizeof("~C5039,3779\r") - 1);
+                fifo8_push_all(&wacom->data, (const uint8_t *) "~C5039,3779\r", sizeof("~C5039,3779\r") - 1);
             } else if (!memcmp(wacom->data_rec, "~R", 2)) {
                 uint8_t data[256] = { 0 };
-                snprintf(data, sizeof(data), "~*%08X,%d,%d,%d,%d\r", wacom->settings, wacom->increment, wacom->interval, wacom->x_res, wacom->y_res);
-                fifo8_push_all(&wacom->data, data, strlen(data));
+                snprintf((char *)data, sizeof(data), (const char *) "~*%08X,%d,%d,%d,%d\r", wacom->settings, wacom->increment, wacom->interval, wacom->x_res, wacom->y_res);
+                fifo8_push_all(&wacom->data, data, strlen((const char *) data));
             }
         }
     }
@@ -458,7 +458,7 @@ wacom_transmit_prepare(mouse_wacom_t *wacom, int x, int y)
     if (wacom->transmit_id) {
         uint8_t data[128] = { 0 };
         snprintf((char *) data, sizeof(data), "%s", wacom->tablet_type->id);
-        fifo8_push_all(&wacom->data, data, strlen(data));
+        fifo8_push_all(&wacom->data, data, strlen((char *)data));
         wacom->transmit_id = 0;
         return;
     }
@@ -520,7 +520,7 @@ wacom_transmit_prepare(mouse_wacom_t *wacom, int x, int y)
         snprintf((char *) data, sizeof(data), "*,%05d,%05d,%d\r\n",
                  wacom->abs_x, wacom->abs_y,
                  wacom->pressure_mode ? ((wacom->b & 0x1) ? (uint8_t) -31 : (uint8_t) 15) : ((wacom->b & 0x1) ? 21 : 00));
-        fifo8_push_all(&wacom->data, data, strlen(data));
+        fifo8_push_all(&wacom->data, data, strlen((char *)data));
     }
 }
 
diff --git a/src/floppy/fdd_86f.c b/src/floppy/fdd_86f.c
index 0267cfe81..ff4acb989 100644
--- a/src/floppy/fdd_86f.c
+++ b/src/floppy/fdd_86f.c
@@ -2550,7 +2550,7 @@ uint16_t
 d86f_prepare_pretrack(int drive, int side, int iso)
 {
     d86f_t  *dev = d86f[drive];
-    uint16_t i, pos;
+    uint16_t pos;
     int      mfm;
     int      real_gap0_len;
     int      sync_len;
@@ -2575,22 +2575,22 @@ d86f_prepare_pretrack(int drive, int side, int iso)
 
     d86f_destroy_linked_lists(drive, side);
 
-    for (i = 0; i < raw_size; i++)
+    for (uint32_t i = 0; i < raw_size; i++)
         d86f_write_direct_common(drive, side, gap_fill, 0, i);
 
     pos = 0;
 
     if (!iso) {
-        for (i = 0; i < real_gap0_len; i++) {
+        for (int i = 0; i < real_gap0_len; i++) {
             d86f_write_direct_common(drive, side, gap_fill, 0, pos);
             pos = (pos + 1) % raw_size;
         }
-        for (i = 0; i < sync_len; i++) {
+        for (int i = 0; i < sync_len; i++) {
             d86f_write_direct_common(drive, side, 0, 0, pos);
             pos = (pos + 1) % raw_size;
         }
         if (mfm) {
-            for (i = 0; i < 3; i++) {
+            for (uint8_t i = 0; i < 3; i++) {
                 d86f_write_direct_common(drive, side, 0x2452, 1, pos);
                 pos = (pos + 1) % raw_size;
             }
@@ -2600,7 +2600,7 @@ d86f_prepare_pretrack(int drive, int side, int iso)
         pos = (pos + 1) % raw_size;
     }
 
-    for (i = 0; i < real_gap1_len; i++) {
+    for (int i = 0; i < real_gap1_len; i++) {
         d86f_write_direct_common(drive, side, gap_fill, 0, pos);
         pos = (pos + 1) % raw_size;
     }
diff --git a/src/include/86box/86box.h b/src/include/86box/86box.h
index b88fa24ef..d472a3e03 100644
--- a/src/include/86box/86box.h
+++ b/src/include/86box/86box.h
@@ -55,6 +55,14 @@
 #define BCD16(x)  ((((x) / 1000) << 12) | (((x) / 100) << 8) | BCD8(x))
 #define BCD32(x)  ((((x) / 10000000) << 28) | (((x) / 1000000) << 24) | (((x) / 100000) << 20) | (((x) / 10000) << 16) | BCD16(x))
 
+#if defined(__GNUC__) || defined(__clang__)
+#    define UNLIKELY(x) __builtin_expect((x), 0)
+#    define LIKELY(x)   __builtin_expect((x), 1)
+#else
+#    define UNLIKELY(x) (x)
+#    define LIKELY(x)   (x)
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -121,7 +129,8 @@ extern uint32_t mem_size;         /* (C) memory size (Installed on system board)
 extern uint32_t isa_mem_size;     /* (C) memory size (ISA Memory Cards) */
 extern int      cpu,              /* (C) cpu type */
     cpu_use_dynarec,              /* (C) cpu uses/needs Dyna */
-    fpu_type;                     /* (C) fpu type */
+    fpu_type,                     /* (C) fpu type */
+    fpu_softfloat;                /* (C) fpu uses softfloat */
 extern int time_sync;             /* (C) enable time sync */
 extern int hdd_format_type;       /* (C) hard disk file format */
 extern int confirm_reset,         /* (C) enable reset confirmation */
diff --git a/src/include/86box/device.h b/src/include/86box/device.h
index f85be460d..2390e5128 100644
--- a/src/include/86box/device.h
+++ b/src/include/86box/device.h
@@ -57,21 +57,24 @@
 #define CONFIG_SERPORT   12
 
 enum {
-    DEVICE_PCJR      = 2,         /* requires an IBM PCjr */
-    DEVICE_AT        = 4,         /* requires an AT-compatible system */
-    DEVICE_PS2       = 8,         /* requires a PS/1 or PS/2 system */
-    DEVICE_ISA       = 0x10,      /* requires the ISA bus */
-    DEVICE_CBUS      = 0x20,      /* requires the C-BUS bus */
-    DEVICE_MCA       = 0x40,      /* requires the MCA bus */
-    DEVICE_EISA      = 0x80,      /* requires the EISA bus */
-    DEVICE_VLB       = 0x100,     /* requires the PCI bus */
-    DEVICE_PCI       = 0x200,     /* requires the VLB bus */
-    DEVICE_AGP       = 0x400,     /* requires the AGP bus */
-    DEVICE_AC97      = 0x800,     /* requires the AC'97 bus */
-    DEVICE_COM       = 0x1000,    /* requires a serial port */
-    DEVICE_LPT       = 0x2000,    /* requires a parallel port */
+    DEVICE_PCJR      = 2,          /* requires an IBM PCjr */
+    DEVICE_AT        = 4,          /* requires an AT-compatible system */
+    DEVICE_PS2       = 8,          /* requires a PS/1 or PS/2 system */
+    DEVICE_ISA       = 0x10,       /* requires the ISA bus */
+    DEVICE_CBUS      = 0x20,       /* requires the C-BUS bus */
+    DEVICE_MCA       = 0x40,       /* requires the MCA bus */
+    DEVICE_EISA      = 0x80,       /* requires the EISA bus */
+    DEVICE_VLB       = 0x100,      /* requires the VLB bus */
+    DEVICE_PCI       = 0x200,      /* requires the PCI bus */
+    DEVICE_AGP       = 0x400,      /* requires the AGP bus */
+    DEVICE_AC97      = 0x800,      /* requires the AC'97 bus */
+    DEVICE_COM       = 0x1000,     /* requires a serial port */
+    DEVICE_LPT       = 0x2000,     /* requires a parallel port */
+    DEVICE_KBC       = 0x4000,     /* is a keyboard controller */
 
-    DEVICE_EXTPARAMS = 0x40000000 /* accepts extended parameters */
+    DEVICE_EXTPARAMS = 0x40000000, /* accepts extended parameters */
+
+    DEVICE_ALL       = 0xffffffff  /* match all devices */
 };
 
 #define BIOS_NORMAL                      0
@@ -171,8 +174,7 @@ extern void *device_cadd_inst_parameters(const device_t *d, const device_t *cd,
 extern void  device_cadd_inst_ex(const device_t *d, const device_t *cd, void *priv, int inst);
 extern void  device_cadd_inst_ex_parameters(const device_t *d, const device_t *cd, void *priv, int inst, void *params);
 extern void  device_close_all(void);
-extern void  device_reset_all(void);
-extern void  device_reset_all_pci(void);
+extern void  device_reset_all(uint32_t match_flags);
 extern void *device_get_priv(const device_t *d);
 extern int   device_available(const device_t *d);
 extern int   device_poll(const device_t *d, int x, int y, int z, int b);
diff --git a/src/include/86box/ini.h b/src/include/86box/ini.h
index 5eca9ab8c..866787352 100644
--- a/src/include/86box/ini.h
+++ b/src/include/86box/ini.h
@@ -30,26 +30,26 @@ typedef void *ini_t;
 typedef void *ini_section_t;
 
 extern ini_t ini_new(void);
-extern ini_t ini_read(char *fn);
-extern void  ini_write(ini_t ini, char *fn);
+extern ini_t ini_read(const char *fn);
+extern void  ini_write(ini_t ini, const char *fn);
 extern void  ini_dump(ini_t ini);
 extern void  ini_close(ini_t ini);
 
-extern void     ini_section_delete_var(ini_section_t section, char *name);
-extern int      ini_section_get_int(ini_section_t section, char *name, int def);
-extern double   ini_section_get_double(ini_section_t section, char *name, double def);
-extern int      ini_section_get_hex16(ini_section_t section, char *name, int def);
-extern int      ini_section_get_hex20(ini_section_t section, char *name, int def);
-extern int      ini_section_get_mac(ini_section_t section, char *name, int def);
-extern char    *ini_section_get_string(ini_section_t section, char *name, char *def);
-extern wchar_t *ini_section_get_wstring(ini_section_t section, char *name, wchar_t *def);
-extern void     ini_section_set_int(ini_section_t section, char *name, int val);
-extern void     ini_section_set_double(ini_section_t section, char *name, double val);
-extern void     ini_section_set_hex16(ini_section_t section, char *name, int val);
-extern void     ini_section_set_hex20(ini_section_t section, char *name, int val);
-extern void     ini_section_set_mac(ini_section_t section, char *name, int val);
+extern void     ini_section_delete_var(ini_section_t section, const char *name);
+extern int      ini_section_get_int(ini_section_t section, const char *name, int def);
+extern double   ini_section_get_double(ini_section_t section, const char *name, double def);
+extern int      ini_section_get_hex16(ini_section_t section, const char *name, int def);
+extern int      ini_section_get_hex20(ini_section_t section, const char *name, int def);
+extern int      ini_section_get_mac(ini_section_t section, const char *name, int def);
+extern char    *ini_section_get_string(ini_section_t section, const char *name, char *def);
+extern wchar_t *ini_section_get_wstring(ini_section_t section, const char *name, wchar_t *def);
+extern void     ini_section_set_int(ini_section_t section, const char *name, int val);
+extern void     ini_section_set_double(ini_section_t section, const char *name, double val);
+extern void     ini_section_set_hex16(ini_section_t section, const char *name, int val);
+extern void     ini_section_set_hex20(ini_section_t section, const char *name, int val);
+extern void     ini_section_set_mac(ini_section_t section, const char *name, int val);
 extern void     ini_section_set_string(ini_section_t section, const char *name, const char *val);
-extern void     ini_section_set_wstring(ini_section_t section, char *name, wchar_t *val);
+extern void     ini_section_set_wstring(ini_section_t section, const char *name, wchar_t *val);
 
 #define ini_delete_var(ini, head, name)       ini_section_delete_var(ini_find_section(ini, head), name)
 
@@ -69,13 +69,13 @@ extern void     ini_section_set_wstring(ini_section_t section, char *name, wchar
 #define ini_set_string(ini, head, name, val)  ini_section_set_string(ini_find_or_create_section(ini, head), name, val)
 #define ini_set_wstring(ini, head, name, val) ini_section_set_wstring(ini_find_or_create_section(ini, head), name, val)
 
-extern ini_section_t ini_find_section(ini_t ini, char *name);
-extern ini_section_t ini_find_or_create_section(ini_t ini, char *name);
-extern void          ini_rename_section(ini_section_t section, char *name);
+extern ini_section_t ini_find_section(ini_t ini, const char *name);
+extern ini_section_t ini_find_or_create_section(ini_t ini, const char *name);
+extern void          ini_rename_section(ini_section_t section, const char *name);
 extern void          ini_delete_section_if_empty(ini_t ini, ini_section_t section);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/include/86box/keyboard.h b/src/include/86box/keyboard.h
index 2834937ff..fe642fec1 100644
--- a/src/include/86box/keyboard.h
+++ b/src/include/86box/keyboard.h
@@ -27,27 +27,33 @@ enum {
     DEV_AUX
 };
 
+enum {
+    DEV_STATE_MAIN_1 = 0,
+    DEV_STATE_MAIN_OUT,
+    DEV_STATE_MAIN_2,
+    DEV_STATE_MAIN_CMD,
+    DEV_STATE_MAIN_WANT_IN,
+    DEV_STATE_MAIN_IN
+};
+
 /* Used by the AT / PS/2 keyboard controller, common device, keyboard, and mouse. */
 typedef struct {
-    uint8_t wantcmd, dat, pad, pad0;
+    uint8_t wantcmd, dat;
 
-    int   out_new;
+    int16_t out_new;
 
     void *priv;
 
     void  (*poll)(void *priv);
-} kbc_port_t;
+} kbc_at_port_t;
 
 /* Used by the AT / PS/2 common device, keyboard, and mouse. */
 typedef struct {
     const char *name; /* name of this device */
 
-    uint8_t type, inst, command, wantdata,
-            last_scan_code, state, resolution, rate,
-            cmd_queue_start, cmd_queue_end, queue_start, queue_end;
-
-    /* 6 bytes needed for mouse */
-    uint8_t last_data[6];
+    uint8_t type, command, last_scan_code, state,
+            resolution, rate, cmd_queue_start, cmd_queue_end,
+            queue_start, queue_end;
 
     uint16_t flags;
 
@@ -65,7 +71,7 @@ typedef struct {
     void    (*process_cmd)(void *priv);
     void    (*execute_bat)(void *priv);
 
-    kbc_port_t *port;
+    kbc_at_port_t *port;
 } atkbc_dev_t;
 
 typedef struct {
@@ -188,7 +194,7 @@ extern int     mouse_queue_start, mouse_queue_end;
 extern int     mouse_cmd_queue_start, mouse_cmd_queue_end;
 extern int     mouse_scan;
 
-extern kbc_port_t     *kbc_ports[2];
+extern kbc_at_port_t     *kbc_at_ports[2];
 
 #ifdef EMU_DEVICE_H
 extern const device_t keyboard_pc_device;
@@ -219,7 +225,6 @@ extern const device_t keyboard_ps2_ami_device;
 extern const device_t keyboard_ps2_tg_ami_device;
 extern const device_t keyboard_ps2_tg_ami_green_device;
 extern const device_t keyboard_ps2_olivetti_device;
-extern const device_t keyboard_ps2_mca_device;
 extern const device_t keyboard_ps2_mca_2_device;
 extern const device_t keyboard_ps2_quadtel_device;
 extern const device_t keyboard_ps2_pci_device;
@@ -228,6 +233,8 @@ extern const device_t keyboard_ps2_intel_ami_pci_device;
 extern const device_t keyboard_ps2_acer_pci_device;
 extern const device_t keyboard_ps2_ali_pci_device;
 extern const device_t keyboard_ps2_tg_ami_pci_device;
+
+extern const device_t keyboard_at_generic_device;
 #endif /*EMU_DEVICE_H*/
 
 extern void     keyboard_init(void);
@@ -249,14 +256,10 @@ extern int      keyboard_isfsexit_down(void);
 extern int      keyboard_ismsexit(void);
 extern void     keyboard_set_is_amstrad(int ams);
 
-extern void    keyboard_at_adddata_mouse(uint8_t val);
-extern void    keyboard_at_adddata_mouse_cmd(uint8_t val);
-extern void    keyboard_at_mouse_reset(void);
-extern uint8_t keyboard_at_mouse_pos(void);
-extern void    keyboard_at_set_mouse(void (*mouse_write)(uint8_t val, void *), void *);
-extern void    keyboard_at_set_a20_key(int state);
-extern void    keyboard_at_reset(void);
-extern void    kbc_at_a20_reset(void);
+extern uint8_t      kbc_at_dev_queue_pos(atkbc_dev_t *dev, uint8_t main);
+extern void         kbc_at_dev_queue_add(atkbc_dev_t *dev, uint8_t val, uint8_t main);
+extern void         kbc_at_dev_reset(atkbc_dev_t *dev, int do_fa);
+extern atkbc_dev_t *kbc_at_dev_init(uint8_t inst);
 
 #ifdef __cplusplus
 }
diff --git a/src/include/86box/machine.h b/src/include/86box/machine.h
index 02c7fdd35..40afbf825 100644
--- a/src/include/86box/machine.h
+++ b/src/include/86box/machine.h
@@ -772,6 +772,7 @@ extern int machine_ps2_model_65sx_init(const machine_t *);
 extern int machine_ps2_model_70_type3_init(const machine_t *);
 extern int machine_ps2_model_80_init(const machine_t *);
 extern int machine_ps2_model_80_axx_init(const machine_t *);
+extern int machine_ps2_model_70_type4_init(const machine_t *);
 
 /* m_tandy.c */
 extern int tandy1k_eeprom_read(void);
diff --git a/src/include/86box/mouse.h b/src/include/86box/mouse.h
index 317e267a0..b697c5d38 100644
--- a/src/include/86box/mouse.h
+++ b/src/include/86box/mouse.h
@@ -80,6 +80,8 @@ extern void mouse_poll(void);
 
 extern void mouse_bus_set_irq(void *priv, int irq);
 
+extern void mouse_set_sample_rate(double new_rate);
+
 extern char *mouse_get_name(int mouse);
 extern char *mouse_get_internal_name(int mouse);
 extern int   mouse_get_from_internal_name(char *s);
diff --git a/src/include/86box/resource.h b/src/include/86box/resource.h
index 397443299..431f42134 100644
--- a/src/include/86box/resource.h
+++ b/src/include/86box/resource.h
@@ -185,19 +185,20 @@
 #ifdef USE_DYNAREC
 #    define IDC_CHECK_DYNAREC 1017
 #endif
-#define IDC_MEMTEXT               1018
-#define IDC_MEMSPIN               1019
+#define IDC_CHECK_SOFTFLOAT       1018
+#define IDC_MEMTEXT               1019
+#define IDC_MEMSPIN               1020
 #define IDC_TEXT_MB               IDT_MB
 
-#define IDC_VIDEO                 1020 /* video config */
-#define IDC_COMBO_VIDEO           1021
-#define IDC_VIDEO_2               1022
-#define IDC_COMBO_VIDEO_2         1023
-#define IDC_CHECK_VOODOO          1024
-#define IDC_BUTTON_VOODOO         1025
-#define IDC_CHECK_IBM8514         1026
-#define IDC_CHECK_XGA             1027
-#define IDC_BUTTON_XGA            1028
+#define IDC_VIDEO                 1021 /* video config */
+#define IDC_COMBO_VIDEO           1022
+#define IDC_VIDEO_2               1023
+#define IDC_COMBO_VIDEO_2         1024
+#define IDC_CHECK_VOODOO          1025
+#define IDC_BUTTON_VOODOO         1026
+#define IDC_CHECK_IBM8514         1027
+#define IDC_CHECK_XGA             1028
+#define IDC_BUTTON_XGA            1029
 
 #define IDC_INPUT                 1030 /* input config */
 #define IDC_COMBO_MOUSE           1031
diff --git a/src/include/86box/sound.h b/src/include/86box/sound.h
index dada9e164..9f36a70b6 100644
--- a/src/include/86box/sound.h
+++ b/src/include/86box/sound.h
@@ -24,13 +24,13 @@
 
 extern int sound_gain;
 
-#define FREQ_44100 44100
-#define FREQ_48000 48000
-#define FREQ_49716 49716
-#define FREQ_88200 88200
-#define FREQ_96000 96000
+#define FREQ_44100  44100
+#define FREQ_48000  48000
+#define FREQ_49716  49716
+#define FREQ_88200  88200
+#define FREQ_96000  96000
 
-#define SOUND_FREQ FREQ_48000
+#define SOUND_FREQ  FREQ_48000
 #define SOUNDBUFLEN (SOUND_FREQ / 50)
 
 #define CD_FREQ     FREQ_44100
@@ -62,7 +62,7 @@ extern const device_t *sound_card_getdevice(int card);
 #endif
 extern int   sound_card_has_config(int card);
 extern char *sound_card_get_internal_name(int card);
-extern int   sound_card_get_from_internal_name(char *s);
+extern int   sound_card_get_from_internal_name(const char *s);
 extern void  sound_card_init(void);
 extern void  sound_set_cd_volume(unsigned int vol_l, unsigned int vol_r);
 
diff --git a/src/include/86box/usb.h b/src/include/86box/usb.h
index d0b169b6c..a20f1b2a2 100644
--- a/src/include/86box/usb.h
+++ b/src/include/86box/usb.h
@@ -22,15 +22,36 @@
 extern "C" {
 #endif
 
+typedef struct usb_t usb_t;
+
+/* USB device creation parameters struct */
 typedef struct
+{
+    void (*raise_interrupt)(usb_t*, void*);
+    void* parent_priv;
+} usb_params_t;
+
+/* USB Host Controller device struct */
+typedef struct usb_t
 {
     uint8_t       uhci_io[32], ohci_mmio[4096];
     uint16_t      uhci_io_base;
     int           uhci_enable, ohci_enable;
     uint32_t      ohci_mem_base;
     mem_mapping_t ohci_mmio_mapping;
+    pc_timer_t    ohci_frame_timer;
+    pc_timer_t    ohci_port_reset_timer[2];
+
+    usb_params_t* usb_params;
 } usb_t;
 
+/* USB endpoint device struct. Incomplete and unused. */
+typedef struct
+{
+    uint16_t vendor_id;
+    uint16_t device_id;
+} usb_device_t;
+
 /* Global variables. */
 extern const device_t usb_device;
 
diff --git a/src/ini.c b/src/ini.c
index 009d21c0b..d41573bed 100644
--- a/src/ini.c
+++ b/src/ini.c
@@ -101,10 +101,10 @@ ini_log(const char *fmt, ...)
 #endif
 
 static section_t *
-find_section(list_t *head, char *name)
+find_section(list_t *head, const char *name)
 {
     section_t *sec     = (section_t *) head->next;
-    char       blank[] = "";
+    const char blank[] = "";
 
     if (name == NULL)
         name = blank;
@@ -120,7 +120,7 @@ find_section(list_t *head, char *name)
 }
 
 ini_section_t
-ini_find_section(ini_t ini, char *name)
+ini_find_section(ini_t ini, const char *name)
 {
     if (ini == NULL)
         return NULL;
@@ -129,7 +129,7 @@ ini_find_section(ini_t ini, char *name)
 }
 
 void
-ini_rename_section(ini_section_t section, char *name)
+ini_rename_section(ini_section_t section, const char *name)
 {
     section_t *sec = (section_t *) section;
 
@@ -197,7 +197,7 @@ ini_delete_section_if_empty(ini_t ini, ini_section_t section)
 }
 
 static section_t *
-create_section(list_t *head, char *name)
+create_section(list_t *head, const char *name)
 {
     section_t *ns = malloc(sizeof(section_t));
 
@@ -209,7 +209,7 @@ create_section(list_t *head, char *name)
 }
 
 ini_section_t
-ini_find_or_create_section(ini_t ini, char *name)
+ini_find_or_create_section(ini_t ini, const char *name)
 {
     if (ini == NULL)
         return NULL;
@@ -263,7 +263,7 @@ ini_close(ini_t ini)
 }
 
 static int
-ini_detect_bom(char *fn)
+ini_detect_bom(const char *fn)
 {
     FILE         *f;
     unsigned char bom[4] = { 0, 0, 0, 0 };
@@ -311,7 +311,7 @@ ini_fgetws(wchar_t *str, int count, FILE *stream)
 
 /* Read and parse the configuration file into memory. */
 ini_t
-ini_read(char *fn)
+ini_read(const char *fn)
 {
     char       sname[128], ename[128];
     wchar_t    buff[1024];
@@ -438,7 +438,7 @@ ini_read(char *fn)
 
 /* Write the in-memory configuration to disk. */
 void
-ini_write(ini_t ini, char *fn)
+ini_write(ini_t ini, const char *fn)
 {
     wchar_t    wtemp[512];
     list_t    *list = (list_t *) ini;
@@ -521,7 +521,7 @@ ini_dump(ini_t ini)
 }
 
 void
-ini_section_delete_var(ini_section_t self, char *name)
+ini_section_delete_var(ini_section_t self, const char *name)
 {
     section_t *section = (section_t *) self;
     entry_t   *entry;
@@ -537,7 +537,7 @@ ini_section_delete_var(ini_section_t self, char *name)
 }
 
 int
-ini_section_get_int(ini_section_t self, char *name, int def)
+ini_section_get_int(ini_section_t self, const char *name, int def)
 {
     section_t *section = (section_t *) self;
     entry_t   *entry;
@@ -556,7 +556,7 @@ ini_section_get_int(ini_section_t self, char *name, int def)
 }
 
 double
-ini_section_get_double(ini_section_t self, char *name, double def)
+ini_section_get_double(ini_section_t self, const char *name, double def)
 {
     section_t *section = (section_t *) self;
     entry_t   *entry;
@@ -575,7 +575,7 @@ ini_section_get_double(ini_section_t self, char *name, double def)
 }
 
 int
-ini_section_get_hex16(ini_section_t self, char *name, int def)
+ini_section_get_hex16(ini_section_t self, const char *name, int def)
 {
     section_t   *section = (section_t *) self;
     entry_t     *entry;
@@ -594,7 +594,7 @@ ini_section_get_hex16(ini_section_t self, char *name, int def)
 }
 
 int
-ini_section_get_hex20(ini_section_t self, char *name, int def)
+ini_section_get_hex20(ini_section_t self, const char *name, int def)
 {
     section_t   *section = (section_t *) self;
     entry_t     *entry;
@@ -613,7 +613,7 @@ ini_section_get_hex20(ini_section_t self, char *name, int def)
 }
 
 int
-ini_section_get_mac(ini_section_t self, char *name, int def)
+ini_section_get_mac(ini_section_t self, const char *name, int def)
 {
     section_t   *section = (section_t *) self;
     entry_t     *entry;
@@ -632,7 +632,7 @@ ini_section_get_mac(ini_section_t self, char *name, int def)
 }
 
 char *
-ini_section_get_string(ini_section_t self, char *name, char *def)
+ini_section_get_string(ini_section_t self, const char *name, char *def)
 {
     section_t *section = (section_t *) self;
     entry_t   *entry;
@@ -648,7 +648,7 @@ ini_section_get_string(ini_section_t self, char *name, char *def)
 }
 
 wchar_t *
-ini_section_get_wstring(ini_section_t self, char *name, wchar_t *def)
+ini_section_get_wstring(ini_section_t self, const char *name, wchar_t *def)
 {
     section_t *section = (section_t *) self;
     entry_t   *entry;
@@ -664,7 +664,7 @@ ini_section_get_wstring(ini_section_t self, char *name, wchar_t *def)
 }
 
 void
-ini_section_set_int(ini_section_t self, char *name, int val)
+ini_section_set_int(ini_section_t self, const char *name, int val)
 {
     section_t *section = (section_t *) self;
     entry_t   *ent;
@@ -681,7 +681,7 @@ ini_section_set_int(ini_section_t self, char *name, int val)
 }
 
 void
-ini_section_set_double(ini_section_t self, char *name, double val)
+ini_section_set_double(ini_section_t self, const char *name, double val)
 {
     section_t *section = (section_t *) self;
     entry_t   *ent;
@@ -698,7 +698,7 @@ ini_section_set_double(ini_section_t self, char *name, double val)
 }
 
 void
-ini_section_set_hex16(ini_section_t self, char *name, int val)
+ini_section_set_hex16(ini_section_t self, const char *name, int val)
 {
     section_t *section = (section_t *) self;
     entry_t   *ent;
@@ -715,7 +715,7 @@ ini_section_set_hex16(ini_section_t self, char *name, int val)
 }
 
 void
-ini_section_set_hex20(ini_section_t self, char *name, int val)
+ini_section_set_hex20(ini_section_t self, const char *name, int val)
 {
     section_t *section = (section_t *) self;
     entry_t   *ent;
@@ -732,7 +732,7 @@ ini_section_set_hex20(ini_section_t self, char *name, int val)
 }
 
 void
-ini_section_set_mac(ini_section_t self, char *name, int val)
+ini_section_set_mac(ini_section_t self, const char *name, int val)
 {
     section_t *section = (section_t *) self;
     entry_t   *ent;
@@ -774,7 +774,7 @@ ini_section_set_string(ini_section_t self, const char *name, const char *val)
 }
 
 void
-ini_section_set_wstring(ini_section_t self, char *name, wchar_t *val)
+ini_section_set_wstring(ini_section_t self, const char *name, wchar_t *val)
 {
     section_t *section = (section_t *) self;
     entry_t   *ent;
diff --git a/src/io.c b/src/io.c
index 87cceae62..0cd7cd87b 100644
--- a/src/io.c
+++ b/src/io.c
@@ -56,7 +56,6 @@ typedef struct {
 int   initialized = 0;
 io_t *io[NPORTS], *io_last[NPORTS];
 
-// #define ENABLE_IO_LOG 1
 #ifdef ENABLE_IO_LOG
 int io_do_log = ENABLE_IO_LOG;
 
@@ -311,9 +310,7 @@ inb(uint16_t port)
     /* if (port == 0x1ed)
         ret = 0xfe; */
 
-    if (port == 0x92) {
-        io_log("[%04X:%08X] (%i, %i, %04i) in b(%04X) = %02X\n", CS, cpu_state.pc, in_smm, found, qfound, port, ret);
-    }
+    io_log("[%04X:%08X] (%i, %i, %04i) in b(%04X) = %02X\n", CS, cpu_state.pc, in_smm, found, qfound, port, ret);
 
     return (ret);
 }
@@ -344,9 +341,7 @@ outb(uint16_t port, uint8_t val)
 #endif
     }
 
-    if (port == 0x92) {
-        io_log("[%04X:%08X] (%i, %i, %04i) outb(%04X, %02X)\n", CS, cpu_state.pc, in_smm, found, qfound, port, val);
-    }
+    io_log("[%04X:%08X] (%i, %i, %04i) outb(%04X, %02X)\n", CS, cpu_state.pc, in_smm, found, qfound, port, val);
 
     return;
 }
@@ -400,9 +395,7 @@ inw(uint16_t port)
     if (!found)
         cycles -= io_delay;
 
-    if (port == 0x92) {
-        io_log("[%04X:%08X] (%i, %i, %04i) in w(%04X) = %04X\n", CS, cpu_state.pc, in_smm, found, qfound, port, ret);
-    }
+    io_log("[%04X:%08X] (%i, %i, %04i) in w(%04X) = %04X\n", CS, cpu_state.pc, in_smm, found, qfound, port, ret);
 
     return ret;
 }
@@ -447,9 +440,7 @@ outw(uint16_t port, uint16_t val)
 #endif
     }
 
-    if (port == 0x92) {
-        io_log("[%04X:%08X] (%i, %i, %04i) outw(%04X, %04X)\n", CS, cpu_state.pc, in_smm, found, qfound, port, val);
-    }
+    io_log("[%04X:%08X] (%i, %i, %04i) outw(%04X, %04X)\n", CS, cpu_state.pc, in_smm, found, qfound, port, val);
 
     return;
 }
@@ -531,9 +522,7 @@ inl(uint16_t port)
     if (!found)
         cycles -= io_delay;
 
-    if (port == 0x92) {
-        io_log("[%04X:%08X] (%i, %i, %04i) in l(%04X) = %08X\n", CS, cpu_state.pc, in_smm, found, qfound, port, ret);
-    }
+    io_log("[%04X:%08X] (%i, %i, %04i) in l(%04X) = %08X\n", CS, cpu_state.pc, in_smm, found, qfound, port, ret);
 
     return ret;
 }
@@ -593,9 +582,7 @@ outl(uint16_t port, uint32_t val)
 #endif
     }
 
-    if (port == 0x92) {
-        io_log("[%04X:%08X] (%i, %i, %04i) outl(%04X, %08X)\n", CS, cpu_state.pc, in_smm, found, qfound, port, val);
-    }
+    io_log("[%04X:%08X] (%i, %i, %04i) outl(%04X, %08X)\n", CS, cpu_state.pc, in_smm, found, qfound, port, val);
 
     return;
 }
diff --git a/src/machine/m_ps2_mca.c b/src/machine/m_ps2_mca.c
index 3de9ef491..8eb6565fc 100644
--- a/src/machine/m_ps2_mca.c
+++ b/src/machine/m_ps2_mca.c
@@ -998,7 +998,7 @@ ps2_mca_board_model_55sx_init(int has_sec_nvram, int slots)
     }
 
     mca_init(slots);
-    device_add(&keyboard_ps2_mca_device);
+    device_add(&keyboard_ps2_device);
 
     if (has_sec_nvram == 1)
         device_add(&ps2_nvr_55ls_device);
@@ -1162,7 +1162,7 @@ ps2_mca_board_model_70_type34_init(int is_type4, int slots)
 
     ps2.split_addr = mem_size * 1024;
     mca_init(slots);
-    device_add(&keyboard_ps2_mca_device);
+    device_add(&keyboard_ps2_device);
 
     ps2.planar_read  = model_70_type3_read;
     ps2.planar_write = model_70_type3_write;
@@ -1255,7 +1255,7 @@ ps2_mca_board_model_80_type2_init(int is486)
 
     ps2.split_addr = mem_size * 1024;
     mca_init(8);
-    device_add(&keyboard_ps2_mca_device);
+    device_add(&keyboard_ps2_device);
 
     ps2.planar_read  = model_80_read;
     ps2.planar_write = model_80_write;
@@ -1343,9 +1343,6 @@ machine_ps2_common_init(const machine_t *model)
     nmi_mask = 0x80;
 
     ps2.uart = device_add_inst(&ns16550_device, 1);
-
-    pic_kbd_latch(0x01);
-    pic_mouse_latch(0x01);
 }
 
 int
@@ -1495,3 +1492,24 @@ machine_ps2_model_80_axx_init(const machine_t *model)
 
     return ret;
 }
+
+int
+machine_ps2_model_70_type4_init(const machine_t *model)
+{
+    int ret; /* Result of the BIOS load; also the value returned to the caller. */
+
+    ret = bios_load_interleaved("roms/machines/ibmps2_m70_type4/64F3126.BIN",
+                                "roms/machines/ibmps2_m70_type4/64F3125.BIN",
+                                0x000e0000, 131072, 0); /* 131072 = 128 KiB; presumably address/size - confirm against bios_load_interleaved(). */
+
+    if (bios_only || !ret) /* Stop before any hardware setup when bios_only is set or the load returned 0. */
+        return ret;
+
+    machine_ps2_common_init(model);
+
+    ps2.planar_id = 0xf9ff;
+
+    ps2_mca_board_model_70_type34_init(1, 4); /* Arguments are is_type4 = 1 and slots = 4. */
+
+    return ret;
+}
diff --git a/src/machine/machine_table.c b/src/machine/machine_table.c
index 19892beb3..a3fe4c125 100644
--- a/src/machine/machine_table.c
+++ b/src/machine/machine_table.c
@@ -5470,7 +5470,45 @@ const machine_t machines[] = {
         .snd_device = NULL,
         .net_device = NULL
     },
-
+    /* Has IBM PS/2 Type 1 KBC firmware. */
+    {
+        .name = "[MCA] IBM PS/2 model 70 (type 4)",
+        .internal_name = "ibmps2_m70_type4",
+        .type = MACHINE_TYPE_486,
+        .chipset = MACHINE_CHIPSET_PROPRIETARY,
+        .init = machine_ps2_model_70_type4_init,
+        .pad = 0,
+        .pad0 = 0,
+        .pad1 = MACHINE_AVAILABLE,
+        .pad2 = 0,
+        .cpu = {
+            .package = CPU_PKG_SOCKET1,
+            .block = CPU_BLOCK_NONE,
+            .min_bus = 0,
+            .max_bus = 0,
+            .min_voltage = 0,
+            .max_voltage = 0,
+            .min_multi = 0,
+            .max_multi = 0
+        },
+        .bus_flags = MACHINE_PS2_MCA,
+        .flags = MACHINE_VIDEO,
+        .ram = {
+            .min = 2048,
+            .max = 65536,
+            .step = 2048
+        },
+        .nvrmask = 63,
+        .kbc_device = NULL,
+        .kbc_p1 = 0,
+        .gpio = 0,
+        .device = NULL,
+        .fdc_device = NULL,
+        .sio_device = NULL,
+        .vid_device = NULL,
+        .snd_device = NULL,
+        .net_device = NULL
+    },
     /* 486 machines - Socket 2 */
     /* 486 machines with just the ISA slot */
     /* Uses some variant of Phoenix MultiKey/42 as the BIOS sends keyboard controller
@@ -12593,7 +12631,7 @@ const machine_t machines[] = {
             .min_voltage = 1300,
             .max_voltage = 3500,
             .min_multi = 1.5,
-            .max_multi = 8.0 /* limits assumed */ 
+            .max_multi = 8.0 /* limits assumed */
         },
         .bus_flags = MACHINE_PS2_AGP,
         .flags = MACHINE_IDE_DUAL | MACHINE_APM | MACHINE_ACPI,
diff --git a/src/mem/mem.c b/src/mem/mem.c
index e0730e008..96b120112 100644
--- a/src/mem/mem.c
+++ b/src/mem/mem.c
@@ -3040,7 +3040,7 @@ mem_remap_top(int kb)
             if (addr >= 0x000c0000)
                 addr += 0x00010000;
         }
-        if (start_addr != 0)
+        if (start_addr == 0)
             start_addr = addr;
         pages[c].mem     = set ? &ram[addr] : page_ff;
         pages[c].write_b = set ? mem_write_ramb_page : NULL;
diff --git a/src/pci.c b/src/pci.c
index f9155e2e3..692733422 100644
--- a/src/pci.c
+++ b/src/pci.c
@@ -922,12 +922,11 @@ trc_reset(uint8_t val)
         dma_reset();
         dma_set_at(1);
 
-        device_reset_all();
+        device_reset_all(DEVICE_ALL);
 
         cpu_alt_reset = 0;
 
         pci_reset();
-        keyboard_at_reset();
 
         mem_a20_alt = 0;
         mem_a20_recalc();
diff --git a/src/pic.c b/src/pic.c
index 25a90e1c7..62ceabd5d 100644
--- a/src/pic.c
+++ b/src/pic.c
@@ -202,7 +202,7 @@ find_best_interrupt(pic_t *dev)
 
     intr = dev->interrupt = (ret == -1) ? 0x17 : ret;
 
-    if (dev->at && (ret != 1)) {
+    if (dev->at && (ret != -1)) {
         if (dev == &pic2)
             intr += 8;
 
@@ -570,8 +570,16 @@ pic_reset_hard(void)
 {
     pic_reset();
 
-    pic_kbd_latch(0x00);
-    pic_mouse_latch(0x00);
+    /* The situation is as follows: there is a giant mess when it comes to these latches on real hardware,
+       to the point that there are even boards with board-level latches that get used in place of the latches
+       on the chipset; therefore, I'm just doing this here for the sake of simplicity. */
+    if (machine_has_bus(machine, MACHINE_BUS_PS2)) {
+        pic_kbd_latch(0x01);
+        pic_mouse_latch(0x01);
+    } else {
+        pic_kbd_latch(0x00);
+        pic_mouse_latch(0x00);
+    }
 }
 
 void
@@ -644,7 +652,7 @@ picint_common(uint16_t num, int level, int set)
                 pic2.lines |= (num >> 8);
 
             /* Latch IRQ 12 if the mouse latch is enabled. */
-            if (mouse_latch && (num & 0x1000))
+            if ((num & 0x1000) && mouse_latch)
                 pic2.lines |= 0x10;
 
             pic2.irr |= (num >> 8);
diff --git a/src/port_92.c b/src/port_92.c
index cbc419569..fd0471b0e 100644
--- a/src/port_92.c
+++ b/src/port_92.c
@@ -69,11 +69,18 @@ port_92_readw(uint16_t port, void *priv)
     return ret;
 }
 
+/*
+   This does exactly the same thing as a keyboard controller reset.
+   TODO: ALi M1543(c) behavior.
+ */
 static void
 port_92_pulse(void *priv)
 {
-    resetx86();
+    softresetx86(); /* Pulse reset! */
     cpu_set_edx();
+    flushmmucache();
+
+    cpu_alt_reset = 1;
 }
 
 static void
@@ -166,6 +173,15 @@ port_92_remove(void *priv)
                          port_92_readb, NULL, NULL, port_92_writeb, NULL, NULL, dev);
 }
 
+static void
+port_92_reset(void *priv)
+{
+    cpu_alt_reset = 0; /* Undo the flag that port_92_pulse() sets to 1. */
+
+    mem_a20_alt = 0x00; /* Clear the alternate A20 value, then have the A20 state recomputed. */
+    mem_a20_recalc();
+}
+
 static void
 port_92_close(void *priv)
 {
@@ -252,7 +268,7 @@ const device_t port_92_pci_device = {
     .local         = PORT_92_PCI,
     .init          = port_92_init,
     .close         = port_92_close,
-    .reset         = NULL,
+    .reset         = port_92_reset,
     { .available = NULL },
     .speed_changed = NULL,
     .force_redraw  = NULL,
diff --git a/src/qt/languages/uk-UA.po b/src/qt/languages/uk-UA.po
index 0ebe3e5cc..4193fe934 100644
--- a/src/qt/languages/uk-UA.po
+++ b/src/qt/languages/uk-UA.po
@@ -305,7 +305,7 @@ msgid "&Default"
 msgstr "&За замовчуванням"
 
 msgid "Language:"
-msgstr "Язык:"
+msgstr "Мова:"
 
 msgid "Icon set:"
 msgstr "Набір іконок:"
@@ -446,7 +446,7 @@ msgid "PCap device:"
 msgstr "Пристрій PCap:"
 
 msgid "Network adapter:"
-msgstr "Мережева карта:"
+msgstr "Мережевий адаптер:"
 
 msgid "COM1 Device:"
 msgstr "Пристрій COM1:"
diff --git a/src/qt/qt_d3d9renderer.cpp b/src/qt/qt_d3d9renderer.cpp
index fed8e72b3..f2c9fe9f4 100644
--- a/src/qt/qt_d3d9renderer.cpp
+++ b/src/qt/qt_d3d9renderer.cpp
@@ -24,11 +24,16 @@ D3D9Renderer::D3D9Renderer(QWidget *parent, int monitor_index)
 
     windowHandle = (HWND) winId();
     surfaceInUse = true;
+    finalized = true;
 
     RendererCommon::parentWidget = parent;
 
     this->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding);
     this->m_monitor_index = monitor_index;
+
+    d3d9surface = nullptr;
+    d3d9dev = nullptr;
+    d3d9 = nullptr;
 }
 
 D3D9Renderer::~D3D9Renderer()
@@ -67,6 +72,7 @@ D3D9Renderer::hideEvent(QHideEvent *event)
 void
 D3D9Renderer::showEvent(QShowEvent *event)
 {
+    if (d3d9) finalize();
     params = {};
 
     if (FAILED(Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9))) {
@@ -80,7 +86,7 @@ D3D9Renderer::showEvent(QShowEvent *event)
     params.BackBufferCount            = 1;
     params.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
     params.PresentationInterval       = D3DPRESENT_INTERVAL_IMMEDIATE;
-    params.hDeviceWindow              = windowHandle;
+    params.hDeviceWindow              = (HWND) winId();
 
     HRESULT result = d3d9->CreateDeviceEx(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, windowHandle, D3DCREATE_MULTITHREADED | D3DCREATE_HARDWARE_VERTEXPROCESSING, &params, nullptr, &d3d9dev);
     if (FAILED(result))
@@ -118,10 +124,10 @@ D3D9Renderer::paintEvent(QPaintEvent *event)
     srcRect.bottom = source.bottom();
     srcRect.left   = source.left();
     srcRect.right  = source.right();
-    dstRect.top    = destination.top();
-    dstRect.bottom = destination.bottom();
-    dstRect.left   = destination.left();
-    dstRect.right  = destination.right();
+    dstRect.top    = destination.top() * devicePixelRatioF();
+    dstRect.bottom = destination.bottom() * devicePixelRatioF();
+    dstRect.left   = destination.left() * devicePixelRatioF();
+    dstRect.right  = destination.right() * devicePixelRatioF();
     d3d9dev->BeginScene();
     d3d9dev->Clear(0, nullptr, D3DCLEAR_TARGET, 0xFF000000, 0, 0);
     while (surfaceInUse) { }
diff --git a/src/qt/qt_settingsmachine.cpp b/src/qt/qt_settingsmachine.cpp
index e08b07997..fbe30949c 100644
--- a/src/qt/qt_settingsmachine.cpp
+++ b/src/qt/qt_settingsmachine.cpp
@@ -105,6 +105,12 @@ SettingsMachine::save()
     cpu             = ui->comboBoxSpeed->currentData().toInt();
     fpu_type        = ui->comboBoxFPU->currentData().toInt();
     cpu_use_dynarec = ui->checkBoxDynamicRecompiler->isChecked() ? 1 : 0;
+    fpu_softfloat   = (ui->checkBoxFPUSoftfloat->isChecked() && !cpu_use_dynarec) ? 1 : 0;
+    if (!strcmp(machines[machine].internal_name, "ibmps2_m70_type4")) {
+        cpu_use_dynarec = 0;
+        fpu_softfloat = 1;
+    }
+
     int64_t temp_mem_size;
     if (machine_get_ram_granularity(machine) < 1024) {
         temp_mem_size = ui->spinBoxRAM->value();
@@ -270,13 +276,22 @@ SettingsMachine::on_comboBoxSpeed_currentIndexChanged(int index)
     if (!(flags & CPU_SUPPORTS_DYNAREC)) {
         ui->checkBoxDynamicRecompiler->setChecked(false);
         ui->checkBoxDynamicRecompiler->setEnabled(false);
+        ui->checkBoxFPUSoftfloat->setChecked(fpu_softfloat);
+        ui->checkBoxFPUSoftfloat->setEnabled(cpu_use_dynarec ? false : true);
     } else if (flags & CPU_REQUIRES_DYNAREC) {
         ui->checkBoxDynamicRecompiler->setChecked(true);
         ui->checkBoxDynamicRecompiler->setEnabled(false);
+        ui->checkBoxFPUSoftfloat->setChecked(false);
+        ui->checkBoxFPUSoftfloat->setEnabled(false);
     } else {
         ui->checkBoxDynamicRecompiler->setChecked(cpu_use_dynarec);
         ui->checkBoxDynamicRecompiler->setEnabled(true);
+        ui->checkBoxFPUSoftfloat->setChecked(fpu_softfloat);
+        ui->checkBoxFPUSoftfloat->setEnabled(cpu_use_dynarec ? false : true);
     }
+#else
+    ui->checkBoxFPUSoftfloat->setChecked(fpu_softfloat);
+    ui->checkBoxFPUSoftfloat->setEnabled(true);
 #endif
 
     // win_settings_machine_recalc_fpu
diff --git a/src/qt/qt_settingsmachine.ui b/src/qt/qt_settingsmachine.ui
index d5ff1ca59..ee8a048f4 100644
--- a/src/qt/qt_settingsmachine.ui
+++ b/src/qt/qt_settingsmachine.ui
@@ -200,6 +200,19 @@
      </property>
     </widget>
    </item>
+   <item>
+    <widget class="QCheckBox" name="checkBoxFPUSoftfloat">
+     <property name="sizePolicy">
+      <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+       <horstretch>3</horstretch>
+       <verstretch>3</verstretch>
+      </sizepolicy>
+     </property>
+     <property name="text">
+      <string>Softfloat FPU</string>
+     </property>
+    </widget>
+   </item>
    <item>
     <widget class="QGroupBox" name="groupBox">
      <property name="sizePolicy">
diff --git a/src/qt/qt_settingsstoragecontrollers.ui b/src/qt/qt_settingsstoragecontrollers.ui
index 558d4c441..9fc0ea519 100644
--- a/src/qt/qt_settingsstoragecontrollers.ui
+++ b/src/qt/qt_settingsstoragecontrollers.ui
@@ -54,16 +54,26 @@
        <property name="text">
         <string>CD-ROM Controller:</string>
        </property>
+       <property name="visible">
+        <bool>false</bool>
+       </property>
       </widget>
      </item>
      <item row="2" column="1">
-      <widget class="QComboBox" name="comboBoxCDInterface"/>
+      <widget class="QComboBox" name="comboBoxCDInterface">
+       <property name="visible">
+        <bool>false</bool>
+       </property>
+      </widget>
      </item>
      <item row="2" column="2">
       <widget class="QPushButton" name="pushButtonCDInterface">
        <property name="text">
         <string>Configure</string>
        </property>
+       <property name="visible">
+        <bool>false</bool>
+       </property>
       </widget>
      </item>
      <item row="0" column="1">
diff --git a/src/sio/sio_ali5123.c b/src/sio/sio_ali5123.c
index 33e4022c6..54949a125 100644
--- a/src/sio/sio_ali5123.c
+++ b/src/sio/sio_ali5123.c
@@ -174,6 +174,7 @@ ali5123_reset(ali5123_t *dev)
     serial_setup(dev->uart[1], 0x03e8, dev->ld_regs[5][0x70]);
 
     /* Logical device 7: Keyboard */
+    dev->ld_regs[7][0x30] = 1;
     dev->ld_regs[7][0x70] = 1;
     /* TODO: Register F0 bit 6: 0 = PS/2, 1 = AT */
 
@@ -253,6 +254,9 @@ ali5123_write(uint16_t port, uint8_t val, void *priv)
                         case 0x06:
                         case 0x08 ... 0x0a:
                             return;
+                        case 0x07:
+                            if (dev->cur_reg == 0xf0)
+                                val &= 0xbf;
                     }
                 dev->ld_regs[cur_ld][dev->cur_reg] = val;
             }
diff --git a/src/sound/snd_adlibgold.c b/src/sound/snd_adlibgold.c
index 0616e8fa6..5c95194a3 100644
--- a/src/sound/snd_adlibgold.c
+++ b/src/sound/snd_adlibgold.c
@@ -842,7 +842,6 @@ static void
 adgold_input_msg(void *p, uint8_t *msg, uint32_t len)
 {
     adgold_t *adgold = (adgold_t *) p;
-    uint8_t   i;
 
     if (adgold->sysex)
         return;
@@ -850,7 +849,7 @@ adgold_input_msg(void *p, uint8_t *msg, uint32_t len)
     if (adgold->uart_in) {
         adgold->adgold_mma_status |= 0x04;
 
-        for (i = 0; i < len; i++) {
+        for (uint32_t i = 0; i < len; i++) {
             adgold->midi_queue[adgold->midi_w++] = msg[i];
             adgold->midi_w &= 0x0f;
         }
diff --git a/src/sound/snd_audiopci.c b/src/sound/snd_audiopci.c
index 105907ede..47812d84a 100644
--- a/src/sound/snd_audiopci.c
+++ b/src/sound/snd_audiopci.c
@@ -1969,9 +1969,8 @@ static void
 es1371_input_msg(void *p, uint8_t *msg, uint32_t len)
 {
     es1371_t *dev = (es1371_t *) p;
-    uint8_t   i;
 
-    for (i = 0; i < len; i++)
+    for (uint32_t i = 0; i < len; i++)
         es1371_write_fifo(dev, msg[i]);
 }
 
diff --git a/src/sound/snd_gus.c b/src/sound/snd_gus.c
index 2bef7edac..1557f97ba 100644
--- a/src/sound/snd_gus.c
+++ b/src/sound/snd_gus.c
@@ -1085,7 +1085,6 @@ static void
 gus_input_msg(void *p, uint8_t *msg, uint32_t len)
 {
     gus_t  *gus = (gus_t *) p;
-    uint8_t i;
 
     if (gus->sysex)
         return;
@@ -1093,7 +1092,7 @@ gus_input_msg(void *p, uint8_t *msg, uint32_t len)
     if (gus->uart_in) {
         gus->midi_status |= MIDI_INT_RECEIVE;
 
-        for (i = 0; i < len; i++) {
+        for (uint32_t i = 0; i < len; i++) {
             gus->midi_queue[gus->midi_w++] = msg[i];
             gus->midi_w &= 63;
         }
diff --git a/src/sound/snd_sb_dsp.c b/src/sound/snd_sb_dsp.c
index e6a7b8312..e340c1482 100644
--- a/src/sound/snd_sb_dsp.c
+++ b/src/sound/snd_sb_dsp.c
@@ -1055,7 +1055,6 @@ void
 sb_dsp_input_msg(void *p, uint8_t *msg, uint32_t len)
 {
     sb_dsp_t *dsp = (sb_dsp_t *) p;
-    uint8_t   i   = 0;
 
     sb_dsp_log("MIDI in sysex = %d, uart irq = %d, msg = %d\n", dsp->midi_in_sysex, dsp->uart_irq, len);
 
@@ -1068,11 +1067,11 @@ sb_dsp_input_msg(void *p, uint8_t *msg, uint32_t len)
         return;
 
     if (dsp->uart_irq) {
-        for (i = 0; i < len; i++)
+        for (uint32_t i = 0; i < len; i++)
             sb_add_data(dsp, msg[i]);
         sb_irq(dsp, 1);
     } else if (dsp->midi_in_poll) {
-        for (i = 0; i < len; i++)
+        for (uint32_t i = 0; i < len; i++)
             sb_add_data(dsp, msg[i]);
     }
 }
diff --git a/src/sound/sound.c b/src/sound/sound.c
index dbfaf0fdb..d8791d0cf 100644
--- a/src/sound/sound.c
+++ b/src/sound/sound.c
@@ -202,7 +202,7 @@ sound_card_get_internal_name(int card)
 }
 
 int
-sound_card_get_from_internal_name(char *s)
+sound_card_get_from_internal_name(const char *s)
 {
     int c = 0;
 
diff --git a/src/usb.c b/src/usb.c
index 75e60d438..8ac16edd1 100644
--- a/src/usb.c
+++ b/src/usb.c
@@ -26,8 +26,8 @@
 #include <86box/device.h>
 #include <86box/io.h>
 #include <86box/mem.h>
+#include <86box/timer.h>
 #include <86box/usb.h>
-#include "cpu.h"
 
 #ifdef ENABLE_USB_LOG
 int usb_do_log = ENABLE_USB_LOG;
@@ -47,6 +47,48 @@ usb_log(const char *fmt, ...)
 #    define usb_log(fmt, ...)
 #endif
 
+/* OHCI registers: byte offsets into dev->ohci_mmio; a single byte of a register is addressed as OHCI_Hc... + 0..3. */
+enum
+{
+    OHCI_HcRevision = 0x00,
+    OHCI_HcControl = 0x04,
+    OHCI_HcCommandStatus = 0x08,
+    OHCI_HcInterruptStatus = 0x0C,
+    OHCI_HcInterruptEnable = 0x10,
+    OHCI_HcInterruptDisable = 0x14,
+    OHCI_HcHCCA = 0x18,
+    OHCI_HcPeriodCurrentED = 0x1C,
+    OHCI_HcControlHeadED = 0x20,
+    OHCI_HcControlCurrentED = 0x24,
+    OHCI_HcBulkHeadED = 0x28,
+    OHCI_HcBulkCurrentED = 0x2C,
+    OHCI_HcDoneHead = 0x30,
+    OHCI_HcFMInterval = 0x34,
+    OHCI_HcFmRemaining = 0x38,
+    OHCI_HcFmNumber = 0x3C,
+    OHCI_HcPeriodicStart = 0x40,
+    OHCI_HcLSThreshold = 0x44,
+    OHCI_HcRhDescriptorA = 0x48,
+    OHCI_HcRhDescriptorB = 0x4C,
+    OHCI_HcRhStatus = 0x50,
+    OHCI_HcRhPortStatus1 = 0x54,
+    OHCI_HcRhPortStatus2 = 0x58,
+    OHCI_HcRhPortStatus3 = 0x5C
+};
+
+static void
+usb_interrupt_ohci(usb_t* usb)
+{
+    if (usb->ohci_mmio[OHCI_HcControl + 1] & 1) { /* Bit 0 of HcControl byte 1 (register bit 8, InterruptRouting in the OHCI spec) set: raise an SMI. */
+        smi_raise();
+    }
+    else if (usb->usb_params != NULL) { /* Otherwise fall back to the callback from the init parameters, if any were given. */
+        if (usb->usb_params->parent_priv != NULL && usb->usb_params->raise_interrupt != NULL) {
+            usb->usb_params->raise_interrupt(usb, usb->usb_params->parent_priv);
+        }
+    }
+}
+
 static uint8_t
 uhci_reg_read(uint16_t addr, void *p)
 {
@@ -147,6 +189,28 @@ ohci_mmio_read(uint32_t addr, void *p)
     return ret;
 }
 
+void
+ohci_update_frame_counter(void* priv)
+{
+    usb_t *dev = (usb_t *) priv; /* Placeholder callback: dev is not used yet and no frame counting is done here. */
+}
+
+void
+ohci_port_reset_callback(void* priv)
+{
+    usb_t *dev = (usb_t *) priv; /* priv is expected to be the usb_t this callback was registered with. */
+
+    dev->ohci_mmio[OHCI_HcRhPortStatus1] &= ~0x10; /* Clear bit 4 (0x10) in byte 0 of HcRhPortStatus1 (port 1). */
+}
+
+void
+ohci_port_reset_callback_2(void* priv)
+{
+    usb_t *dev = (usb_t *) priv; /* priv is expected to be the usb_t this callback was registered with. */
+
+    dev->ohci_mmio[OHCI_HcRhPortStatus2] &= ~0x10; /* Clear bit 4 (0x10) in byte 0 of HcRhPortStatus2 (port 2). */
+}
+
 static void
 ohci_mmio_write(uint32_t addr, uint8_t val, void *p)
 {
@@ -156,139 +220,140 @@ ohci_mmio_write(uint32_t addr, uint8_t val, void *p)
     addr &= 0x00000fff;
 
     switch (addr) {
-        case 0x04:
+        case OHCI_HcControl:
             if ((val & 0xc0) == 0x00) {
                 /* UsbReset */
-                dev->ohci_mmio[0x56] = dev->ohci_mmio[0x5a] = 0x16;
+                dev->ohci_mmio[OHCI_HcRhPortStatus1 + 2] = dev->ohci_mmio[OHCI_HcRhPortStatus2 + 2] = 0x16;
             }
             break;
-        case 0x08: /* HCCOMMANDSTATUS */
+        case OHCI_HcCommandStatus:
             /* bit OwnershipChangeRequest triggers an ownership change (SMM <-> OS) */
             if (val & 0x08) {
-                dev->ohci_mmio[0x0f] = 0x40;
-                if ((dev->ohci_mmio[0x13] & 0xc0) == 0xc0)
+                dev->ohci_mmio[OHCI_HcInterruptStatus + 3] = 0x40;
+                if ((dev->ohci_mmio[OHCI_HcInterruptEnable + 3] & 0xc0) == 0xc0)
                     smi_raise();
             }
 
             /* bit HostControllerReset must be cleared for the controller to be seen as initialized */
             if (val & 0x01) {
                 memset(dev->ohci_mmio, 0x00, 4096);
-                dev->ohci_mmio[0x00] = 0x10;
-                dev->ohci_mmio[0x01] = 0x01;
-                dev->ohci_mmio[0x48] = 0x02;
+                dev->ohci_mmio[OHCI_HcRevision] = 0x10;
+                dev->ohci_mmio[OHCI_HcRevision + 1] = 0x01;
+                dev->ohci_mmio[OHCI_HcRhDescriptorA] = 0x02;
                 val &= ~0x01;
             }
             break;
-        case 0x0c:
+        case OHCI_HcHCCA:
+            return;
+        case OHCI_HcInterruptStatus:
             dev->ohci_mmio[addr] &= ~(val & 0x7f);
             return;
-        case 0x0d:
-        case 0x0e:
+        case OHCI_HcInterruptStatus + 1:
+        case OHCI_HcInterruptStatus + 2:
             return;
-        case 0x0f:
+        case OHCI_HcInterruptStatus + 3:
             dev->ohci_mmio[addr] &= ~(val & 0x40);
             return;
-        case 0x3b:
+        case OHCI_HcFmRemaining + 3:
             dev->ohci_mmio[addr] = (val & 0x80);
             return;
-        case 0x39:
-        case 0x41:
+        case OHCI_HcFmRemaining + 1:
+        case OHCI_HcPeriodicStart + 1:
             dev->ohci_mmio[addr] = (val & 0x3f);
             return;
-        case 0x45:
+        case OHCI_HcLSThreshold + 1:
             dev->ohci_mmio[addr] = (val & 0x0f);
             return;
-        case 0x3a:
-        case 0x3e:
-        case 0x3f:
-        case 0x42:
-        case 0x43:
-        case 0x46:
-        case 0x47:
-        case 0x48:
-        case 0x4a:
+        case OHCI_HcFmRemaining + 2:
+        case OHCI_HcFmNumber + 2:
+        case OHCI_HcFmNumber + 3:
+        case OHCI_HcPeriodicStart + 2:
+        case OHCI_HcPeriodicStart + 3:
+        case OHCI_HcLSThreshold + 2:
+        case OHCI_HcLSThreshold + 3:
+        case OHCI_HcRhDescriptorA:
+        case OHCI_HcRhDescriptorA + 2:
             return;
-        case 0x49:
+        case OHCI_HcRhDescriptorA + 1:
             dev->ohci_mmio[addr] = (val & 0x1b);
             if (val & 0x02) {
-                dev->ohci_mmio[0x55] |= 0x01;
-                dev->ohci_mmio[0x59] |= 0x01;
+                dev->ohci_mmio[OHCI_HcRhPortStatus1 + 1] |= 0x01;
+                dev->ohci_mmio[OHCI_HcRhPortStatus2 + 1] |= 0x01;
             }
             return;
-        case 0x4b:
+        case OHCI_HcRhDescriptorA + 3:
             dev->ohci_mmio[addr] = (val & 0x03);
             return;
-        case 0x4c:
-        case 0x4e:
+        case OHCI_HcRhDescriptorB:
+        case OHCI_HcRhDescriptorB + 2:
             dev->ohci_mmio[addr] = (val & 0x06);
-            if ((addr == 0x4c) && !(val & 0x04)) {
-                if (!(dev->ohci_mmio[0x58] & 0x01))
-                    dev->ohci_mmio[0x5a] |= 0x01;
-                dev->ohci_mmio[0x58] |= 0x01;
+            if ((addr == OHCI_HcRhDescriptorB) && !(val & 0x04)) {
+                if (!(dev->ohci_mmio[OHCI_HcRhPortStatus2] & 0x01))
+                    dev->ohci_mmio[OHCI_HcRhPortStatus2 + 2] |= 0x01;
+                dev->ohci_mmio[OHCI_HcRhPortStatus2] |= 0x01;
             }
-            if ((addr == 0x4c) && !(val & 0x02)) {
-                if (!(dev->ohci_mmio[0x54] & 0x01))
-                    dev->ohci_mmio[0x56] |= 0x01;
-                dev->ohci_mmio[0x54] |= 0x01;
+            if ((addr == OHCI_HcRhDescriptorB) && !(val & 0x02)) {
+                if (!(dev->ohci_mmio[OHCI_HcRhPortStatus1] & 0x01))
+                    dev->ohci_mmio[OHCI_HcRhPortStatus1 + 2] |= 0x01;
+                dev->ohci_mmio[OHCI_HcRhPortStatus1] |= 0x01;
             }
             return;
-        case 0x4d:
-        case 0x4f:
+        case OHCI_HcRhDescriptorB + 1:
+        case OHCI_HcRhDescriptorB + 3:
             return;
-        case 0x50:
+        case OHCI_HcRhStatus:
             if (val & 0x01) {
-                if ((dev->ohci_mmio[0x49] & 0x03) == 0x00) {
-                    dev->ohci_mmio[0x55] &= ~0x01;
-                    dev->ohci_mmio[0x54] &= ~0x17;
-                    dev->ohci_mmio[0x56] &= ~0x17;
-                    dev->ohci_mmio[0x59] &= ~0x01;
-                    dev->ohci_mmio[0x58] &= ~0x17;
-                    dev->ohci_mmio[0x5a] &= ~0x17;
-                } else if ((dev->ohci_mmio[0x49] & 0x03) == 0x01) {
-                    if (!(dev->ohci_mmio[0x4e] & 0x02)) {
-                        dev->ohci_mmio[0x55] &= ~0x01;
-                        dev->ohci_mmio[0x54] &= ~0x17;
-                        dev->ohci_mmio[0x56] &= ~0x17;
+                if ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x00) {
+                    dev->ohci_mmio[OHCI_HcRhPortStatus1 + 1] &= ~0x01;
+                    dev->ohci_mmio[OHCI_HcRhPortStatus1] &= ~0x17;
+                    dev->ohci_mmio[OHCI_HcRhPortStatus1 + 2] &= ~0x17;
+                    dev->ohci_mmio[OHCI_HcRhPortStatus2 + 1] &= ~0x01;
+                    dev->ohci_mmio[OHCI_HcRhPortStatus2] &= ~0x17;
+                    dev->ohci_mmio[OHCI_HcRhPortStatus2 + 2] &= ~0x17;
+                } else if ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x01) {
+                    if (!(dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x02)) {
+                        dev->ohci_mmio[OHCI_HcRhPortStatus1 + 1] &= ~0x01;
+                        dev->ohci_mmio[OHCI_HcRhPortStatus1] &= ~0x17;
+                        dev->ohci_mmio[OHCI_HcRhPortStatus1 + 2] &= ~0x17;
                     }
-                    if (!(dev->ohci_mmio[0x4e] & 0x04)) {
-                        dev->ohci_mmio[0x59] &= ~0x01;
-                        dev->ohci_mmio[0x58] &= ~0x17;
-                        dev->ohci_mmio[0x5a] &= ~0x17;
+                    if (!(dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x04)) {
+                        dev->ohci_mmio[OHCI_HcRhPortStatus2 + 1] &= ~0x01;
+                        dev->ohci_mmio[OHCI_HcRhPortStatus2] &= ~0x17;
+                        dev->ohci_mmio[OHCI_HcRhPortStatus2 + 2] &= ~0x17;
                     }
                 }
             }
             return;
-        case 0x51:
+        case OHCI_HcRhStatus + 1:
             if (val & 0x80)
                 dev->ohci_mmio[addr] |= 0x80;
             return;
-        case 0x52:
+        case OHCI_HcRhStatus + 2:
             dev->ohci_mmio[addr] &= ~(val & 0x02);
             if (val & 0x01) {
-                if ((dev->ohci_mmio[0x49] & 0x03) == 0x00) {
-                    dev->ohci_mmio[0x55] |= 0x01;
-                    dev->ohci_mmio[0x59] |= 0x01;
-                } else if ((dev->ohci_mmio[0x49] & 0x03) == 0x01) {
-                    if (!(dev->ohci_mmio[0x4e] & 0x02))
-                        dev->ohci_mmio[0x55] |= 0x01;
-                    if (!(dev->ohci_mmio[0x4e] & 0x04))
-                        dev->ohci_mmio[0x59] |= 0x01;
+                if ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x00) {
+                    dev->ohci_mmio[OHCI_HcRhPortStatus1 + 1] |= 0x01;
+                    dev->ohci_mmio[OHCI_HcRhPortStatus2 + 1] |= 0x01;
+                } else if ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x01) {
+                    if (!(dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x02))
+                        dev->ohci_mmio[OHCI_HcRhPortStatus1 + 1] |= 0x01;
+                    if (!(dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x04))
+                        dev->ohci_mmio[OHCI_HcRhPortStatus2 + 1] |= 0x01;
                 }
             }
             return;
-        case 0x53:
+        case OHCI_HcRhStatus + 3:
             if (val & 0x80)
-                dev->ohci_mmio[0x51] &= ~0x80;
+                dev->ohci_mmio[OHCI_HcRhStatus + 1] &= ~0x80;
             return;
-        case 0x54:
-        case 0x58:
+        case OHCI_HcRhPortStatus1:
+        case OHCI_HcRhPortStatus2:
             old = dev->ohci_mmio[addr];
 
             if (val & 0x10) {
                 if (old & 0x01) {
                     dev->ohci_mmio[addr] |= 0x10;
-                    /* TODO: The clear should be on a 10 ms timer. */
-                    dev->ohci_mmio[addr] &= ~0x10;
+                    timer_on_auto(&dev->ohci_port_reset_timer[(addr - OHCI_HcRhPortStatus1) / 4], 10000.);
                     dev->ohci_mmio[addr + 2] |= 0x10;
                 } else
                     dev->ohci_mmio[addr + 2] |= 0x01;
@@ -315,36 +380,35 @@ ohci_mmio_write(uint32_t addr, uint8_t val, void *p)
             /* if (!(dev->ohci_mmio[addr] & 0x02))
                     dev->ohci_mmio[addr + 2] |= 0x02; */
             return;
-        case 0x55:
-            if ((val & 0x02) && ((dev->ohci_mmio[0x49] & 0x03) == 0x00) && (dev->ohci_mmio[0x4e] & 0x02)) {
+        case OHCI_HcRhPortStatus1 + 1:
+            if ((val & 0x02) && ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x00) && (dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x02)) {
                 dev->ohci_mmio[addr] &= ~0x01;
-                dev->ohci_mmio[0x54] &= ~0x17;
-                dev->ohci_mmio[0x56] &= ~0x17;
+                dev->ohci_mmio[OHCI_HcRhPortStatus1] &= ~0x17;
+                dev->ohci_mmio[OHCI_HcRhPortStatus1 + 2] &= ~0x17;
             }
-            if ((val & 0x01) && ((dev->ohci_mmio[0x49] & 0x03) == 0x00) && (dev->ohci_mmio[0x4e] & 0x02)) {
+            if ((val & 0x01) && ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x00) && (dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x02)) {
                 dev->ohci_mmio[addr] |= 0x01;
-                dev->ohci_mmio[0x58] &= ~0x17;
-                dev->ohci_mmio[0x5a] &= ~0x17;
+                dev->ohci_mmio[OHCI_HcRhPortStatus2] &= ~0x17;
+                dev->ohci_mmio[OHCI_HcRhPortStatus2 + 2] &= ~0x17;
             }
             return;
-        case 0x59:
-            if ((val & 0x02) && ((dev->ohci_mmio[0x49] & 0x03) == 0x00) && (dev->ohci_mmio[0x4e] & 0x04))
+        case OHCI_HcRhPortStatus2 + 1:
+            if ((val & 0x02) && ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x00) && (dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x04))
                 dev->ohci_mmio[addr] &= ~0x01;
-            if ((val & 0x01) && ((dev->ohci_mmio[0x49] & 0x03) == 0x00) && (dev->ohci_mmio[0x4e] & 0x04))
+            if ((val & 0x01) && ((dev->ohci_mmio[OHCI_HcRhDescriptorA + 1] & 0x03) == 0x00) && (dev->ohci_mmio[OHCI_HcRhDescriptorB + 2] & 0x04))
                 dev->ohci_mmio[addr] |= 0x01;
             return;
-        case 0x56:
-        case 0x5a:
+        case OHCI_HcRhPortStatus1 + 2:
+        case OHCI_HcRhPortStatus2 + 2:
             dev->ohci_mmio[addr] &= ~(val & 0x1f);
             return;
-        case 0x57:
-        case 0x5b:
+        case OHCI_HcRhPortStatus1 + 3:
+        case OHCI_HcRhPortStatus2 + 3:
             return;
     }
 
     dev->ohci_mmio[addr] = val;
 }
-
 void
 ohci_update_mem_mapping(usb_t *dev, uint8_t base1, uint8_t base2, uint8_t base3, int enable)
 {
@@ -368,9 +432,9 @@ usb_reset(void *priv)
     dev->uhci_io[0x10] = dev->uhci_io[0x12] = 0x80;
 
     memset(dev->ohci_mmio, 0x00, 4096);
-    dev->ohci_mmio[0x00] = 0x10;
-    dev->ohci_mmio[0x01] = 0x01;
-    dev->ohci_mmio[0x48] = 0x02;
+    dev->ohci_mmio[OHCI_HcRevision] = 0x10;
+    dev->ohci_mmio[OHCI_HcRevision + 1] = 0x01;
+    dev->ohci_mmio[OHCI_HcRhDescriptorA] = 0x02;
 
     io_removehandler(dev->uhci_io_base, 0x20, uhci_reg_read, NULL, NULL, uhci_reg_write, uhci_reg_writew, NULL, dev);
     dev->uhci_enable = 0;
@@ -388,7 +452,7 @@ usb_close(void *priv)
 }
 
 static void *
-usb_init(const device_t *info)
+usb_init_ext(const device_t *info, void* params)
 {
     usb_t *dev;
 
@@ -397,19 +461,15 @@ usb_init(const device_t *info)
         return (NULL);
     memset(dev, 0x00, sizeof(usb_t));
 
-    memset(dev->uhci_io, 0x00, 128);
-    dev->uhci_io[0x0c] = 0x40;
-    dev->uhci_io[0x10] = dev->uhci_io[0x12] = 0x80;
-
-    memset(dev->ohci_mmio, 0x00, 4096);
-    dev->ohci_mmio[0x00] = 0x10;
-    dev->ohci_mmio[0x01] = 0x01;
-    dev->ohci_mmio[0x48] = 0x02;
+    dev->usb_params = (usb_params_t*)params;
 
     mem_mapping_add(&dev->ohci_mmio_mapping, 0, 0,
                     ohci_mmio_read, NULL, NULL,
                     ohci_mmio_write, NULL, NULL,
                     NULL, MEM_MAPPING_EXTERNAL, dev);
+    timer_add(&dev->ohci_frame_timer, ohci_update_frame_counter, dev, 0); /* Unused for now, to be used for frame counting. */
+    timer_add(&dev->ohci_port_reset_timer[0], ohci_port_reset_callback, dev, 0);
+    timer_add(&dev->ohci_port_reset_timer[1], ohci_port_reset_callback_2, dev, 0);
     usb_reset(dev);
 
     return dev;
@@ -418,9 +478,9 @@ usb_init(const device_t *info)
 const device_t usb_device = {
     .name          = "Universal Serial Bus",
     .internal_name = "usb",
-    .flags         = DEVICE_PCI,
+    .flags         = DEVICE_PCI | DEVICE_EXTPARAMS,
     .local         = 0,
-    .init          = usb_init,
+    .init_ext      = usb_init_ext,
     .close         = usb_close,
     .reset         = usb_reset,
     { .available = NULL },
diff --git a/src/video/vid_ht216.c b/src/video/vid_ht216.c
index 255a9e8b8..91c542164 100644
--- a/src/video/vid_ht216.c
+++ b/src/video/vid_ht216.c
@@ -1347,7 +1347,7 @@ ht216_read_common(ht216_t *ht216, uint32_t addr)
         temp = 0xff;
 
         for (pixel = 0; pixel < 8; pixel++) {
-            for (plane = 0; plane < (1 << count); plane++) {
+            for (plane = 0; plane < (uint8_t)(1 << count); plane++) {
                 if (svga->colournocare & (1 << plane)) {
                     /* If we care about a plane, and the pixel has a mismatch on it, clear its bit. */
                     if (((svga->latch.b[plane] >> pixel) & 1) != ((svga->colourcompare >> plane) & 1))
diff --git a/src/video/vid_s3.c b/src/video/vid_s3.c
index 68b84d9db..346de2ebb 100644
--- a/src/video/vid_s3.c
+++ b/src/video/vid_s3.c
@@ -8928,7 +8928,7 @@ const device_t s3_9fx_771_pci_device = {
     { .available = s3_9fx_771_available },
     .speed_changed = s3_speed_changed,
     .force_redraw  = s3_force_redraw,
-    .config        = s3_standard_config
+    .config        = s3_968_config
 };
 
 const device_t s3_phoenix_vision968_pci_device = {
diff --git a/src/win/Makefile.mingw b/src/win/Makefile.mingw
index 76d402a83..0aa08d267 100644
--- a/src/win/Makefile.mingw
+++ b/src/win/Makefile.mingw
@@ -209,6 +209,9 @@ endif
 ifndef MINITRACE
  MINITRACE := n
 endif
+ifndef AVX
+ AVX := n
+endif
 ifeq ($(DYNAREC), y)
  ifeq ($(ARM), y)
   ifeq ($(NEW_DYNAREC), n)
@@ -238,7 +241,7 @@ PROG := 86Box
 #########################################################################
 #              Nothing should need changing from here on..              #
 #########################################################################
-VPATH := $(EXPATH) . $(CODEGEN) minitrace cpu \
+VPATH := $(EXPATH) . $(CODEGEN) minitrace cpu cpu/softfloat \
          cdrom chipset device disk disk/minivhd floppy \
          game machine mem printer \
          sio sound \
@@ -314,7 +317,11 @@ else
   endif
  endif
 endif
-AFLAGS := -msse2 -mfpmath=sse
+ifeq ($(AVX), y)
+ AFLAGS := -msse2 -msse3 -mssse3 -msse4 -msse4a -mavx -mavx2 -mfpmath=sse
+else
+ AFLAGS := -msse2 -mfpmath=sse
+endif
 ifeq ($(ARM), y)
  DFLAGS := -march=armv7-a
  AOPTIM :=
@@ -544,7 +551,9 @@ CPUOBJ := $(DYNARECOBJ) \
           $(CGTOBJ) \
           cpu.o cpu_table.o fpu.o x86.o \
           8080.o 808x.o 386.o 386_common.o 386_dynarec.o 386_dynarec_ops.o \
-          x86seg.o x87.o x87_timings.o
+          x86seg.o x87.o x87_timings.o \
+	  f2xm1.o fpatan.o fprem.o fsincos.o fyl2x.o softfloat_poly.o softfloat.o softfloat16.o \
+	  softfloat-muladd.o softfloat-round-pack.o softfloat-specialize.o softfloatx80.o
 
 CHIPSETOBJ := 82c100.o acc2168.o \
               contaq_82c59x.o \
@@ -581,11 +590,8 @@ MCHOBJ := machine.o machine_table.o \
           m_at_socket8.o m_at_slot1.o m_at_slot2.o m_at_socket370.o \
           m_at_misc.o
 
-ifeq ($(NEW_KBC), y)
- KBCOBJ := kbc_at.o kbd_at.o
-else
- KBCOBJ := keyboard_at.o
-endif
+KBCOBJ := kbc_at.o kbc_at_dev.o \
+          keyboard_at.o
 
 DEVOBJ := bugger.o cartridge.o cassette.o hasp.o hwm.o hwm_lm75.o hwm_lm78.o hwm_gl518sm.o hwm_vt82c686.o \
           ibm_5161.o isamem.o isartc.o lpt.o pci_bridge.o postcard.o serial.o \
@@ -870,7 +876,7 @@ all: $(PROG).exe
 
 86Box.res: 86Box.rc
 	@echo Processing $<
-	@$(WINDRES) $(RFLAGS) $(EXTRAS) -i $< -o 86Box.res
+	@$(WINDRES) -v $(RFLAGS) $(EXTRAS) -i $< -o 86Box.res
 
 $(PROG).exe: $(OBJ) 86Box.res
 	@echo Linking $(PROG).exe ..
diff --git a/src/win/languages/cs-CZ.rc b/src/win/languages/cs-CZ.rc
index a2fe0eaa6..262f059b0 100644
--- a/src/win/languages/cs-CZ.rc
+++ b/src/win/languages/cs-CZ.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Zapnuta (místní čas)"
 #define STR_ENABLED_UTC   "Zapnuta (UTC)"
 #define STR_DYNAREC       "Dynamický překladač"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Grafika:"
 #define STR_VIDEO_2       "Grafika 2:"
diff --git a/src/win/languages/de-DE.rc b/src/win/languages/de-DE.rc
index 735aac2b2..b2e25e332 100644
--- a/src/win/languages/de-DE.rc
+++ b/src/win/languages/de-DE.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Aktiviert (Lokale Uhrzeit)"
 #define STR_ENABLED_UTC   "Aktiviert (UTC)"
 #define STR_DYNAREC       "Dynamischer Recompiler"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Videokarte:"
 #define STR_VIDEO_2       "Videokarte 2:"
diff --git a/src/win/languages/dialogs.rc b/src/win/languages/dialogs.rc
index 12613a04f..ef3b8ac62 100644
--- a/src/win/languages/dialogs.rc
+++ b/src/win/languages/dialogs.rc
@@ -232,20 +232,25 @@ BEGIN
                     CFG_HMARGIN, 120, 120, CFG_CHECKBOX_HEIGHT
 #endif
 
+    CONTROL         STR_SOFTFLOAT, IDC_CHECK_SOFTFLOAT,
+                    "Button", BS_AUTOCHECKBOX | WS_TABSTOP,
+                    CFG_HMARGIN, 135, 120, CFG_CHECKBOX_HEIGHT
+
+
     GROUPBOX        STR_TIME_SYNC, IDC_TIME_SYNC,
-                    CFG_HMARGIN, 135, 110, 56
+                    CFG_HMARGIN, 150, 110, 56
 
     CONTROL         STR_DISABLED, IDC_RADIO_TS_DISABLED,
                     "Button", BS_AUTORADIOBUTTON | WS_GROUP | WS_TABSTOP,
-                    14, 147, CFG_CHECKBOX_PRI_WIDTH, CFG_CHECKBOX_HEIGHT
+                    14, 162, CFG_CHECKBOX_PRI_WIDTH, CFG_CHECKBOX_HEIGHT
 
     CONTROL         STR_ENABLED_LOCAL, IDC_RADIO_TS_LOCAL,
                     "Button", BS_AUTORADIOBUTTON | WS_TABSTOP,
-                    14, 161, CFG_CHECKBOX_PRI_WIDTH, CFG_CHECKBOX_HEIGHT
+                    14, 176, CFG_CHECKBOX_PRI_WIDTH, CFG_CHECKBOX_HEIGHT
 
     CONTROL         STR_ENABLED_UTC, IDC_RADIO_TS_UTC,
                     "Button", BS_AUTORADIOBUTTON | WS_TABSTOP,
-                    14, 175, CFG_CHECKBOX_PRI_WIDTH, CFG_CHECKBOX_HEIGHT
+                    14, 190, CFG_CHECKBOX_PRI_WIDTH, CFG_CHECKBOX_HEIGHT
 END
 
 DLG_CFG_VIDEO DIALOG DISCARDABLE  CFG_PANE_LEFT, CFG_PANE_TOP, CFG_PANE_WIDTH, CFG_PANE_HEIGHT
@@ -974,6 +979,7 @@ END
 #undef STR_ENABLED_LOCAL
 #undef STR_ENABLED_UTC
 #undef STR_DYNAREC
+#undef STR_SOFTFLOAT
 
 #undef STR_VIDEO
 #undef STR_VIDEO_2
diff --git a/src/win/languages/en-GB.rc b/src/win/languages/en-GB.rc
index f94c6ab3b..a0ed1487e 100644
--- a/src/win/languages/en-GB.rc
+++ b/src/win/languages/en-GB.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Enabled (local time)"
 #define STR_ENABLED_UTC   "Enabled (UTC)"
 #define STR_DYNAREC       "Dynamic Recompiler"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Video:"
 #define STR_VIDEO_2       "Video 2:"
diff --git a/src/win/languages/en-US.rc b/src/win/languages/en-US.rc
index a0719ac1d..9464c6d6b 100644
--- a/src/win/languages/en-US.rc
+++ b/src/win/languages/en-US.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Enabled (local time)"
 #define STR_ENABLED_UTC   "Enabled (UTC)"
 #define STR_DYNAREC       "Dynamic Recompiler"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Video:"
 #define STR_VIDEO_2       "Video 2:"
diff --git a/src/win/languages/es-ES.rc b/src/win/languages/es-ES.rc
index 6bfce6e52..5f138c3c2 100644
--- a/src/win/languages/es-ES.rc
+++ b/src/win/languages/es-ES.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Habilitado (hora local)"
 #define STR_ENABLED_UTC   "Habilitado (UTC)"
 #define STR_DYNAREC       "Recompilador Dinámico"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Vídeo:"
 #define STR_VIDEO_2       "Vídeo 2:"
diff --git a/src/win/languages/fi-FI.rc b/src/win/languages/fi-FI.rc
index 8cd80a4e7..c32f94b41 100644
--- a/src/win/languages/fi-FI.rc
+++ b/src/win/languages/fi-FI.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Käytössä (paikallinen)"
 #define STR_ENABLED_UTC   "Käytössä (UTC)"
 #define STR_DYNAREC       "Dynaaminen uudelleenkääntäjä"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Näytönohjain:"
 #define STR_VIDEO_2       "Näytönohjain 2:"
diff --git a/src/win/languages/fr-FR.rc b/src/win/languages/fr-FR.rc
index 9e0296c4c..bffedce44 100644
--- a/src/win/languages/fr-FR.rc
+++ b/src/win/languages/fr-FR.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Activé (heure locale)"
 #define STR_ENABLED_UTC   "Activé (UTC)"
 #define STR_DYNAREC       "Recompilateur dynamique"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Vidéo:"
 #define STR_VIDEO_2       "Vidéo 2:"
diff --git a/src/win/languages/hr-HR.rc b/src/win/languages/hr-HR.rc
index d8a19690e..b79defcbe 100644
--- a/src/win/languages/hr-HR.rc
+++ b/src/win/languages/hr-HR.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Uključeno (lokalno vrijeme)"
 #define STR_ENABLED_UTC   "Uključeno (UTC)"
 #define STR_DYNAREC       "Dinamički rekompilator"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Video:"
 #define STR_VIDEO_2       "Video 2:"
diff --git a/src/win/languages/hu-HU.rc b/src/win/languages/hu-HU.rc
index b08797ab9..9486d8e61 100644
--- a/src/win/languages/hu-HU.rc
+++ b/src/win/languages/hu-HU.rc
@@ -283,6 +283,7 @@ END
 #define STR_ENABLED_LOCAL "Engedélyezve (helyi idő)"
 #define STR_ENABLED_UTC   "Engedélyezve (UTC)"
 #define STR_DYNAREC       "Dinamikus újrafordítás"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Videokártya:"
 #define STR_VIDEO_2       "Videokártya 2:"
diff --git a/src/win/languages/it-IT.rc b/src/win/languages/it-IT.rc
index 0790813a0..02ca9bf9f 100644
--- a/src/win/languages/it-IT.rc
+++ b/src/win/languages/it-IT.rc
@@ -279,6 +279,7 @@ END
 #define STR_ENABLED_LOCAL "Abilitata (ora locale)"
 #define STR_ENABLED_UTC   "Abilitata (UTC)"
 #define STR_DYNAREC       "Ricompilatore dinamico"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Video:"
 #define STR_VIDEO_2       "Video 2:"
diff --git a/src/win/languages/ja-JP.rc b/src/win/languages/ja-JP.rc
index 512bde8a0..89cc42011 100644
--- a/src/win/languages/ja-JP.rc
+++ b/src/win/languages/ja-JP.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "有効にする (現地時間)"
 #define STR_ENABLED_UTC   "有効にする (UTC)"
 #define STR_DYNAREC       "動的リコンパイラ"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "ビデオカード:"
 #define STR_VIDEO_2       "ビデオカード 2:"
diff --git a/src/win/languages/ko-KR.rc b/src/win/languages/ko-KR.rc
index 49dfe34ba..cbe3e4752 100644
--- a/src/win/languages/ko-KR.rc
+++ b/src/win/languages/ko-KR.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "사용 (현지 시간)"
 #define STR_ENABLED_UTC   "사용 (UTC)"
 #define STR_DYNAREC       "동적 재컴파일"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "비디오 카드:"
 #define STR_VIDEO_2       "비디오 카드 2:"
diff --git a/src/win/languages/pl-PL.rc b/src/win/languages/pl-PL.rc
index 803971ee1..84fccb70d 100644
--- a/src/win/languages/pl-PL.rc
+++ b/src/win/languages/pl-PL.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Włączona (czas lokalny)"
 #define STR_ENABLED_UTC   "Włączona (UTC)"
 #define STR_DYNAREC       "Dynamiczny rekompilator"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Wideo:"
 #define STR_VIDEO_2       "Wideo 2:"
diff --git a/src/win/languages/pt-BR.rc b/src/win/languages/pt-BR.rc
index 669bb91b3..b01a214ff 100644
--- a/src/win/languages/pt-BR.rc
+++ b/src/win/languages/pt-BR.rc
@@ -281,6 +281,7 @@ END
 #define STR_ENABLED_LOCAL "Ativar (hora local)"
 #define STR_ENABLED_UTC   "Ativar (UTC)"
 #define STR_DYNAREC       "Recompilador dinâmico"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Vídeo:"
 #define STR_VIDEO_2       "Vídeo 2:"
diff --git a/src/win/languages/pt-PT.rc b/src/win/languages/pt-PT.rc
index 50f2852c8..6390453dd 100644
--- a/src/win/languages/pt-PT.rc
+++ b/src/win/languages/pt-PT.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Ativada (hora local)"
 #define STR_ENABLED_UTC   "Ativada (UTC)"
 #define STR_DYNAREC       "Recompilador dinâmico"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Vídeo:"
 #define STR_VIDEO_2       "Vídeo 2:"
diff --git a/src/win/languages/ru-RU.rc b/src/win/languages/ru-RU.rc
index 741e6466e..f49247894 100644
--- a/src/win/languages/ru-RU.rc
+++ b/src/win/languages/ru-RU.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Включить (местное)"
 #define STR_ENABLED_UTC   "Включить (UTC)"
 #define STR_DYNAREC       "Динамический рекомпилятор"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Видеокарта:"
 #define STR_VIDEO_2       "Видеокарта 2:"
diff --git a/src/win/languages/sl-SI.rc b/src/win/languages/sl-SI.rc
index 3f632f84c..beeb9183b 100644
--- a/src/win/languages/sl-SI.rc
+++ b/src/win/languages/sl-SI.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Omogočeno (lokalni čas)"
 #define STR_ENABLED_UTC   "Omogočeno (UTC)"
 #define STR_DYNAREC       "Dinamični prevajalnik"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Video:"
 #define STR_VIDEO_2       "Video 2:"
diff --git a/src/win/languages/tr-TR.rc b/src/win/languages/tr-TR.rc
index 38f9bd16d..04e00eb0d 100644
--- a/src/win/languages/tr-TR.rc
+++ b/src/win/languages/tr-TR.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Etkin (yerel zaman)"
 #define STR_ENABLED_UTC   "Etkin (UTC)"
 #define STR_DYNAREC       "Dinamik Derleyici"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Ekran kartı:"
 #define STR_VIDEO_2       "Ekran kartı 2:"
diff --git a/src/win/languages/uk-UA.rc b/src/win/languages/uk-UA.rc
index b3c7f4017..fc3b29c84 100644
--- a/src/win/languages/uk-UA.rc
+++ b/src/win/languages/uk-UA.rc
@@ -250,7 +250,7 @@ END
 #define STR_CANCEL        "Відміна"
 #define STR_GLOBAL        "Зберегти ці параметри як &глобальні за замовчуванням"
 #define STR_DEFAULT       "&За замовчуванням"
-#define STR_LANGUAGE      "Язык:"
+#define STR_LANGUAGE      "Мова:"
 #define STR_ICONSET       "Набір іконок:"
 
 #define STR_GAIN          "Посилення"
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "Увімкнути (місцеве)"
 #define STR_ENABLED_UTC   "Увімкнути (UTC)"
 #define STR_DYNAREC       "Динамічний рекомпілятор"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "Відеокарта:"
 #define STR_VIDEO_2       "Відеокарта 2:"
@@ -306,11 +307,11 @@ END
 
 #define STR_NET_TYPE      "Тип мережі:"
 #define STR_PCAP          "Пристрій PCap:"
-#define STR_NET           "Мережева карта:"
-#define STR_NET1          "Network card 1:"
-#define STR_NET2          "Network card 2:"
-#define STR_NET3          "Network card 3:"
-#define STR_NET4          "Network card 4:"
+#define STR_NET           "Мережевий адаптер:"
+#define STR_NET1          "Мережева карта 1:"
+#define STR_NET2          "Мережева карта 2:"
+#define STR_NET3          "Мережева карта 3:"
+#define STR_NET4          "Мережева карта 4:"
 
 #define STR_COM1          "Пристрій COM1:"
 #define STR_COM2          "Пристрій COM2:"
@@ -404,11 +405,11 @@ BEGIN
     IDS_2055    "Образи ZIP (*.IM?;*.ZDI)\0*.IM?;*.ZDI\0"
     IDS_2056    "86Box не зміг знайти жодного відповідного для використання файлу з ПЗУ.\n\nБудь ласка <a href=""https://github.com/86Box/roms/releases/latest"">завантажте</a> набір ПЗУ і витягніть його в каталог ""roms""."
     IDS_2057    "(порожньо)"
-    IDS_2058    "Образи ZIP (*.IM?;*.ZDI)\0*.IM?;*.ZDI\0Всі файли (*.*)\0*.*\0"
+    IDS_2058    "Образи ZIP (*.IM?;*.ZDI)\0*.IM?;*.ZDI\0Усі файли (*.*)\0*.*\0"
     IDS_2059    "Турбо"
     IDS_2060    "Увімк"
     IDS_2061    "Вимк"
-    IDS_2062    "Всі образи (*.86F;*.DSK;*.FLP;*.IM?;*.*FD?)\0*.86F;*.DSK;*.FLP;*.IM?;*.*FD?\0Прості посекторні образи (*.DSK;*.FLP;*.IM?;*.*FD?)\0*.DSK;*.FLP;*.IM?;*.IMG;*.*FD?\0Образ поверхні (*.86F)\0*.86F\0"
+    IDS_2062    "Усі образи (*.86F;*.DSK;*.FLP;*.IM?;*.*FD?)\0*.86F;*.DSK;*.FLP;*.IM?;*.*FD?\0Прості посекторні образи (*.DSK;*.FLP;*.IM?;*.*FD?)\0*.DSK;*.FLP;*.IM?;*.IMG;*.*FD?\0Образ поверхні (*.86F)\0*.86F\0"
     IDS_2063    "Системна плата ""%hs"" недоступна через відсутність файлу її ПЗУ в каталозі roms/machines. Переключення на доступну системну плату."
 END
 
@@ -464,14 +465,14 @@ BEGIN
     IDS_2107    "%u"
     IDS_2108    "%u МБ (CHS: %i, %i, %i)"
     IDS_2109    "Дисковод %i (%s): %ls"
-    IDS_2110    "Всі образи (*.0??;*.1??;*.??0;*.86F;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.JSON;*.TD0;*.*FD?;*.MFM;*.XDF)\0*.0??;*.1??;*.??0;*.86F;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.JSON;*.TD0;*.*FD?;*.MFM;*.XDF\0Розширені образи секторів (*.IMD;*.JSON;*.TD0)\0*.IMD;*.JSON;*.TD0\0Основні образи секторів (*.0??;*.1??;*.??0;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.XDF;*.*FD?)\0*.0??;*.1??;*.??0;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.XDF;*.*FD?\0Образи Flux (*.FDI)\0*.FDI\0Образи Surface (*.86F;*.MFM)\0*.86F;*.MFM\0Всі файли (*.*)\0*.*\0"
+    IDS_2110    "Усі образи (*.0??;*.1??;*.??0;*.86F;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.JSON;*.TD0;*.*FD?;*.MFM;*.XDF)\0*.0??;*.1??;*.??0;*.86F;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.JSON;*.TD0;*.*FD?;*.MFM;*.XDF\0Розширені образи секторів (*.IMD;*.JSON;*.TD0)\0*.IMD;*.JSON;*.TD0\0Основні образи секторів (*.0??;*.1??;*.??0;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.XDF;*.*FD?)\0*.0??;*.1??;*.??0;*.BIN;*.CQ?;*.D??;*.FLP;*.HDM;*.IM?;*.XDF;*.*FD?\0Образи Flux (*.FDI)\0*.FDI\0Образи Surface (*.86F;*.MFM)\0*.86F;*.MFM\0Усі файли (*.*)\0*.*\0"
     IDS_2111    "Неможливо ініціалізувати FreeType"
     IDS_2112    "Неможливо ініціалізувати SDL, потрібно SDL2.dll"
     IDS_2113    "Ви впевнені, що хочете виконати холодне перезавантаження емульованої машини?"
     IDS_2114    "Ви впевнені, що хочете вийти з 86Box?"
     IDS_2115    "Неможливо ініціалізувати Ghostscript"
     IDS_2116    "Магнітооптичний %i (%ls): %ls"
-    IDS_2117    "Образи магнітооптичних дисків (*.IM?;*.MDI)\0*.IM?;*.MDI\0Все файлы (*.*)\0*.*\0"
+    IDS_2117    "Образи магнітооптичних дисків (*.IM?;*.MDI)\0*.IM?;*.MDI\0Усі файли (*.*)\0*.*\0"
     IDS_2118    "Ласкаво просимо в 86Box!"
     IDS_2119    "Вбудований контролер"
     IDS_2120    "Вихід"
@@ -503,7 +504,7 @@ BEGIN
 #else
 #define LIB_NAME_GS "libgs"
 #endif
-    IDS_2133    LIB_NAME_GS " потрібно для автоматичного перетворення файлів PostScript в PDF.\n\nВсі документи, відправлені на загальний принтер PostScript, будуть збережені у вигляді файлів PostScript (.ps)."
+    IDS_2133    LIB_NAME_GS " потрібно для автоматичного перетворення файлів PostScript в PDF.\n\nУсі документи, відправлені на загальний принтер PostScript, будуть збережені у вигляді файлів PostScript (.ps)."
 #ifdef _WIN32
 #define LIB_NAME_FLUIDSYNTH "libfluidsynth.dll"
 #else
@@ -527,7 +528,7 @@ BEGIN
     IDS_2149    "Касета: %s"
     IDS_2150    "Образи касет (*.PCM;*.RAW;*.WAV;*.CAS)\0*.PCM;*.RAW;*.WAV;*.CAS\0Усі файли (*.*)\0*. *\0"
     IDS_2151    "Картридж %i: %ls"
-    IDS_2152    "Образи картриджів (*.A;*.B;*.JRC)\0*.A;*.B;*.JRC\0Всі файли (*.*)\0*.*\0"
+    IDS_2152    "Образи картриджів (*.A;*.B;*.JRC)\0*.A;*.B;*.JRC\0Усі файли (*.*)\0*.*\0"
     IDS_2153    "Помилка ініціалізації рендерера"
     IDS_2154    "Неможливо ініціалізувати рендерер OpenGL (3.0). Будь ласка, використовуйте інший рендерер."
     IDS_2155    "Відновити виконання"
@@ -541,7 +542,7 @@ BEGIN
     IDS_2163    "No Dynarec"
     IDS_2164    "Old Dynarec"
     IDS_2165    "New Dynarec"
-    IDS_2166    "Video card #2 ""%hs"" is not available due to missing ROMs in the roms/video directory. Disabling the second video card."
+    IDS_2166    "Відеокарта #2 ""%hs"" недоступна через відсутність файлу її ПЗУ в каталозі roms/video. Відключення другої відеокарти."
 END
 
 STRINGTABLE DISCARDABLE
@@ -556,7 +557,7 @@ BEGIN
     IDS_4103    "Вибрати існуючий жорсткий диск"
     IDS_4104    "Розмір образів дисків HDI не може перевищувати 4 ГБ."
     IDS_4105    "Розмір образів дисків не може перевищувати 127 ГБ."
-    IDS_4106    "Образи жорстких дисків (*.HD?;*.IM?;*.VHD)\0*.HD?;*.IM?;*.VHD\0Всі файли (*.*)\0*.*\0 "
+    IDS_4106    "Образи жорстких дисків (*.HD?;*.IM?;*.VHD)\0*.HD?;*.IM?;*.VHD\0Усі файли (*.*)\0*.*\0 "
     IDS_4107    "Неможливо прочитати файл"
     IDS_4108    "Неможливо записати файл"
     IDS_4109    "Образи HDI або HDX з розміром сектора, відмінним від 512, не підтримуються."
@@ -580,7 +581,7 @@ BEGIN
     IDS_4127    "Диференційований образ VHD (.vhd)"
     IDS_4128    "Великі блоки (2 МБ)"
     IDS_4129    "Маленькі блоки (512 КБ)"
-    IDS_4130    "Файли VHD (*.VHD)\0*.VHD\0Всі файли (*.*)\0*.*\0"
+    IDS_4130    "Файли VHD (*.VHD)\0*.VHD\0Усі файли (*.*)\0*.*\0"
     IDS_4131    "Виберіть батьківський VHD"
     IDS_4132    "Це може означати, що батьківський образ був змінений після того, як було створено диференційований образ.\n\nЦе також може статися, якщо файли зображення були переміщені або скопійовані, або через помилку в програмі, що створила цей диск.\n \nВи хочете виправити тимчасові позначки?"
     IDS_4133    "Тимчасові мітки батьківського та дочірнього дисків не співпадають"
diff --git a/src/win/languages/zh-CN.rc b/src/win/languages/zh-CN.rc
index 7a69ccbac..40ed20210 100644
--- a/src/win/languages/zh-CN.rc
+++ b/src/win/languages/zh-CN.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "启用 (本地时间)"
 #define STR_ENABLED_UTC   "启用 (UTC)"
 #define STR_DYNAREC       "动态重编译器"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "显卡:"
 #define STR_VIDEO_2       "显卡 2:"
diff --git a/src/win/languages/zh-TW.rc b/src/win/languages/zh-TW.rc
index eda95096f..d6922164b 100644
--- a/src/win/languages/zh-TW.rc
+++ b/src/win/languages/zh-TW.rc
@@ -278,6 +278,7 @@ END
 #define STR_ENABLED_LOCAL "啟用 (本地時間)"
 #define STR_ENABLED_UTC   "啟用 (UTC)"
 #define STR_DYNAREC       "動態重編譯器"
+#define STR_SOFTFLOAT     "Softfloat FPU"
 
 #define STR_VIDEO         "顯示卡:"
 #define STR_VIDEO_2       "顯示卡 2:"
diff --git a/src/win/win_settings.c b/src/win/win_settings.c
index 7129230a3..104ae0078 100644
--- a/src/win/win_settings.c
+++ b/src/win/win_settings.c
@@ -91,6 +91,7 @@ static uint32_t      temp_mem_size;
 #ifdef USE_DYNAREC
 static int temp_dynarec;
 #endif
+static int temp_fpu_softfloat;
 
 /* Video category */
 static int temp_gfxcard[2], temp_ibm8514, temp_voodoo, temp_xga;
@@ -331,6 +332,7 @@ win_settings_init(void)
 #ifdef USE_DYNAREC
     temp_dynarec = cpu_use_dynarec;
 #endif
+    temp_fpu_softfloat = fpu_softfloat;
     temp_fpu  = fpu_type;
     temp_sync = time_sync;
 
@@ -460,6 +462,7 @@ win_settings_changed(void)
 #ifdef USE_DYNAREC
     i = i || (temp_dynarec != cpu_use_dynarec);
 #endif
+    i = i || (temp_fpu_softfloat != fpu_softfloat);
     i = i || (temp_fpu != fpu_type);
     i = i || (temp_sync != time_sync);
 
@@ -553,6 +556,7 @@ win_settings_save(void)
 #ifdef USE_DYNAREC
     cpu_use_dynarec = temp_dynarec;
 #endif
+    fpu_softfloat  = temp_fpu_softfloat;
     fpu_type  = temp_fpu;
     time_sync = temp_sync;
 
@@ -678,6 +682,8 @@ win_settings_machine_recalc_fpu(HWND hdlg)
         c++;
     }
 
+    settings_set_check(hdlg, IDC_CHECK_SOFTFLOAT, temp_fpu_softfloat);
+
     settings_enable_window(hdlg, IDC_COMBO_FPU, c > 1);
 
     temp_fpu = fpu_get_type_from_index(temp_cpu_f, temp_cpu, settings_get_cur_sel(hdlg, IDC_COMBO_FPU));
@@ -921,6 +927,8 @@ win_settings_machine_proc(HWND hdlg, UINT message, WPARAM wParam, LPARAM lParam)
             settings_set_check(hdlg, IDC_CHECK_DYNAREC, 0);
 #endif
 
+            settings_set_check(hdlg, IDC_CHECK_SOFTFLOAT, 0);
+
             h  = GetDlgItem(hdlg, IDC_MEMSPIN);
             h2 = GetDlgItem(hdlg, IDC_MEMTEXT);
             SendMessage(h, UDM_SETBUDDY, (WPARAM) h2, 0);
@@ -1014,6 +1022,8 @@ win_settings_machine_proc(HWND hdlg, UINT message, WPARAM wParam, LPARAM lParam)
             temp_dynarec = settings_get_check(hdlg, IDC_CHECK_DYNAREC);
 #endif
 
+            temp_fpu_softfloat = settings_get_check(hdlg, IDC_CHECK_SOFTFLOAT);
+
             if (settings_get_check(hdlg, IDC_RADIO_TS_DISABLED))
                 temp_sync = TIME_SYNC_DISABLED;