From 524721406175956c3ee7c828227f0708d4179c8e Mon Sep 17 00:00:00 2001 From: TC1995 Date: Mon, 12 Aug 2024 19:52:52 +0200 Subject: [PATCH 01/26] Video changes of the day (August 12th, 2024, S3/TGUI/SVGA)) SVGA: Added an indicator of the internal line to make matters easier for some vertical resolution problems. TGUI 9400CXi: Fixed the basic acceleration as much as I could. TGUI 9440AGi+: As with the line indicator, make matters easier for some resolution problems plus some refactoring of the bpp. Added write protection to some regs based on some documentation on the vt8601. TGUI cursor: fixed the left edge of the cursor X offset. S3: Apply the vblankstart = dispend statement only on enhanced modes, fixes vertical display on The Incredible Machine for DOS. Commented out some excess logs. --- src/include/86box/vid_svga.h | 1 + src/video/vid_s3.c | 11 +- src/video/vid_svga.c | 8 +- src/video/vid_tgui9440.c | 403 +++++++++++++++++++++-------------- 4 files changed, 255 insertions(+), 168 deletions(-) diff --git a/src/include/86box/vid_svga.h b/src/include/86box/vid_svga.h index 3d68c1a34..fef9b2122 100644 --- a/src/include/86box/vid_svga.h +++ b/src/include/86box/vid_svga.h @@ -87,6 +87,7 @@ typedef struct svga_t { int dac_b; int vtotal; int dispend; + int vdisp; int vsyncstart; int split; int vblankstart; diff --git a/src/video/vid_s3.c b/src/video/vid_s3.c index 2ad525892..7dc070fd3 100644 --- a/src/video/vid_s3.c +++ b/src/video/vid_s3.c @@ -3233,6 +3233,8 @@ s3_recalctimings(svga_t *svga) svga->hdisp = svga->hdisp_old; svga->ma_latch |= (s3->ma_ext << 16); + svga->lowres = (!!(svga->attrregs[0x10] & 0x40) && !(svga->crtc[0x3a] & 0x10)); + if (s3->chip >= S3_86C928) { if (svga->crtc[0x5d] & 0x01) svga->htotal |= 0x100; @@ -3246,8 +3248,8 @@ s3_recalctimings(svga_t *svga) svga->dispend |= 0x400; if (svga->crtc[0x5e] & 0x04) svga->vblankstart |= 0x400; - else - svga->vblankstart = svga->dispend; + else if ((svga->crtc[0x3a] & 0x10) && !svga->lowres) + svga->vblankstart = svga->dispend; /*Applies only to Enhanced modes*/ if (svga->crtc[0x5e] & 0x10) svga->vsyncstart |= 0x400; if (svga->crtc[0x5e] & 0x40) @@ -3294,8 +3296,6 @@ s3_recalctimings(svga_t *svga) break; } - svga->lowres = (!!(svga->attrregs[0x10] & 0x40) && !(svga->crtc[0x3a] & 0x10)); - if (s3->chip != S3_86C801) mask |= 0x01; switch (svga->crtc[0x50] & mask) { @@ -4140,6 +4140,9 @@ s3_recalctimings(svga_t *svga) if (svga->crtc[0x31] & 0x08) { svga->vram_display_mask = s3->vram_mask; if (svga->bpp == 8) { + if (!(svga->crtc[0x5e] & 0x04)) + svga->vblankstart = svga->dispend; /*Applies only to Enhanced modes*/ + /*Enhanced 4bpp mode, just like the 8bpp mode per the spec. */ svga->render = svga_render_8bpp_highres; svga->rowoffset <<= 1; diff --git a/src/video/vid_svga.c b/src/video/vid_svga.c index 93c1eb669..bd31abd23 100644 --- a/src/video/vid_svga.c +++ b/src/video/vid_svga.c @@ -687,7 +687,7 @@ svga_recalctimings(svga_t *svga) } else if ((svga->gdcreg[5] & 0x60) == 0x20) { if (svga->seqregs[1] & 8) { /*Low res (320)*/ svga->render = svga_render_2bpp_lowres; - pclog("2 bpp low res\n"); + svga_log("2 bpp low res\n"); } else svga->render = svga_render_2bpp_highres; } else { @@ -859,8 +859,10 @@ svga_recalctimings(svga_t *svga) svga->y_add = (svga->monitor->mon_overscan_y >> 1); svga->x_add = (svga->monitor->mon_overscan_x >> 1); - if (svga->vblankstart < svga->dispend) + if (svga->vblankstart < svga->dispend) { + svga_log("DISPEND > VBLANKSTART.\n"); svga->dispend = svga->vblankstart; + } crtcconst = svga->clock * svga->char_width; if (ibm8514_active && (svga->dev8514 != NULL)) { @@ -1222,9 +1224,11 @@ svga_poll(void *priv) if (!svga->override) { if (svga->vertical_linedbl) { wy = (svga->lastline - svga->firstline) << 1; + svga->vdisp = wy + 1; svga_doblit(wx, wy, svga); } else { wy = svga->lastline - svga->firstline; + svga->vdisp = wy + 1; svga_doblit(wx, wy, svga); } } diff --git a/src/video/vid_tgui9440.c b/src/video/vid_tgui9440.c index 25988fcc6..9ec7e6105 100644 --- a/src/video/vid_tgui9440.c +++ b/src/video/vid_tgui9440.c @@ -145,12 +145,11 @@ typedef struct tgui_t { uint32_t pattern_32[8 * 8]; } accel; - uint8_t ext_gdc_regs[16]; /*TGUI9400CXi only*/ - uint8_t copy_latch[16]; + uint8_t copy_latch[16]; /*TGUI9400CXi only*/ uint8_t tgui_3d8, tgui_3d9; int oldmode; - uint8_t oldctrl1, newctrl1; + uint8_t oldctrl1; uint8_t oldctrl2, newctrl2; uint8_t oldgr0e, newgr0e; @@ -160,6 +159,7 @@ typedef struct tgui_t { int ramdac_state; uint8_t ramdac_ctrl; + uint8_t alt_clock; int clock_m, clock_n, clock_k; @@ -212,9 +212,6 @@ static void tgui_ext_writel(uint32_t addr, uint32_t val, void *priv); static __inline uint32_t dword_remap(svga_t *svga, uint32_t in_addr) { - if (svga->packed_chain4) - return in_addr; - return ((in_addr << 2) & 0x3fff0) | ((in_addr >> 14) & 0xc) | (in_addr & ~0x3fffc); } @@ -297,7 +294,7 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) { tgui_t *tgui = (tgui_t *) priv; svga_t *svga = &tgui->svga; - uint8_t old; + uint8_t old, o; if (((addr & 0xFFF0) == 0x3D0 || (addr & 0xFFF0) == 0x3B0) && !(svga->miscout & 1)) addr ^= 0x60; @@ -331,6 +328,15 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->read_bank = svga->write_bank; return; + case 0x5a: + case 0x5b: + case 0x5c: + case 0x5d: + case 0x5e: + case 0x5f: + svga->seqregs[svga->seqaddr] = val; + return; + default: break; } @@ -344,20 +350,7 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) if (tgui->ramdac_state == 4) { tgui->ramdac_state = 0; tgui->ramdac_ctrl = val; - switch ((tgui->ramdac_ctrl >> 4) & 0x0f) { - case 1: - svga->bpp = 15; - break; - case 3: - svga->bpp = 16; - break; - case 0x0d: - svga->bpp = (tgui->type >= TGUI_9660) ? 32 : 24; - break; - default: - svga->bpp = 8; - break; - } + //pclog("TGUI ramdac ctrl=%02x.\n", (tgui->ramdac_ctrl >> 4) & 0x0f); svga_recalctimings(svga); return; } @@ -374,30 +367,35 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) break; case 0x3CF: - if (svga->gdcaddr == 0x23) { - svga->dpms = !!(val & 0x03); - svga_recalctimings(svga); - } - if (tgui->type == TGUI_9400CXI && svga->gdcaddr >= 16 && svga->gdcaddr < 32) { - old = tgui->ext_gdc_regs[svga->gdcaddr & 15]; - tgui->ext_gdc_regs[svga->gdcaddr & 15] = val; - if (svga->gdcaddr == 16) - tgui_recalcmapping(tgui); - return; - } + o = svga->gdcreg[svga->gdcaddr]; switch (svga->gdcaddr) { - case 0x6: + case 2: + svga->colourcompare = val; + break; + case 4: + svga->readplane = val & 3; + break; + case 5: + svga->writemode = val & 3; + svga->readmode = val & 8; + svga->chain2_read = val & 0x10; + break; + case 6: if (svga->gdcreg[6] != val) { svga->gdcreg[6] = val; tgui_recalcmapping(tgui); } - return; + break; + case 7: + svga->colournocare = val; + break; case 0x0e: svga->gdcreg[0xe] = val ^ 2; if ((svga->gdcreg[0xf] & 1) == 1) svga->read_bank = (svga->gdcreg[0xe]) * 65536; break; + case 0x0f: if (val & 1) svga->read_bank = (svga->gdcreg[0xe]) * 65536; @@ -414,6 +412,12 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->write_bank = (svga->seqregs[0xe]) * 65536; break; + case 0x23: + svga->dpms = !!(val & 0x03); + svga_recalctimings(svga); + break; + + case 0x2f: case 0x5a: case 0x5b: case 0x5c: @@ -426,11 +430,36 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) default: break; } - break; + svga->gdcreg[svga->gdcaddr] = val; + + if (tgui->type == TGUI_9400CXI) { + if ((svga->gdcaddr >= 0x10) && (svga->gdcaddr <= 0x1f)) { + tgui_recalcmapping(tgui); + return; + } + } + svga->fast = (svga->gdcreg[8] == 0xff && !(svga->gdcreg[3] & 0x18) && !svga->gdcreg[1]) && ((svga->chain4 && (svga->packed_chain4 || svga->force_old_addr)) || svga->fb_only); + if (((svga->gdcaddr == 5) && ((val ^ o) & 0x70)) || ((svga->gdcaddr == 6) && ((val ^ o) & 1))) + svga_recalctimings(svga); + return; case 0x3D4: svga->crtcreg = val; return; case 0x3D5: + if (!(svga->seqregs[0x0e] & 0x80) && !tgui->oldmode) { + switch (svga->crtcreg) { + case 0x21: + case 0x29: + case 0x2a: + case 0x38: + case 0x39: + case 0x3b: + case 0x3c: + return; + default: + break; + } + } if ((svga->crtcreg < 7) && (svga->crtc[0x11] & 0x80)) return; if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) @@ -484,7 +513,8 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) if ((tgui->accel.ger22 & 0xff) == 8) { if (svga->bpp != 24) { svga->hwcursor.x <<= 1; - if ((tgui->type == TGUI_9440) && (svga->crtc[0x1e] & 4)) + svga_recalctimings(svga); + if ((svga->vdisp == 1022) && svga->interlace) svga->hwcursor.x >>= 1; } } @@ -534,6 +564,10 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->read_bank = (val & 0x3f) * 65536; return; + case 0x3DB: + tgui->alt_clock = val & 0xe3; + return; + case 0x43c8: tgui->clock_n = val & 0x7f; tgui->clock_m = (tgui->clock_m & ~1) | (val >> 7); @@ -594,6 +628,8 @@ tgui_in(uint16_t addr, void *priv) return tgui->oldctrl1 | 0x88; return svga->seqregs[0x0e]; } + if ((svga->seqaddr >= 0x5a) && (svga->seqaddr <= 0x5f)) + return svga->seqregs[svga->seqaddr]; break; case 0x3C6: @@ -613,10 +649,10 @@ tgui_in(uint16_t addr, void *priv) break; case 0x3CF: - if (tgui->type == TGUI_9400CXI && svga->gdcaddr >= 16 && svga->gdcaddr < 32) - return tgui->ext_gdc_regs[svga->gdcaddr & 15]; if (svga->gdcaddr >= 0x5a && svga->gdcaddr <= 0x5f) return svga->gdcreg[svga->gdcaddr]; + if (svga->gdcaddr == 0x2f) + return svga->gdcreg[svga->gdcaddr]; break; case 0x3D4: return svga->crtcreg; @@ -639,6 +675,8 @@ tgui_in(uint16_t addr, void *priv) return tgui->tgui_3d8; case 0x3d9: return tgui->tgui_3d9; + case 0x3db: + return tgui->alt_clock; default: break; @@ -653,28 +691,49 @@ tgui_recalctimings(svga_t *svga) uint8_t ger22lower = (tgui->accel.ger22 & 0xff); uint8_t ger22upper = (tgui->accel.ger22 >> 8); - if (!svga->rowoffset) - svga->rowoffset = 0x100; - - if (svga->crtc[0x29] & 0x10) - svga->rowoffset |= 0x100; + if (tgui->type >= TGUI_9440) { + if ((svga->crtc[0x38] & 0x19) == 0x09) + svga->bpp = 32; + else { + switch ((tgui->ramdac_ctrl >> 4) & 0x0f) { + case 0x01: + svga->bpp = 15; + break; + case 0x03: + svga->bpp = 16; + break; + case 0x0d: + svga->bpp = 24; + break; + default: + svga->bpp = 8; + break; + } + } + } if ((tgui->type >= TGUI_9440) && (svga->bpp >= 24)) - svga->hdisp = (svga->crtc[1] + 1) * 8; + svga->hdisp = (svga->crtc[1] + 1) << 3; + + if (((svga->crtc[0x29] & 0x30) && (svga->bpp >= 15)) || !svga->rowoffset) + svga->rowoffset |= 0x100; + + //pclog("BPP=%d, DataWidth=%02x, CRTC29 bit 4-5=%02x, pixbusmode=%02x, rowoffset=%02x, doublerowoffset=%x.\n", svga->bpp, svga->crtc[0x2a] & 0x40, svga->crtc[0x29] & 0x30, svga->crtc[0x38], svga->rowoffset, svga->gdcreg[0x2f] & 4); if ((svga->crtc[0x1e] & 0xA0) == 0xA0) svga->ma_latch |= 0x10000; - if ((svga->crtc[0x27] & 0x01) == 0x01) + if (svga->crtc[0x27] & 0x01) svga->ma_latch |= 0x20000; - if ((svga->crtc[0x27] & 0x02) == 0x02) + if (svga->crtc[0x27] & 0x02) svga->ma_latch |= 0x40000; - if ((svga->crtc[0x27] & 0x04) == 0x04) + if (svga->crtc[0x27] & 0x04) svga->ma_latch |= 0x80000; if (svga->crtc[0x27] & 0x08) svga->split |= 0x400; if (svga->crtc[0x27] & 0x10) svga->dispend |= 0x400; + if (svga->crtc[0x27] & 0x20) svga->vsyncstart |= 0x400; if (svga->crtc[0x27] & 0x40) @@ -687,15 +746,18 @@ tgui_recalctimings(svga_t *svga) svga->lowres = 0; } + svga->interlace = !!(svga->crtc[0x1e] & 4); + if (svga->interlace && (tgui->type < TGUI_9440)) + svga->rowoffset >>= 1; + + if (svga->vdisp == 1020) + svga->vdisp += 2; + if ((tgui->oldctrl2 & 0x10) || (svga->crtc[0x2a] & 0x40)) svga->ma_latch <<= 1; svga->lowres = !(svga->crtc[0x2a] & 0x40); - svga->interlace = !!(svga->crtc[0x1e] & 4); - if (svga->interlace && (tgui->type < TGUI_9440)) - svga->rowoffset >>= 1; - if (tgui->type >= TGUI_9440) { if (svga->miscout & 8) svga->clock = (cpuclock * (double) (1ULL << 32)) / (((tgui->clock_n + 8) * 14318180.0) / ((tgui->clock_m + 2) * (1 << tgui->clock_k))); @@ -752,6 +814,7 @@ tgui_recalctimings(svga_t *svga) default: break; } + if (svga->gdcreg[0xf] & 0x08) { svga->htotal <<= 1; svga->hdisp <<= 1; @@ -763,7 +826,24 @@ tgui_recalctimings(svga_t *svga) switch (svga->bpp) { case 8: svga->render = svga_render_8bpp_highres; + if (svga->vdisp == 1022) { + if (svga->interlace) + svga->dispend++; + else + svga->dispend += 2; + } if (tgui->type >= TGUI_9660) { + switch (svga->vdisp) { + case 1024: + case 1200: + svga->htotal <<= 1; + svga->hdisp <<= 1; + svga->hdisp_time <<= 1; + break; + default: + break; + } +#if OLD_CODE if (svga->dispend == ((1024 >> 1) - 2)) svga->dispend += 2; if (svga->dispend == (1024 >> 1)) @@ -776,6 +856,7 @@ tgui_recalctimings(svga_t *svga) else if (!svga->interlace && (svga->dispend == 768)) svga->hdisp <<= 1; } +#endif if (ger22upper & 0x80) { svga->htotal <<= 1; @@ -785,15 +866,12 @@ tgui_recalctimings(svga_t *svga) switch (svga->hdisp) { case 640: if (!ger22lower) - svga->rowoffset = 80; + svga->rowoffset = 0x50; break; default: break; } - } else { - if ((svga->hdisp == 1280) && (svga->dispend == (1020 >> 1)) && svga->interlace) - svga->dispend++; } break; case 15: @@ -812,11 +890,10 @@ tgui_recalctimings(svga_t *svga) svga->hdisp = (svga->hdisp << 1) / 3; break; case 32: + if (svga->rowoffset == 0x100) + svga->rowoffset <<= 1; + svga->render = svga_render_32bpp_highres; - if (tgui->type >= TGUI_9660) { - if (!ger22upper) - svga->rowoffset <<= 1; - } break; default: @@ -831,14 +908,14 @@ tgui_recalcmapping(tgui_t *tgui) svga_t *svga = &tgui->svga; if (tgui->type == TGUI_9400CXI) { - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { + if (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) { mem_mapping_set_handler(&tgui->linear_mapping, tgui_ext_linear_read, NULL, NULL, tgui_ext_linear_write, tgui_ext_linear_writew, tgui_ext_linear_writel); mem_mapping_set_handler(&svga->mapping, tgui_ext_read, NULL, NULL, tgui_ext_write, tgui_ext_writew, tgui_ext_writel); - } else if (tgui->ext_gdc_regs[0] & EXT_CTRL_MONO_EXPANSION) { + } else if (svga->gdcreg[0x10] & EXT_CTRL_MONO_EXPANSION) { mem_mapping_set_handler(&tgui->linear_mapping, svga_read_linear, svga_readw_linear, svga_readl_linear, tgui_ext_linear_write, tgui_ext_linear_writew, tgui_ext_linear_writel); @@ -934,7 +1011,7 @@ tgui_recalcmapping(tgui_t *tgui) } if (tgui->type >= TGUI_9440) { - if ((tgui->mmio_base != 0x00000000) && (svga->crtc[0x39] & 1)) + if ((tgui->mmio_base != 0x00000000) && (svga->crtc[0x39] & 0x01)) mem_mapping_set_addr(&tgui->mmio_mapping, tgui->mmio_base, 0x10000); else mem_mapping_disable(&tgui->mmio_mapping); @@ -945,7 +1022,7 @@ static void tgui_hwcursor_draw(svga_t *svga, int displine) { uint32_t dat[2]; - int offset = svga->hwcursor_latch.x + svga->hwcursor_latch.xoff; + int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; int pitch = (svga->hwcursor_latch.cur_xsize == 64) ? 16 : 8; if (svga->interlace && svga->hwcursor_oddeven) @@ -1127,26 +1204,24 @@ tgui_ext_linear_read(uint32_t addr, void *priv) svga_t *svga = (svga_t *) priv; tgui_t *tgui = (tgui_t *) svga->priv; - cycles -= video_timing_read_b; + cycles -= svga->monitor->mon_video_timing_read_b; addr &= svga->decode_mask; if (addr >= svga->vram_max) return 0xff; addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; - addr = dword_remap(svga, addr); + addr &= ~0x0f; + addr = dword_remap(svga, addr); - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { - for (int c = 0; c < 16; c++) { - tgui->copy_latch[c] = svga->vram[addr]; - addr += (c & 3) ? 1 : 13; - addr &= svga->vram_mask; - } - return svga->vram[addr]; + for (int i = 0; i < 16; i++) { + tgui->copy_latch[i] = svga->vram[addr]; + addr += ((i & 3) == 3) ? 0x0d : 0x01; } - return svga_read_linear(addr, svga); + addr &= svga->vram_mask; + + return svga->vram[addr]; } static uint8_t @@ -1164,65 +1239,77 @@ tgui_ext_linear_write(uint32_t addr, uint8_t val, void *priv) { svga_t *svga = (svga_t *) priv; const tgui_t *tgui = (tgui_t *) svga->priv; - int c; - int bpp = (tgui->ext_gdc_regs[0] & EXT_CTRL_16BIT); - uint8_t fg[2] = { tgui->ext_gdc_regs[4], tgui->ext_gdc_regs[5] }; - uint8_t bg[2] = { tgui->ext_gdc_regs[1], tgui->ext_gdc_regs[2] }; - uint8_t mask = tgui->ext_gdc_regs[7]; + int bpp = (svga->gdcreg[0x10] & EXT_CTRL_16BIT); + uint8_t fg[2] = { svga->gdcreg[0x14], svga->gdcreg[0x15] }; + uint8_t bg[2] = { svga->gdcreg[0x11], svga->gdcreg[0x12] }; - cycles -= video_timing_write_b; + cycles -= svga->monitor->mon_video_timing_write_b; addr &= svga->decode_mask; if (addr >= svga->vram_max) return; - addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; - addr = dword_remap(svga, addr); + addr &= svga->vram_mask; + addr &= (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; + addr = dword_remap(svga, addr); + svga->changedvram[addr >> 12] = svga->monitor->mon_changeframecount; - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { - for (c = 0; c < 16; c++) { - svga->vram[addr] = tgui->copy_latch[c]; - addr += ((c & 3) == 3) ? 13 : 1; + if (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) { + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) + svga->vram[addr] = tgui->copy_latch[i]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } - } else if (tgui->ext_gdc_regs[0] & (EXT_CTRL_MONO_EXPANSION | EXT_CTRL_MONO_TRANSPARENT)) { - if (tgui->ext_gdc_regs[0] & EXT_CTRL_MONO_TRANSPARENT) { + } else { + if (svga->gdcreg[0x10] & EXT_CTRL_MONO_TRANSPARENT) { if (bpp) { - for (c = 7; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = fg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) + svga->vram[addr] = fg[i & 1]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 7; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = tgui->ext_gdc_regs[4]; - addr += (c == 4) ? 13 : 1; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) + svga->vram[addr] = fg[0]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } else { if (bpp) { - for (c = 7; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? fg[(c & 1) ^ 1] : bg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = fg[i & 1]; + } else { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = bg[i & 1]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 7; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? tgui->ext_gdc_regs[4] : tgui->ext_gdc_regs[1]; - addr += (c == 4) ? 13 : 1; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = fg[0]; + } else { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = bg[0]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } - } else - svga_write_linear(addr, val, svga); + } } static void @@ -1230,94 +1317,85 @@ tgui_ext_linear_writew(uint32_t addr, uint16_t val, void *priv) { svga_t *svga = (svga_t *) priv; const tgui_t *tgui = (tgui_t *) svga->priv; - int c; - int bpp = (tgui->ext_gdc_regs[0] & EXT_CTRL_16BIT); - uint8_t fg[2] = { tgui->ext_gdc_regs[4], tgui->ext_gdc_regs[5] }; - uint8_t bg[2] = { tgui->ext_gdc_regs[1], tgui->ext_gdc_regs[2] }; - uint16_t mask = (tgui->ext_gdc_regs[7] << 8) | tgui->ext_gdc_regs[8]; + int bpp = (svga->gdcreg[0x10] & EXT_CTRL_16BIT); + uint8_t fg[2] = { svga->gdcreg[0x14], svga->gdcreg[0x15] }; + uint8_t bg[2] = { svga->gdcreg[0x11], svga->gdcreg[0x12] }; + uint16_t mask = svga->gdcreg[0x18] | (svga->gdcreg[0x17] << 8); - cycles -= video_timing_write_w; + cycles -= svga->monitor->mon_video_timing_write_w; addr &= svga->decode_mask; if (addr >= svga->vram_max) return; + addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; + addr &= ~0x0f; + addr = dword_remap(svga, addr); - addr = dword_remap(svga, addr); svga->changedvram[addr >> 12] = svga->monitor->mon_changeframecount; - val = (val >> 8) | (val << 8); - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { - for (c = 0; c < 16; c++) { - svga->vram[addr] = tgui->copy_latch[c]; - addr += (c & 3) ? 1 : 13; + if (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) { + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) + svga->vram[addr] = tgui->copy_latch[i]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } - } else if (tgui->ext_gdc_regs[0] & (EXT_CTRL_MONO_EXPANSION | EXT_CTRL_MONO_TRANSPARENT)) { - if (tgui->ext_gdc_regs[0] & EXT_CTRL_MONO_TRANSPARENT) { + } else { + if (svga->gdcreg[0x10] & EXT_CTRL_MONO_TRANSPARENT) { if (bpp) { - for (c = 15; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = fg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) + svga->vram[addr] = fg[i & 1]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 15; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = tgui->ext_gdc_regs[4]; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) + svga->vram[addr] = fg[0]; - addr += (c & 3) ? 1 : 13; + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } else { if (bpp) { - for (c = 15; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? fg[(c & 1) ^ 1] : bg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) { + if (mask & (0x8000 >> i)) + svga->vram[addr] = fg[i & 1]; + } else { + if (mask & (0x8000 >> i)) + svga->vram[addr] = bg[i & 1]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 15; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? tgui->ext_gdc_regs[4] : tgui->ext_gdc_regs[1]; - - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) { + if (mask & (0x8000 >> i)) + svga->vram[addr] = fg[0]; + } else { + if (mask & (0x8000 >> i)) + svga->vram[addr] = bg[0]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } - } else - svga_writew_linear(addr, val, svga); + } } static void tgui_ext_linear_writel(uint32_t addr, uint32_t val, void *priv) { - svga_t *svga = (svga_t *) priv; - const tgui_t *tgui = (tgui_t *) svga->priv; - - cycles -= video_timing_write_l; - - addr &= svga->decode_mask; - if (addr >= svga->vram_max) - return; - addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; - - addr = dword_remap(svga, addr); - svga->changedvram[addr >> 12] = svga->monitor->mon_changeframecount; - - if (tgui->ext_gdc_regs[0] & (EXT_CTRL_MONO_EXPANSION | EXT_CTRL_MONO_TRANSPARENT | EXT_CTRL_LATCH_COPY)) { - tgui_ext_linear_writew(addr, val & 0xffff, priv); - tgui_ext_linear_writew(addr + 2, val >> 16, priv); - } else { - svga_writel_linear(addr, val, svga); - } + tgui_ext_linear_writew(addr, val, priv); } static void @@ -1490,6 +1568,8 @@ tgui_accel_command(int count, uint32_t cpu_dat, tgui_t *tgui) case 32: tgui->accel.pitch <<= 1; break; + default: + break; } #if 0 pclog("TGUI accel command = %x, ger22 = %04x, hdisp = %d, dispend = %d, vtotal = %d, rowoffset = %d, svgabpp = %d, interlace = %d, accelbpp = %d, pitch = %d.\n", tgui->accel.command, tgui->accel.ger22, svga->hdisp, svga->dispend, svga->vtotal, svga->rowoffset, svga->bpp, svga->interlace, tgui->accel.bpp, tgui->accel.pitch); @@ -3219,7 +3299,6 @@ tgui_init(const device_t *info) if (tgui->type >= TGUI_9440) { svga->packed_chain4 = 1; - tgui->i2c = i2c_gpio_init("ddc_tgui"); tgui->ddc = ddc_init(i2c_gpio_get_bus(tgui->i2c)); } From 6cc761518526a26619f48aabc41f966f02526b89 Mon Sep 17 00:00:00 2001 From: OBattler Date: Tue, 13 Aug 2024 00:26:53 +0200 Subject: [PATCH 02/26] Keyboard AT: Send ACK before the resent scan code on command FE, fixes #4713. --- src/device/keyboard_at.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/device/keyboard_at.c b/src/device/keyboard_at.c index bcb4d646b..882174e4e 100644 --- a/src/device/keyboard_at.c +++ b/src/device/keyboard_at.c @@ -2102,6 +2102,7 @@ keyboard_at_write(void *priv) /* TODO: This is supposed to resend multiple bytes after some commands. */ case 0xfe: /* resend last scan code */ keyboard_at_log("%s: resend last scan code\n", dev->name); + kbc_at_dev_queue_add(dev, 0xfa, 0); kbc_at_dev_queue_add(dev, dev->last_scan_code, 0); break; From 059c6156df70df3dc431f5a2760d7bc37fd7edd6 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Sat, 10 Aug 2024 19:57:46 -0400 Subject: [PATCH 03/26] Break out the K5 timings --- src/codegen/codegen.h | 1 + src/codegen_new/codegen.h | 1 + src/cpu/CMakeLists.txt | 6 + src/cpu/codegen_timing_k5.c | 2232 +++++++++++++++++++++++++++++++++++ 4 files changed, 2240 insertions(+) create mode 100644 src/cpu/codegen_timing_k5.c diff --git a/src/codegen/codegen.h b/src/codegen/codegen.h index 6d30211a6..d020fc57f 100644 --- a/src/codegen/codegen.h +++ b/src/codegen/codegen.h @@ -311,6 +311,7 @@ extern codegen_timing_t codegen_timing_686; extern codegen_timing_t codegen_timing_486; extern codegen_timing_t codegen_timing_winchip; extern codegen_timing_t codegen_timing_winchip2; +extern codegen_timing_t codegen_timing_k5; extern codegen_timing_t codegen_timing_k6; extern codegen_timing_t codegen_timing_p6; diff --git a/src/codegen_new/codegen.h b/src/codegen_new/codegen.h index deeeb899c..eecfa249b 100644 --- a/src/codegen_new/codegen.h +++ b/src/codegen_new/codegen.h @@ -341,6 +341,7 @@ extern codegen_timing_t codegen_timing_686; extern codegen_timing_t codegen_timing_486; extern codegen_timing_t codegen_timing_winchip; extern codegen_timing_t codegen_timing_winchip2; +extern codegen_timing_t codegen_timing_k5; extern codegen_timing_t codegen_timing_k6; extern codegen_timing_t codegen_timing_p6; diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt index a3677767d..b12356ccc 100644 --- a/src/cpu/CMakeLists.txt +++ b/src/cpu/CMakeLists.txt @@ -19,6 +19,12 @@ add_library(cpu OBJECT cpu.c cpu_table.c fpu.c x86.c 808x.c 386.c 386_common.c if(AMD_K5) target_compile_definitions(cpu PRIVATE USE_AMD_K5) + +if(DYNAREC) + add_library(ctk5 OBJECT codegen_timing_k5.c) + target_link_libraries(86Box ctk5) +endif() + endif() if(CYRIX_6X86) diff --git a/src/cpu/codegen_timing_k5.c b/src/cpu/codegen_timing_k5.c new file mode 100644 index 000000000..bdcc0ce43 --- /dev/null +++ b/src/cpu/codegen_timing_k5.c @@ -0,0 +1,2232 @@ +/*Most of the vector instructions here are a total guess. + Some of the timings are based on http://http://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_k5_InstLatX86.txt*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include <86box/machine.h> + +#include "x86.h" +#include "x86_ops.h" +#include "x86seg_common.h" +#include "x87_sf.h" +#include "x87.h" +#include "386_common.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +typedef enum uop_type_t { + UOP_ALU = 0, /*Executes in Integer X or Y units*/ + UOP_ALUX, /*Executes in Integer X unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORE, /*Executes in Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORE, /*Executes in Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORE, /*Executes in Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MEU, /*Executes in Multimedia unit*/ + UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ + UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ + UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_LIMM /*Does not require an execution unit*/ +} uop_type_t; + +typedef enum decode_type_t { + DECODE_SHORT, + DECODE_LONG, + DECODE_VECTOR +} decode_type_t; + +#define MAX_UOPS 10 + +typedef struct risc86_uop_t { + uop_type_t type; + int throughput; + int latency; +} risc86_uop_t; + +typedef struct risc86_instruction_t { + int nr_uops; + decode_type_t decode_type; + risc86_uop_t uop[MAX_UOPS]; +} risc86_instruction_t; + +static const risc86_instruction_t alu_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alux_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t branch_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t limm_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t load_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t store_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t bswap_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t leave_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t lods_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t loop_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mov_reg_seg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, +}; +static const risc86_instruction_t movs_op = { + .nr_uops = 4, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pop_reg_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pop_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t push_imm_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, +}; +static const risc86_instruction_t push_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t push_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t stos_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_reg_b_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_mem_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_mem_imm_b_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t xchg_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t m3dn_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mmx_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mmx_mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t mmx_shift_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_3dn_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_mmx_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_mmx_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t load_mmx_shift_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mload_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t mstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t pmul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t float_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t load_float_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t fstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t fdiv_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +}; +static const risc86_instruction_t fdiv_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_FLOAT, .throughput = 40, .latency = 40} +}; +static const risc86_instruction_t fsin_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} +}; +static const risc86_instruction_t fsqrt_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} +}; + +static const risc86_instruction_t vector_fldcw_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} +}; +static const risc86_instruction_t vector_float_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t vector_float_l_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} +}; +static const risc86_instruction_t vector_flde_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t vector_fste_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, + .uop[1] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t vector_alu1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu2_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu6_op = { + .nr_uops = 6, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux6_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_arpl_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_bound_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_bsx_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_call_far_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cli_sti_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} +}; +static const risc86_instruction_t vector_cmps_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cmpsb_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cmpxchg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, +}; +static const risc86_instruction_t vector_cmpxchg_b_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, +}; +static const risc86_instruction_t vector_cpuid_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} +}; +static const risc86_instruction_t vector_div16_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_div16_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_div32_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +}; +static const risc86_instruction_t vector_div32_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 18, .latency = 18} +}; +static const risc86_instruction_t vector_emms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} +}; +static const risc86_instruction_t vector_enter_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALU, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_femms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} +}; +static const risc86_instruction_t vector_in_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} +}; +static const risc86_instruction_t vector_ins_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_int_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_iret_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[3] = { .type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_invd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} +}; +static const risc86_instruction_t vector_jmp_far_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_loop_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_lss_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mov_mem_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mov_seg_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mov_seg_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul64_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul64_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_out_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_outs_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1 }, + .uop[1] = { .type = UOP_STORE, .throughput = 10, .latency = 10}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_pusha_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_popa_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_LOAD, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_popf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 17, .latency = 17} +}; +static const risc86_instruction_t vector_push_mem_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_pushf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_ret_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_retf_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_scas_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_scasb_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_setcc_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_setcc_reg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_test_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_test_mem_b_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_xchg_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_xlat_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t vector_wbinvd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} +}; + +#define INVALID NULL + +static const risc86_instruction_t *opcode_timings_k5[256] = { + // clang-format off +/* ADD ADD ADD ADD*/ +/*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, +/* OR OR OR OR*/ + &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* OR OR PUSH CS */ + &alux_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/* ADC ADC PUSH SS POP SS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, +/* SBB SBB SBB SBB*/ +/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/* SBB SBB PUSH DS POP DS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* AND AND DAA*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, +/* SUB SUB SUB SUB*/ + &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* SUB SUB DAS*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* XOR XOR AAA*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, +/* CMP CMP CMP CMP*/ + &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, +/* CMP CMP AAS*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, +/* INSB INSW OUTSB OUTSW*/ + &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, +/* PUSHF POPF SAHF LAHF*/ + &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + +/* MOV*/ +/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/* LES LDS MOV MOV*/ + &vector_lss_op, &vector_lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, +/* INT3 INT INTO IRET*/ + &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &vector_jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, +/* CLD STD INCDEC*/ + &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_mod3[256] = { + // clang-format off +/* ADD ADD ADD ADD*/ +/*00*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, +/* OR OR OR OR*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* OR OR PUSH CS */ + &alux_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/* ADC ADC PUSH SS POP SS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, +/* SBB SBB SBB SBB*/ + &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/* SBB SBB PUSH DS POP DS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* AND AND DAA*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, +/* SUB SUB SUB SUB*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* SUB SUB DAS*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* XOR XOR AAA*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, +/* CMP CMP CMP CMP*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* CMP CMP AAS*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, +/* INSB INSW OUTSB OUTSW*/ + &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, +/* PUSHF POPF SAHF LAHF*/ + &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + +/* MOV*/ +/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/* LES LDS MOV MOV*/ + &vector_lss_op, &vector_lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, +/* INT3 INT INTO IRET*/ + &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &vector_jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, +/* CLD STD INCDEC*/ + &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_0f[256] = { + // clang-format off +/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, + &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &vector_femms_op, &load_3dn_op, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + INVALID, INVALID, &mload_op, &mload_op, + +/*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mstore_op, &mstore_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + +/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, + &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, + &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, + &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, + +/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, + &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, + &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, + +/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &vector_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_0f_mod3[256] = { + // clang-format off +/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, + &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &vector_femms_op, &m3dn_op, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + +/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, + &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, + &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, + &vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, + +/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, + &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, + &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, + +/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_0f0f[256] = { + // clang-format off +/*00*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &load_3dn_op, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &load_3dn_op, INVALID, INVALID, + +/*20*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*70*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*80*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*90*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_3dn_op, INVALID, &load_3dn_op, &load_3dn_op, + INVALID, INVALID, &load_3dn_op, INVALID, + INVALID, INVALID, &load_3dn_op, INVALID, + +/*a0*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_3dn_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, + INVALID, INVALID, &load_3dn_op, INVALID, + INVALID, INVALID, &load_3dn_op, INVALID, + +/*b0*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, &load_mmx_op, + +/*c0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*d0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*e0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*f0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_0f0f_mod3[256] = { + // clang-format off +/*00*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &m3dn_op, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &m3dn_op, INVALID, INVALID, + +/*20*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*70*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*80*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*90*/ &m3dn_op, INVALID, INVALID, INVALID, + &m3dn_op, INVALID, &m3dn_op, &m3dn_op, + INVALID, INVALID, &m3dn_op, INVALID, + INVALID, INVALID, &m3dn_op, INVALID, + +/*a0*/ &m3dn_op, INVALID, INVALID, INVALID, + &m3dn_op, INVALID, &mmx_mul_op, &mmx_mul_op, + INVALID, INVALID, &m3dn_op, INVALID, + INVALID, INVALID, &m3dn_op, INVALID, + +/*b0*/ &m3dn_op, INVALID, INVALID, INVALID, + &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, &mmx_op, + +/*c0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*d0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*e0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*f0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_shift[8] = { + // clang-format off + &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, + &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_shift_b[8] = { + // clang-format off + &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, + &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_shift_mod3[8] = { + // clang-format off + &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_shift_b_mod3[8] = { + // clang-format off + &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_80[8] = { + // clang-format off + &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_80_mod3[8] = { + // clang-format off + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_8x[8] = { + // clang-format off + &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_8x_mod3[8] = { + // clang-format off + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_f6[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_f6_mod3[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_reg_b_op, INVALID, &alux_op, &alux_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_f7[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_f7_mod3[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_reg_op, INVALID, &alu_op, &alu_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_ff[8] = { + // clang-format off +/* INC DEC CALL CALL far*/ + &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_ff_mod3[8] = { + // clang-format off +/* INC DEC CALL CALL far*/ + &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_d8[8] = { + // clang-format off +/* FADDs FMULs FCOMs FCOMPs*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FSUBs FSUBRs FDIVs FDIVRs*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_d8_mod3[8] = { + // clang-format off +/* FADD FMUL FCOM FCOMP*/ + &float_op, &float_op, &float_op, &float_op, +/* FSUB FSUBR FDIV FDIVR*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_d9[8] = { + // clang-format off +/* FLDs FSTs FSTPs*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDENV FLDCW FSTENV FSTCW*/ + &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_d9_mod3[64] = { + // clang-format off + /*FLD*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FXCH*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FNOP*/ + &float_op, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, +/* opFCHS opFABS*/ + &float_op, &float_op, INVALID, INVALID, +/* opFTST opFXAM*/ + &float_op, &float_op, INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + &float_op, &float_op, &float_op, &float_op, +/* opFLDEG2 opFLDLN2 opFLDZ*/ + &float_op, &float_op, &float_op, INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + &fsin_op, &fsin_op, &fsin_op, &fsin_op, +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, &float_op, &float_op, +/* opFPREM opFSQRT opFSINCOS*/ + &fdiv_op, INVALID, &fsqrt_op, &fsin_op, +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + &float_op, &fdiv_op, &fsin_op, &fsin_op + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_da[8] = { + // clang-format off +/* FIADDl FIMULl FICOMl FICOMPl*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_da_mod3[8] = { + // clang-format off + INVALID, INVALID, INVALID, INVALID, +/* FCOMPP*/ + INVALID, &float_op, INVALID, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_db[8] = { + // clang-format off +/* FLDil FSTil FSTPil*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDe FSTPe*/ + INVALID, &vector_flde_op, INVALID, &vector_fste_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_db_mod3[64] = { + // clang-format off + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* opFNOP opFCLEX opFINIT*/ + INVALID, &float_op, &float_op, &float_op, +/* opFNOP opFNOP*/ + &float_op, &float_op, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_dc[8] = { + // clang-format off +/* FADDd FMULd FCOMd FCOMPd*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FSUBd FSUBRd FDIVd FDIVRd*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_dc_mod3[8] = { + // clang-format off +/* opFADDr opFMULr*/ + &float_op, &float_op, INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + &float_op, &float_op, &fdiv_op, &fdiv_op + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_dd[8] = { + // clang-format off +/* FLDd FSTd FSTPd*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FRSTOR FSAVE FSTSW*/ + &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_dd_mod3[8] = { + // clang-format off +/* FFFREE FST FSTP*/ + &float_op, INVALID, &float_op, &float_op, +/* FUCOM FUCOMP*/ + &float_op, &float_op, INVALID, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_de[8] = { + // clang-format off +/* FIADDw FIMULw FICOMw FICOMPw*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_de_mod3[8] = { + // clang-format off +/* FADDP FMULP FCOMPP*/ + &float_op, &float_op, INVALID, &float_op, +/* FSUBP FSUBRP FDIVP FDIVRP*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_df[8] = { + // clang-format off +/* FILDiw FISTiw FISTPiw*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FILDiq FBSTP FISTPiq*/ + INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_df_mod3[8] = { + // clang-format off + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + &float_op, INVALID, INVALID, INVALID + // clang-format on +}; + +static uint8_t last_prefix; +static int prefixes; + +static int decode_timestamp; +static int last_complete_timestamp; + +typedef struct k5_unit_t { + uint32_t uop_mask; + int first_available_cycle; +} k5_unit_t; + +static int nr_units; +static k5_unit_t *units; + +/*k5 has dedicated MMX unit*/ +static k5_unit_t k5_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) }, /*Integer X*/ + { .uop_mask = (1 << UOP_ALU) }, /*Integer Y*/ + { .uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) }, /*Multimedia*/ + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ +}; +#define NR_k5_UNITS (sizeof(k5_units) / sizeof(k5_unit_t)) + +/*k5-2 and later integrate MMX into ALU X & Y, sharing multiplier, shifter and + 3DNow ALU between two execution units*/ +static k5_unit_t k5_2_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ +}; +#define NR_k5_2_UNITS (sizeof(k5_2_units) / sizeof(k5_unit_t)) + +/*First available cycles of shared execution units. Each of these can be submitted + to by ALU X and Y*/ +static int mul_first_available_cycle; +static int shift_first_available_cycle; +static int m3dnow_first_available_cycle; + +static int +uop_run(const risc86_uop_t *uop, int decode_time) +{ + k5_unit_t *best_unit = NULL; + int best_start_cycle = 99999; + + /*UOP_LIMM does not require execution*/ + if (uop->type == UOP_LIMM) + return decode_time; + + /*Handle shared units on k5-2 and later*/ + if (units == k5_2_units) { + if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) + decode_time = mul_first_available_cycle; + else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) + decode_time = shift_first_available_cycle; + else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) + decode_time = m3dnow_first_available_cycle; + } + + /*Find execution unit for this uOP*/ + for (int c = 0; c < nr_units; c++) { + if (units[c].uop_mask & (1 << uop->type)) { + if (units[c].first_available_cycle < best_start_cycle) { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } + } + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); + + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->throughput; + + if (units == k5_2_units) { + if (uop->type == UOP_MEU_MUL) + mul_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_SHIFT) + shift_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_3DN) + m3dnow_first_available_cycle = best_start_cycle + uop->throughput; + } + + return best_start_cycle + uop->throughput; +} + +/*The k5 decoder can decode, per clock : + - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long + - 1 'long' instruction, up to 4 uOPs + - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +*/ +static struct { + int nr_uops; + const risc86_uop_t *uops[4]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[4]; +} decode_buffer; + +#define NR_OPQUADS 6 +/*Timestamps of when the last six opquads completed. The k5 scheduler retires + opquads in order, so this is needed to determine when the next can be scheduled*/ +static int opquad_completion_timestamp[NR_OPQUADS]; +static int next_opquad = 0; + +#define NR_REGS 8 +/*Timestamp of when last operation on an integer register completed*/ +static int reg_available_timestamp[NR_REGS]; +/*Timestamp of when last operation on an FPU register completed*/ +static int fpu_st_timestamp[8]; +/*Completion time of the last uop to be processed. Used to calculate timing of + dependent uop chains*/ +static int last_uop_timestamp = 0; + +void +decode_flush_k5(void) +{ + int uop_timestamp = 0; + + /*Decoded opquad can not be submitted if there are no free spaces in the + opquad buffer*/ + if (decode_timestamp < opquad_completion_timestamp[next_opquad]) + decode_timestamp = opquad_completion_timestamp[next_opquad]; + + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; + + /*Submit uops to execution units, and determine the latest completion time*/ + for (int c = 0; c < decode_buffer.nr_uops; c++) { + int start_timestamp; + + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; + else + start_timestamp = decode_buffer.earliest_start[c]; + + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } + + /*Calculate opquad completion time. Since opquads complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opquad in buffer*/ + opquad_completion_timestamp[next_opquad] = last_complete_timestamp; + next_opquad++; + if (next_opquad == NR_OPQUADS) + next_opquad = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; +} + +/*The instruction is only of interest here if it's longer than 7 bytes, as that's the + limit on k5 short decoding*/ +static int +codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +{ + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; + + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; + } + } + + return len; +} + +static void +decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +{ + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d; + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) { + if (regmask_required & (1 << c)) { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; + } + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if (deps & FPU_RW_STREG) { + int reg = fetchdat & 7; + + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } + + /*Short decoders are limited to 7 bytes*/ + if (decode_type == DECODE_SHORT && instr_length > 7) + decode_type = DECODE_LONG; + /*Long decoder is limited to 11 bytes*/ + else if (instr_length > 11) + decode_type = DECODE_VECTOR; + + switch (decode_type) { + case DECODE_SHORT: + if (decode_buffer.nr_uops) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[decode_buffer.nr_uops + 1] = &ins->uop[1]; + decode_buffer.earliest_start[decode_buffer.nr_uops + 1] = -1; + } + decode_buffer.nr_uops += ins->nr_uops; + + decode_flush_k5(); + } else { + decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[1] = &ins->uop[1]; + decode_buffer.earliest_start[1] = -1; + } + } + break; + + case DECODE_LONG: + if (decode_buffer.nr_uops) + decode_flush_k5(); + + decode_buffer.nr_uops = ins->nr_uops; + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[c] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[c] = earliest_start; + else + decode_buffer.earliest_start[c] = -1; + } + decode_flush_k5(); + break; + + case DECODE_VECTOR: + if (decode_buffer.nr_uops) + decode_flush_k5(); + + decode_timestamp++; + d = 0; + + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if (d == 4) { + d = 0; + decode_buffer.nr_uops = 4; + decode_flush_k5(); + } + } + if (d) { + decode_buffer.nr_uops = d; + decode_flush_k5(); + } + break; + } + + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c + 1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) { + int reg = fetchdat & 7; + if (deps & FPU_POP) + reg--; + if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } +} + +void +codegen_timing_k5_block_start(void) +{ + int c; + + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; + + mul_first_available_cycle = 0; + shift_first_available_cycle = 0; + m3dnow_first_available_cycle = 0; + + decode_timestamp = 0; + last_complete_timestamp = 0; + + for (c = 0; c < NR_OPQUADS; c++) + opquad_completion_timestamp[c] = 0; + next_opquad = 0; + + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; +} + +void +codegen_timing_k5_start(void) +{ + if (cpu_s->cpu_type == CPU_K5) { + units = k5_units; + nr_units = NR_k5_UNITS; + } else { + units = k5_2_units; + nr_units = NR_k5_2_UNITS; + } + last_prefix = 0; + prefixes = 0; +} + +void +codegen_timing_k5_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix != 0x0f) + decode_timestamp++; + + last_prefix = prefix; + prefixes++; +} + +void +codegen_timing_k5_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + const risc86_instruction_t **ins_table; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); + + switch (last_prefix) { + case 0x0f: + if (opcode == 0x0f) { + /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ + uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ + uint8_t modrm = fetchdat & 0xff; + uint8_t sib = (fetchdat >> 8) & 0xff; + + if ((modrm & 0xc0) != 0xc0) { + if (op_32 & 0x200) { + if ((modrm & 7) == 4) { + /* Has SIB*/ + opcode_pc++; + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((sib & 0x07) == 0x05) + opcode_pc += 4; + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((modrm & 0xc7) == 0x05) + opcode_pc += 4; + } + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 2; + else if ((modrm & 0xc7) == 0x06) + opcode_pc += 2; + } + } + + opcode = fastreadb(cs + opcode_pc); + + ins_table = mod3 ? opcode_timings_k5_0f0f_mod3 : opcode_timings_k5_0f0f; + deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; + } else { + ins_table = mod3 ? opcode_timings_k5_0f_mod3 : opcode_timings_k5_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + } + break; + + case 0xd8: + ins_table = mod3 ? opcode_timings_k5_d8_mod3 : opcode_timings_k5_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_k5_d9_mod3 : opcode_timings_k5_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_k5_da_mod3 : opcode_timings_k5_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_k5_db_mod3 : opcode_timings_k5_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_k5_dc_mod3 : opcode_timings_k5_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_k5_dd_mod3 : opcode_timings_k5_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_k5_de_mod3 : opcode_timings_k5_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_k5_df_mod3 : opcode_timings_k5_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + ins_table = mod3 ? opcode_timings_k5_80_mod3 : opcode_timings_k5_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + case 0x83: + ins_table = mod3 ? opcode_timings_k5_8x_mod3 : opcode_timings_k5_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xd0: + case 0xd2: + ins_table = mod3 ? opcode_timings_k5_shift_b_mod3 : opcode_timings_k5_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: + case 0xd1: + case 0xd3: + ins_table = mod3 ? opcode_timings_k5_shift_mod3 : opcode_timings_k5_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_k5_f6_mod3 : opcode_timings_k5_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_k5_f7_mod3 : opcode_timings_k5_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_k5_ff_mod3 : opcode_timings_k5_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + ins_table = mod3 ? opcode_timings_k5_mod3 : opcode_timings_k5; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} + +void +codegen_timing_k5_block_end(void) +{ + if (decode_buffer.nr_uops) { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush_k5(); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } +} + +int +codegen_timing_k5_jump_cycles(void) +{ + if (decode_buffer.nr_uops) + return 1; + return 0; +} + +codegen_timing_t codegen_timing_k5 = { + codegen_timing_k5_start, + codegen_timing_k5_prefix, + codegen_timing_k5_opcode, + codegen_timing_k5_block_start, + codegen_timing_k5_block_end, + codegen_timing_k5_jump_cycles +}; From 2f1b64d43b2752ccb2dd829bc89a9b8bef22de1e Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Sat, 10 Aug 2024 20:00:48 -0400 Subject: [PATCH 04/26] Remove excess http:// from links --- src/cpu/codegen_timing_k5.c | 2 +- src/cpu/codegen_timing_k6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu/codegen_timing_k5.c b/src/cpu/codegen_timing_k5.c index bdcc0ce43..42b1129fe 100644 --- a/src/cpu/codegen_timing_k5.c +++ b/src/cpu/codegen_timing_k5.c @@ -1,5 +1,5 @@ /*Most of the vector instructions here are a total guess. - Some of the timings are based on http://http://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_k5_InstLatX86.txt*/ + Some of the timings are based on https://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000502_k5_InstLatX86.txt*/ #include #include #include diff --git a/src/cpu/codegen_timing_k6.c b/src/cpu/codegen_timing_k6.c index 6a2871884..21263a25e 100644 --- a/src/cpu/codegen_timing_k6.c +++ b/src/cpu/codegen_timing_k6.c @@ -1,5 +1,5 @@ /*Most of the vector instructions here are a total guess. - Some of the timings are based on http://http://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_K6_InstLatX86.txt*/ + Some of the timings are based on https://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_K6_InstLatX86.txt*/ #include #include #include From 1f2bd5626b8d4db6c4465d353cff814ffe5a7487 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Sat, 10 Aug 2024 20:25:44 -0400 Subject: [PATCH 05/26] Dont compile codegen_timing_686.c without dynarec --- src/cpu/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt index b12356ccc..8ae97e2e6 100644 --- a/src/cpu/CMakeLists.txt +++ b/src/cpu/CMakeLists.txt @@ -30,9 +30,11 @@ endif() if(CYRIX_6X86) target_compile_definitions(cpu PRIVATE USE_CYRIX_6X86) +if(DYNAREC) add_library(ct686 OBJECT codegen_timing_686.c) target_link_libraries(86Box ct686) endif() +endif() if(DYNAREC) target_sources(cpu PRIVATE 386_dynarec_ops.c) From 244545f33720c38dbcfbf20fc1959835d57d2ff4 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Sun, 11 Aug 2024 20:43:56 -0400 Subject: [PATCH 06/26] Rename K6's decode_flush --- src/cpu/codegen_timing_k6.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cpu/codegen_timing_k6.c b/src/cpu/codegen_timing_k6.c index 21263a25e..e4c5d9493 100644 --- a/src/cpu/codegen_timing_k6.c +++ b/src/cpu/codegen_timing_k6.c @@ -1769,7 +1769,7 @@ static int fpu_st_timestamp[8]; static int last_uop_timestamp = 0; void -decode_flush(void) +decode_flush_k6(void) { int uop_timestamp = 0; @@ -1908,7 +1908,7 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc } decode_buffer.nr_uops += ins->nr_uops; - decode_flush(); + decode_flush_k6(); } else { decode_buffer.nr_uops = ins->nr_uops; decode_buffer.uops[0] = &ins->uop[0]; @@ -1922,7 +1922,7 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc case DECODE_LONG: if (decode_buffer.nr_uops) - decode_flush(); + decode_flush_k6(); decode_buffer.nr_uops = ins->nr_uops; for (c = 0; c < ins->nr_uops; c++) { @@ -1932,12 +1932,12 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc else decode_buffer.earliest_start[c] = -1; } - decode_flush(); + decode_flush_k6(); break; case DECODE_VECTOR: if (decode_buffer.nr_uops) - decode_flush(); + decode_flush_k6(); decode_timestamp++; d = 0; @@ -1953,12 +1953,12 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc if (d == 4) { d = 0; decode_buffer.nr_uops = 4; - decode_flush(); + decode_flush_k6(); } } if (d) { decode_buffer.nr_uops = d; - decode_flush(); + decode_flush_k6(); } break; } @@ -2209,7 +2209,7 @@ codegen_timing_k6_block_end(void) { if (decode_buffer.nr_uops) { int old_last_complete_timestamp = last_complete_timestamp; - decode_flush(); + decode_flush_k6(); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); } } From f38b6c00c672607ab51065fb6dfa70b2dba3894e Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Mon, 12 Aug 2024 19:23:06 -0400 Subject: [PATCH 07/26] Several C files were referenced multiple times. --- src/chipset/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chipset/CMakeLists.txt b/src/chipset/CMakeLists.txt index 3f7f6e43d..36e840dc7 100644 --- a/src/chipset/CMakeLists.txt +++ b/src/chipset/CMakeLists.txt @@ -21,7 +21,7 @@ add_library(chipset OBJECT 82c100.c acc2168.c cs8230.c ali1429.c ali1435.c ali14 sis_85c496.c sis_85c50x.c sis_5511.c sis_5571.c sis_5581.c sis_5591.c sis_5600.c sis_5511_h2p.c sis_5571_h2p.c sis_5581_h2p.c sis_5591_h2p.c sis_5600_h2p.c sis_5513_p2i.c sis_5513_ide.c sis_5572_usb.c sis_5595_pmu.c sis_55xx.c via_vt82c49x.c - via_vt82c505.c sis_85c310.c sis_85c4xx.c sis_85c496.c sis_85c50x.c gc100.c stpc.c + via_vt82c505.c gc100.c stpc.c umc_8886.c umc_hb4.c umc_8890.c via_apollo.c via_pipc.c vl82c480.c wd76c10.c) if(OLIVETTI) From ad3eaf17a91ab8d8b36d803242bbd523117f4cc7 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Mon, 12 Aug 2024 20:01:54 -0400 Subject: [PATCH 08/26] More unique names --- src/cpu/codegen_timing_486.c | 98 +++++++++++++-------------- src/cpu/codegen_timing_686.c | 108 +++++++++++++++--------------- src/cpu/codegen_timing_k6.c | 108 +++++++++++++++--------------- src/cpu/codegen_timing_p6.c | 102 ++++++++++++++-------------- src/cpu/codegen_timing_pentium.c | 98 +++++++++++++-------------- src/cpu/codegen_timing_winchip.c | 98 +++++++++++++-------------- src/cpu/codegen_timing_winchip2.c | 96 +++++++++++++------------- 7 files changed, 354 insertions(+), 354 deletions(-) diff --git a/src/cpu/codegen_timing_486.c b/src/cpu/codegen_timing_486.c index 548a9ec28..90d682211 100644 --- a/src/cpu/codegen_timing_486.c +++ b/src/cpu/codegen_timing_486.c @@ -18,7 +18,7 @@ #define CYCLES(c) (int *) c #define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = { +static int *opcode_timings_486[256] = { // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -42,7 +42,7 @@ static int *opcode_timings[256] = { // clang-format on }; -static int *opcode_timings_mod3[256] = { +static int *opcode_timings_486_mod3[256] = { // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -66,7 +66,7 @@ static int *opcode_timings_mod3[256] = { // clang-format on }; -static int *opcode_timings_0f[256] = { +static int *opcode_timings_486_0f[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -89,7 +89,7 @@ static int *opcode_timings_0f[256] = { /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, // clang-format on }; -static int *opcode_timings_0f_mod3[256] = { +static int *opcode_timings_486_0f_mod3[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -113,65 +113,65 @@ static int *opcode_timings_0f_mod3[256] = { // clang-format on }; -static int *opcode_timings_shift[8] = { +static int *opcode_timings_486_shift[8] = { // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) }; -static int *opcode_timings_shift_mod3[8] = { +static int *opcode_timings_486_shift_mod3[8] = { // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) // clang-format on }; -static int *opcode_timings_f6[8] = { +static int *opcode_timings_486_f6[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f6_mod3[8] = { +static int *opcode_timings_486_f6_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f7[8] = { +static int *opcode_timings_486_f7[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static int *opcode_timings_f7_mod3[8] = { +static int *opcode_timings_486_f7_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) }; -static int *opcode_timings_ff[8] = { +static int *opcode_timings_486_ff[8] = { // clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL }; -static int *opcode_timings_ff_mod3[8] = { +static int *opcode_timings_486_ff_mod3[8] = { // clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL // clang-format on }; -static int *opcode_timings_d8[8] = { +static int *opcode_timings_486_d8[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_d8_mod3[8] = { +static int *opcode_timings_486_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_d9[8] = { +static int *opcode_timings_486_d9[8] = { // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(3), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) // clang-format on }; -static int *opcode_timings_d9_mod3[64] = { +static int *opcode_timings_486_d9_mod3[64] = { // clang-format off /*FLD*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), @@ -192,25 +192,25 @@ static int *opcode_timings_d9_mod3[64] = { // clang-format on }; -static int *opcode_timings_da[8] = { +static int *opcode_timings_486_da[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_da_mod3[8] = { +static int *opcode_timings_486_da_mod3[8] = { // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL // clang-format on }; -static int *opcode_timings_db[8] = { +static int *opcode_timings_486_db[8] = { // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(9), NULL, CYCLES(28), CYCLES(28), NULL, CYCLES(5), NULL, CYCLES(6) // clang-format on }; -static int *opcode_timings_db_mod3[64] = { +static int *opcode_timings_486_db_mod3[64] = { // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -224,74 +224,74 @@ static int *opcode_timings_db_mod3[64] = { // clang-format on }; -static int *opcode_timings_dc[8] = { +static int *opcode_timings_486_dc[8] = { // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_dc_mod3[8] = { +static int *opcode_timings_486_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(8), CYCLES(16), NULL, NULL, CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_dd[8] = { +static int *opcode_timings_486_dd[8] = { // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(3), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(3) // clang-format on }; -static int *opcode_timings_dd_mod3[8] = { +static int *opcode_timings_486_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL // clang-format on }; -static int *opcode_timings_de[8] = { +static int *opcode_timings_486_de[8] = { // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_de_mod3[8] = { +static int *opcode_timings_486_de_mod3[8] = { // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), NULL, CYCLES(5), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_df[8] = { +static int *opcode_timings_486_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(13), NULL, CYCLES(29), CYCLES(29), NULL, CYCLES(10), CYCLES(172), CYCLES(28) // clang-format on }; -static int *opcode_timings_df_mod3[8] = { +static int *opcode_timings_486_df_mod3[8] = { // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL // clang-format on }; -static int *opcode_timings_8x[8] = { +static int *opcode_timings_486_8x[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on }; -static int *opcode_timings_8x_mod3[8] = { +static int *opcode_timings_486_8x_mod3[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on }; -static int *opcode_timings_81[8] = { +static int *opcode_timings_486_81[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on }; -static int *opcode_timings_81_mod3[8] = { +static int *opcode_timings_486_81_mod3[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on @@ -330,7 +330,7 @@ codegen_timing_486_start(void) void codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); + timing_count += COUNT(opcode_timings_486[prefix], 0); last_prefix = prefix; } @@ -344,47 +344,47 @@ codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_486_0f_mod3 : opcode_timings_486_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_486_d8_mod3 : opcode_timings_486_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_486_d9_mod3 : opcode_timings_486_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_486_da_mod3 : opcode_timings_486_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_486_db_mod3 : opcode_timings_486_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_486_dc_mod3 : opcode_timings_486_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_486_dd_mod3 : opcode_timings_486_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_486_de_mod3 : opcode_timings_486_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_486_df_mod3 : opcode_timings_486_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -394,12 +394,12 @@ codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_486_8x_mod3 : opcode_timings_486_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_486_81_mod3 : opcode_timings_486_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -410,29 +410,29 @@ codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_486_shift_mod3 : opcode_timings_486_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_486_f6_mod3 : opcode_timings_486_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_486_f7_mod3 : opcode_timings_486_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_486_ff_mod3 : opcode_timings_486_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_486_mod3 : opcode_timings_486; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_686.c b/src/cpu/codegen_timing_686.c index 6ea5ac543..285881956 100644 --- a/src/cpu/codegen_timing_686.c +++ b/src/cpu/codegen_timing_686.c @@ -65,7 +65,7 @@ static uint32_t prev_fetchdat; static uint32_t last_regmask_modified; static uint32_t regmask_modified; -static uint32_t opcode_timings[256] = { +static uint32_t opcode_timings_686[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, @@ -202,7 +202,7 @@ static uint32_t opcode_timings[256] = { // clang-format on }; -static uint32_t opcode_timings_mod3[256] = { +static uint32_t opcode_timings_686_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, @@ -340,7 +340,7 @@ static uint32_t opcode_timings_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_0f[256] = { +static uint32_t opcode_timings_686_0f[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -423,7 +423,7 @@ static uint32_t opcode_timings_0f[256] = { PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = { +static uint32_t opcode_timings_686_0f_mod3[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -506,44 +506,44 @@ static uint32_t opcode_timings_0f_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_shift[8] = { +static uint32_t opcode_timings_686_shift[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = { +static uint32_t opcode_timings_686_shift_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, // clang-format on }; -static uint32_t opcode_timings_shift_imm[8] = { +static uint32_t opcode_timings_686_shift_imm[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, // clang-format on }; -static uint32_t opcode_timings_shift_imm_mod3[8] = { +static uint32_t opcode_timings_686_shift_imm_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, // clang-format on }; -static uint32_t opcode_timings_shift_cl[8] = { +static uint32_t opcode_timings_686_shift_cl[8] = { // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), // clang-format on }; -static uint32_t opcode_timings_shift_cl_mod3[8] = { +static uint32_t opcode_timings_686_shift_cl_mod3[8] = { // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), // clang-format on }; -static uint32_t opcode_timings_f6[8] = { +static uint32_t opcode_timings_686_f6[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_RM, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -551,7 +551,7 @@ static uint32_t opcode_timings_f6[8] = { PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = { +static uint32_t opcode_timings_686_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -559,7 +559,7 @@ static uint32_t opcode_timings_f6_mod3[8] = { PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) // clang-format on }; -static uint32_t opcode_timings_f7[8] = { +static uint32_t opcode_timings_686_f7[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -567,7 +567,7 @@ static uint32_t opcode_timings_f7[8] = { PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = { +static uint32_t opcode_timings_686_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -575,7 +575,7 @@ static uint32_t opcode_timings_f7_mod3[8] = { PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) // clang-format on }; -static uint32_t opcode_timings_ff[8] = { +static uint32_t opcode_timings_686_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), @@ -583,7 +583,7 @@ static uint32_t opcode_timings_ff[8] = { PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), PAIR_XY | CYCLES(1), INVALID // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = { +static uint32_t opcode_timings_686_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), @@ -592,7 +592,7 @@ static uint32_t opcode_timings_ff_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_d8[8] = { +static uint32_t opcode_timings_686_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -600,7 +600,7 @@ static uint32_t opcode_timings_d8[8] = { PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = { +static uint32_t opcode_timings_686_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -609,7 +609,7 @@ static uint32_t opcode_timings_d8_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_d9[8] = { +static uint32_t opcode_timings_686_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -617,7 +617,7 @@ static uint32_t opcode_timings_d9[8] = { PAIR_X | CYCLES(30), PAIR_X | CYCLES(4), PAIR_X | CYCLES(24), PAIR_X | CYCLES(5) // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = { +static uint32_t opcode_timings_686_d9_mod3[64] = { // clang-format off /*FLD*/ PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -650,7 +650,7 @@ static uint32_t opcode_timings_d9_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_da[8] = { +static uint32_t opcode_timings_686_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), @@ -658,14 +658,14 @@ static uint32_t opcode_timings_da[8] = { PAIR_X | CYCLES(29), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(48) // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = { +static uint32_t opcode_timings_686_da_mod3[8] = { // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, PAIR_X | CYCLES(5), INVALID, INVALID // clang-format on }; -static uint32_t opcode_timings_db[8] = { +static uint32_t opcode_timings_686_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -673,7 +673,7 @@ static uint32_t opcode_timings_db[8] = { INVALID, PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = { +static uint32_t opcode_timings_686_db_mod3[64] = { // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -703,7 +703,7 @@ static uint32_t opcode_timings_db_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_dc[8] = { +static uint32_t opcode_timings_686_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), @@ -711,7 +711,7 @@ static uint32_t opcode_timings_dc[8] = { PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = { +static uint32_t opcode_timings_686_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, INVALID, @@ -720,7 +720,7 @@ static uint32_t opcode_timings_dc_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_dd[8] = { +static uint32_t opcode_timings_686_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -728,7 +728,7 @@ static uint32_t opcode_timings_dd[8] = { PAIR_X | CYCLES(72), INVALID, PAIR_X | CYCLES(67), PAIR_X | CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = { +static uint32_t opcode_timings_686_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ PAIR_X | CYCLES(3), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -737,14 +737,14 @@ static uint32_t opcode_timings_dd_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_de[8] = { +static uint32_t opcode_timings_686_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_X | CYCLES(27), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(38) }; -static uint32_t opcode_timings_de_mod3[8] = { +static uint32_t opcode_timings_686_de_mod3[8] = { // clang-format off /* FADD FMUL FCOMPP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, PAIR_X | CYCLES(7), @@ -753,7 +753,7 @@ static uint32_t opcode_timings_de_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_df[8] = { +static uint32_t opcode_timings_686_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_X | CYCLES(8), INVALID, PAIR_X | CYCLES(10), PAIR_X | CYCLES(13), @@ -761,7 +761,7 @@ static uint32_t opcode_timings_df[8] = { INVALID, PAIR_X | CYCLES(8), PAIR_X | CYCLES(63), PAIR_X | CYCLES(13) // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = { +static uint32_t opcode_timings_686_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -769,25 +769,25 @@ static uint32_t opcode_timings_df_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_8x[8] = { +static uint32_t opcode_timings_686_8x[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = { +static uint32_t opcode_timings_686_8x_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG // clang-format on }; -static uint32_t opcode_timings_81[8] = { +static uint32_t opcode_timings_686_81[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = { +static uint32_t opcode_timings_686_81_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG @@ -874,47 +874,47 @@ codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_686_0f_mod3 : opcode_timings_686_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_686_d8_mod3 : opcode_timings_686_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_686_d9_mod3 : opcode_timings_686_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_686_da_mod3 : opcode_timings_686_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_686_db_mod3 : opcode_timings_686_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_686_dc_mod3 : opcode_timings_686_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_686_dd_mod3 : opcode_timings_686_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_686_de_mod3 : opcode_timings_686_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_686_df_mod3 : opcode_timings_686_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -924,55 +924,55 @@ codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_686_8x_mod3 : opcode_timings_686_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_686_81_mod3 : opcode_timings_686_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; case 0xc0: case 0xc1: - timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; + timings = mod3 ? opcode_timings_686_shift_imm_mod3 : opcode_timings_686_shift_imm; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_686_shift_mod3 : opcode_timings_686_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; + timings = mod3 ? opcode_timings_686_shift_cl_mod3 : opcode_timings_686_shift_cl; deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_686_f6_mod3 : opcode_timings_686_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_686_f7_mod3 : opcode_timings_686_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_686_ff_mod3 : opcode_timings_686_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_686_mod3 : opcode_timings_686; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_k6.c b/src/cpu/codegen_timing_k6.c index e4c5d9493..5566fbbcd 100644 --- a/src/cpu/codegen_timing_k6.c +++ b/src/cpu/codegen_timing_k6.c @@ -759,7 +759,7 @@ static const risc86_instruction_t vector_wbinvd_op = { #define INVALID NULL -static const risc86_instruction_t *opcode_timings[256] = { +static const risc86_instruction_t *opcode_timings_k6[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, @@ -896,7 +896,7 @@ static const risc86_instruction_t *opcode_timings[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_mod3[256] = { +static const risc86_instruction_t *opcode_timings_k6_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, @@ -1033,7 +1033,7 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f[256] = { // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, @@ -1116,7 +1116,7 @@ static const risc86_instruction_t *opcode_timings_0f[256] = { &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f_mod3[256] = { // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, @@ -1200,7 +1200,7 @@ static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f0f[256] = { // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1283,7 +1283,7 @@ static const risc86_instruction_t *opcode_timings_0f0f[256] = { INVALID, INVALID, INVALID, INVALID, // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f0f_mod3[256] = { // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1367,57 +1367,57 @@ static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift[8] = { // clang-format off &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift_b[8] = { // clang-format off &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift_mod3[8] = { // clang-format off &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift_b_mod3[8] = { // clang-format off &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &alux_op, &alux_op, &alux_op, &alux_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_80[8] = { +static const risc86_instruction_t *opcode_timings_k6_80[8] = { // clang-format off &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_80_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_80_mod3[8] = { // clang-format off &alux_op, &alux_op, &alux_store_op, &alux_store_op, &alux_op, &alux_op, &alux_op, &alux_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x[8] = { +static const risc86_instruction_t *opcode_timings_k6_8x[8] = { // clang-format off &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_8x_mod3[8] = { // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6[8] = { +static const risc86_instruction_t *opcode_timings_k6_f6[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, @@ -1425,7 +1425,7 @@ static const risc86_instruction_t *opcode_timings_f6[8] = { &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, @@ -1433,7 +1433,7 @@ static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7[8] = { +static const risc86_instruction_t *opcode_timings_k6_f7[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, @@ -1441,7 +1441,7 @@ static const risc86_instruction_t *opcode_timings_f7[8] = { &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, @@ -1449,7 +1449,7 @@ static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff[8] = { +static const risc86_instruction_t *opcode_timings_k6_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, @@ -1457,7 +1457,7 @@ static const risc86_instruction_t *opcode_timings_ff[8] = { &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, @@ -1466,7 +1466,7 @@ static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8[8] = { +static const risc86_instruction_t *opcode_timings_k6_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1474,7 +1474,7 @@ static const risc86_instruction_t *opcode_timings_d8[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ &float_op, &float_op, &float_op, &float_op, @@ -1483,7 +1483,7 @@ static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9[8] = { +static const risc86_instruction_t *opcode_timings_k6_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1491,7 +1491,7 @@ static const risc86_instruction_t *opcode_timings_d9[8] = { &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { +static const risc86_instruction_t *opcode_timings_k6_d9_mod3[64] = { // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, @@ -1524,7 +1524,7 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_da[8] = { +static const risc86_instruction_t *opcode_timings_k6_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1532,7 +1532,7 @@ static const risc86_instruction_t *opcode_timings_da[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_da_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -1540,7 +1540,7 @@ static const risc86_instruction_t *opcode_timings_da_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_db[8] = { +static const risc86_instruction_t *opcode_timings_k6_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1548,7 +1548,7 @@ static const risc86_instruction_t *opcode_timings_db[8] = { INVALID, &vector_flde_op, INVALID, &vector_fste_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_db_mod3[64] = { +static const risc86_instruction_t *opcode_timings_k6_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1578,7 +1578,7 @@ static const risc86_instruction_t *opcode_timings_db_mod3[64] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc[8] = { +static const risc86_instruction_t *opcode_timings_k6_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1586,7 +1586,7 @@ static const risc86_instruction_t *opcode_timings_dc[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ &float_op, &float_op, INVALID, INVALID, @@ -1595,7 +1595,7 @@ static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd[8] = { +static const risc86_instruction_t *opcode_timings_k6_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1603,7 +1603,7 @@ static const risc86_instruction_t *opcode_timings_dd[8] = { &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, @@ -1612,7 +1612,7 @@ static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_de[8] = { +static const risc86_instruction_t *opcode_timings_k6_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1620,7 +1620,7 @@ static const risc86_instruction_t *opcode_timings_de[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_de_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ &float_op, &float_op, INVALID, &float_op, @@ -1629,7 +1629,7 @@ static const risc86_instruction_t *opcode_timings_de_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_df[8] = { +static const risc86_instruction_t *opcode_timings_k6_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1637,7 +1637,7 @@ static const risc86_instruction_t *opcode_timings_df[8] = { INVALID, &load_float_op, &vector_float_l_op, &fstore_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_df_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -2094,51 +2094,51 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t opcode = fastreadb(cs + opcode_pc); - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; + ins_table = mod3 ? opcode_timings_k6_0f0f_mod3 : opcode_timings_k6_0f0f; deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; } else { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + ins_table = mod3 ? opcode_timings_k6_0f_mod3 : opcode_timings_k6_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; } break; case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + ins_table = mod3 ? opcode_timings_k6_d8_mod3 : opcode_timings_k6_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + ins_table = mod3 ? opcode_timings_k6_d9_mod3 : opcode_timings_k6_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + ins_table = mod3 ? opcode_timings_k6_da_mod3 : opcode_timings_k6_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + ins_table = mod3 ? opcode_timings_k6_db_mod3 : opcode_timings_k6_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + ins_table = mod3 ? opcode_timings_k6_dc_mod3 : opcode_timings_k6_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + ins_table = mod3 ? opcode_timings_k6_dd_mod3 : opcode_timings_k6_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + ins_table = mod3 ? opcode_timings_k6_de_mod3 : opcode_timings_k6_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + ins_table = mod3 ? opcode_timings_k6_df_mod3 : opcode_timings_k6_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -2147,13 +2147,13 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t switch (opcode) { case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + ins_table = mod3 ? opcode_timings_k6_80_mod3 : opcode_timings_k6_80; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + ins_table = mod3 ? opcode_timings_k6_8x_mod3 : opcode_timings_k6_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; @@ -2161,7 +2161,7 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + ins_table = mod3 ? opcode_timings_k6_shift_b_mod3 : opcode_timings_k6_shift_b; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; @@ -2169,29 +2169,29 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + ins_table = mod3 ? opcode_timings_k6_shift_mod3 : opcode_timings_k6_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + ins_table = mod3 ? opcode_timings_k6_f6_mod3 : opcode_timings_k6_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + ins_table = mod3 ? opcode_timings_k6_f7_mod3 : opcode_timings_k6_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + ins_table = mod3 ? opcode_timings_k6_ff_mod3 : opcode_timings_k6_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + ins_table = mod3 ? opcode_timings_k6_mod3 : opcode_timings_k6; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index 25c098ca4..a22813068 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -787,7 +787,7 @@ static const macro_op_t wbinvd_op = { }; #define INVALID NULL -static const macro_op_t *opcode_timings[256] = { +static const macro_op_t *opcode_timings_p6[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, @@ -924,7 +924,7 @@ static const macro_op_t *opcode_timings[256] = { // clang-format on }; -static const macro_op_t *opcode_timings_mod3[256] = { +static const macro_op_t *opcode_timings_p6_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_op, &alu_op, &alup0_op, &alu_op, @@ -1062,7 +1062,7 @@ static const macro_op_t *opcode_timings_mod3[256] = { // clang-format on }; -static const macro_op_t *opcode_timings_0f[256] = { +static const macro_op_t *opcode_timings_p6_0f[256] = { // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, @@ -1145,7 +1145,7 @@ static const macro_op_t *opcode_timings_0f[256] = { &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, // clang-format on }; -static const macro_op_t *opcode_timings_0f_mod3[256] = { +static const macro_op_t *opcode_timings_p6_0f_mod3[256] = { // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, @@ -1228,58 +1228,58 @@ static const macro_op_t *opcode_timings_0f_mod3[256] = { &mmx_op, &mmx_op, &mmx_op, INVALID, }; -static const macro_op_t *opcode_timings_shift[8] = +static const macro_op_t *opcode_timings_p6_shift[8] = { // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op // clang-format on }; -static const macro_op_t *opcode_timings_shift_b[8] = { +static const macro_op_t *opcode_timings_p6_shift_b[8] = { // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op // clang-format on }; -static const macro_op_t *opcode_timings_shift_mod3[8] = { +static const macro_op_t *opcode_timings_p6_shift_mod3[8] = { // clang-format off &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op // clang-format on }; -static const macro_op_t *opcode_timings_shift_b_mod3[8] = { +static const macro_op_t *opcode_timings_p6_shift_b_mod3[8] = { // clang-format off &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op // clang-format on }; -static const macro_op_t *opcode_timings_80[8] = { +static const macro_op_t *opcode_timings_p6_80[8] = { // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, // clang-format on }; -static const macro_op_t *opcode_timings_80_mod3[8] = { +static const macro_op_t *opcode_timings_p6_80_mod3[8] = { // clang-format off &alup0_op, &alup0_op, &alup0_store_op, &alup0_store_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op, // clang-format on }; -static const macro_op_t *opcode_timings_8x[8] = { +static const macro_op_t *opcode_timings_p6_8x[8] = { // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, // clang-format on }; -static const macro_op_t *opcode_timings_8x_mod3[8] = { +static const macro_op_t *opcode_timings_p6_8x_mod3[8] = { // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, // clang-format on }; -static const macro_op_t *opcode_timings_f6[8] = { +static const macro_op_t *opcode_timings_p6_f6[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &alup0_store_op, &alup0_store_op, @@ -1287,7 +1287,7 @@ static const macro_op_t *opcode_timings_f6[8] = { &mul_mem_op, &mul_mem_op, &div16_mem_op, &div16_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_f6_mod3[8] = { +static const macro_op_t *opcode_timings_p6_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alup0_op, &alup0_op, @@ -1295,7 +1295,7 @@ static const macro_op_t *opcode_timings_f6_mod3[8] = { &mul_op, &mul_op, &div16_op, &div16_op, // clang-format on }; -static const macro_op_t *opcode_timings_f7[8] = { +static const macro_op_t *opcode_timings_p6_f7[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &alu_store_op, &alu_store_op, @@ -1303,7 +1303,7 @@ static const macro_op_t *opcode_timings_f7[8] = { &mul64_mem_op, &mul64_mem_op, &div32_mem_op, &div32_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_f7_mod3[8] = { +static const macro_op_t *opcode_timings_p6_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, @@ -1311,7 +1311,7 @@ static const macro_op_t *opcode_timings_f7_mod3[8] = { &mul64_op, &mul64_op, &div32_op, &div32_op, // clang-format on }; -static const macro_op_t *opcode_timings_ff[8] = { +static const macro_op_t *opcode_timings_p6_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &call_far_op, @@ -1319,7 +1319,7 @@ static const macro_op_t *opcode_timings_ff[8] = { &branch_op, &jmp_far_op, &push_mem_op, INVALID // clang-format on }; -static const macro_op_t *opcode_timings_ff_mod3[8] = { +static const macro_op_t *opcode_timings_p6_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ &complex_alu1_op, &complex_alu1_op, &store_op, &call_far_op, @@ -1328,7 +1328,7 @@ static const macro_op_t *opcode_timings_ff_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_d8[8] = { +static const macro_op_t *opcode_timings_p6_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, @@ -1336,7 +1336,7 @@ static const macro_op_t *opcode_timings_d8[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_d8_mod3[8] = { +static const macro_op_t *opcode_timings_p6_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ &fadd_op, &fmul_op, &float_op, &float_op, @@ -1345,7 +1345,7 @@ static const macro_op_t *opcode_timings_d8_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_d9[8] = { +static const macro_op_t *opcode_timings_p6_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1353,7 +1353,7 @@ static const macro_op_t *opcode_timings_d9[8] = { &complex_float_l_op, &fldcw_op, &complex_float_l_op, &complex_float_op // clang-format on }; -static const macro_op_t *opcode_timings_d9_mod3[64] = { +static const macro_op_t *opcode_timings_p6_d9_mod3[64] = { // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, @@ -1386,7 +1386,7 @@ static const macro_op_t *opcode_timings_d9_mod3[64] = { // clang-format on }; -static const macro_op_t *opcode_timings_da[8] = { +static const macro_op_t *opcode_timings_p6_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, @@ -1394,7 +1394,7 @@ static const macro_op_t *opcode_timings_da[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_da_mod3[8] = { +static const macro_op_t *opcode_timings_p6_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -1402,7 +1402,7 @@ static const macro_op_t *opcode_timings_da_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_db[8] = { +static const macro_op_t *opcode_timings_p6_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1410,7 +1410,7 @@ static const macro_op_t *opcode_timings_db[8] = { INVALID, &flde_op, INVALID, &fste_op // clang-format on }; -static const macro_op_t *opcode_timings_db_mod3[64] = { +static const macro_op_t *opcode_timings_p6_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1440,7 +1440,7 @@ static const macro_op_t *opcode_timings_db_mod3[64] = { // clang-format on }; -static const macro_op_t *opcode_timings_dc[8] = { +static const macro_op_t *opcode_timings_p6_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, @@ -1448,7 +1448,7 @@ static const macro_op_t *opcode_timings_dc[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_dc_mod3[8] = { +static const macro_op_t *opcode_timings_p6_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ &fadd_op, &fmul_op, INVALID, INVALID, @@ -1457,7 +1457,7 @@ static const macro_op_t *opcode_timings_dc_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_dd[8] = { +static const macro_op_t *opcode_timings_p6_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1465,7 +1465,7 @@ static const macro_op_t *opcode_timings_dd[8] = { &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op // clang-format on }; -static const macro_op_t *opcode_timings_dd_mod3[8] = { +static const macro_op_t *opcode_timings_p6_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, @@ -1474,7 +1474,7 @@ static const macro_op_t *opcode_timings_dd_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_de[8] = { +static const macro_op_t *opcode_timings_p6_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, @@ -1482,7 +1482,7 @@ static const macro_op_t *opcode_timings_de[8] = { &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, // clang-format on }; -static const macro_op_t *opcode_timings_de_mod3[8] = { +static const macro_op_t *opcode_timings_p6_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ &fadd_op, &fmul_op, INVALID, &float_op, @@ -1491,7 +1491,7 @@ static const macro_op_t *opcode_timings_de_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_df[8] = { +static const macro_op_t *opcode_timings_p6_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1499,7 +1499,7 @@ static const macro_op_t *opcode_timings_df[8] = { INVALID, &load_float_op, &complex_float_l_op, &fstore_op, // clang-format on }; -static const macro_op_t *opcode_timings_df_mod3[8] = { +static const macro_op_t *opcode_timings_p6_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -1865,47 +1865,47 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui switch (last_prefix) { case 0x0f: - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + ins_table = mod3 ? opcode_timings_p6_0f_mod3 : opcode_timings_p6_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + ins_table = mod3 ? opcode_timings_p6_d8_mod3 : opcode_timings_p6_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + ins_table = mod3 ? opcode_timings_p6_d9_mod3 : opcode_timings_p6_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + ins_table = mod3 ? opcode_timings_p6_da_mod3 : opcode_timings_p6_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + ins_table = mod3 ? opcode_timings_p6_db_mod3 : opcode_timings_p6_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + ins_table = mod3 ? opcode_timings_p6_dc_mod3 : opcode_timings_p6_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + ins_table = mod3 ? opcode_timings_p6_dd_mod3 : opcode_timings_p6_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + ins_table = mod3 ? opcode_timings_p6_de_mod3 : opcode_timings_p6_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + ins_table = mod3 ? opcode_timings_p6_df_mod3 : opcode_timings_p6_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -1914,13 +1914,13 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui switch (opcode) { case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + ins_table = mod3 ? opcode_timings_p6_80_mod3 : opcode_timings_p6_80; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + ins_table = mod3 ? opcode_timings_p6_8x_mod3 : opcode_timings_p6_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; @@ -1928,7 +1928,7 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + ins_table = mod3 ? opcode_timings_p6_shift_b_mod3 : opcode_timings_p6_shift_b; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; @@ -1936,29 +1936,29 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + ins_table = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + ins_table = mod3 ? opcode_timings_p6_f6_mod3 : opcode_timings_p6_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + ins_table = mod3 ? opcode_timings_p6_f7_mod3 : opcode_timings_p6_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + ins_table = mod3 ? opcode_timings_p6_ff_mod3 : opcode_timings_p6_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + ins_table = mod3 ? opcode_timings_p6_mod3 : opcode_timings_p6; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_pentium.c b/src/cpu/codegen_timing_pentium.c index af344307f..aa78ee1c9 100644 --- a/src/cpu/codegen_timing_pentium.c +++ b/src/cpu/codegen_timing_pentium.c @@ -110,7 +110,7 @@ static uint32_t addr_regmask; static int fpu_latency; static int fpu_st_latency[8]; -static uint64_t opcode_timings[256] = { +static uint64_t opcode_timings_p6[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, @@ -247,7 +247,7 @@ static uint64_t opcode_timings[256] = { // clang-format on }; -static uint64_t opcode_timings_mod3[256] = { +static uint64_t opcode_timings_p6_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, @@ -385,7 +385,7 @@ static uint64_t opcode_timings_mod3[256] = { // clang-format on }; -static uint64_t opcode_timings_0f[256] = { +static uint64_t opcode_timings_p6_0f[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -468,7 +468,7 @@ static uint64_t opcode_timings_0f[256] = { PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, // clang-format on }; -static uint64_t opcode_timings_0f_mod3[256] = { +static uint64_t opcode_timings_p6_0f_mod3[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -552,20 +552,20 @@ static uint64_t opcode_timings_0f_mod3[256] = { // clang-format on }; -static uint64_t opcode_timings_shift[8] = { +static uint64_t opcode_timings_p6_shift[8] = { // clang-format off PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, // clang-format on }; -static uint64_t opcode_timings_shift_mod3[8] = { +static uint64_t opcode_timings_p6_shift_mod3[8] = { // clang-format off PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, // clang-format on }; -static uint64_t opcode_timings_f6[8] = { +static uint64_t opcode_timings_p6_f6[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -573,7 +573,7 @@ static uint64_t opcode_timings_f6[8] = { PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) // clang-format on }; -static uint64_t opcode_timings_f6_mod3[8] = { +static uint64_t opcode_timings_p6_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -581,7 +581,7 @@ static uint64_t opcode_timings_f6_mod3[8] = { PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) // clang-format on }; -static uint64_t opcode_timings_f7[8] = { +static uint64_t opcode_timings_p6_f7[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -589,7 +589,7 @@ static uint64_t opcode_timings_f7[8] = { PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) // clang-format on }; -static uint64_t opcode_timings_f7_mod3[8] = { +static uint64_t opcode_timings_p6_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -597,7 +597,7 @@ static uint64_t opcode_timings_f7_mod3[8] = { PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) // clang-format on }; -static uint64_t opcode_timings_ff[8] = { +static uint64_t opcode_timings_p6_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), @@ -605,7 +605,7 @@ static uint64_t opcode_timings_ff[8] = { PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID // clang-format on }; -static uint64_t opcode_timings_ff_mod3[8] = { +static uint64_t opcode_timings_p6_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), @@ -614,7 +614,7 @@ static uint64_t opcode_timings_ff_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_d8[8] = { +static uint64_t opcode_timings_p6_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -622,7 +622,7 @@ static uint64_t opcode_timings_d8[8] = { PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) // clang-format on }; -static uint64_t opcode_timings_d8_mod3[8] = { +static uint64_t opcode_timings_p6_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -631,7 +631,7 @@ static uint64_t opcode_timings_d8_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_d9[8] = { +static uint64_t opcode_timings_p6_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), @@ -639,7 +639,7 @@ static uint64_t opcode_timings_d9[8] = { PAIR_NP | FPU_CYCLES(32,0,0), PAIR_NP | FPU_CYCLES(8,0,0), PAIR_NP | FPU_CYCLES(48,0,0), PAIR_NP | FPU_CYCLES(2,0,0) // clang-format on }; -static uint64_t opcode_timings_d9_mod3[64] = { +static uint64_t opcode_timings_p6_d9_mod3[64] = { // clang-format off /*FLD*/ PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -672,7 +672,7 @@ static uint64_t opcode_timings_d9_mod3[64] = { // clang-format on }; -static uint64_t opcode_timings_da[8] = { +static uint64_t opcode_timings_p6_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), @@ -680,7 +680,7 @@ static uint64_t opcode_timings_da[8] = { PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) // clang-format on }; -static uint64_t opcode_timings_da_mod3[8] = { +static uint64_t opcode_timings_p6_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -688,7 +688,7 @@ static uint64_t opcode_timings_da_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_db[8] = { +static uint64_t opcode_timings_p6_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), @@ -696,7 +696,7 @@ static uint64_t opcode_timings_db[8] = { INVALID, PAIR_NP | FPU_CYCLES(3,0,0), INVALID, PAIR_NP | FPU_CYCLES(3,0,0) // clang-format on }; -static uint64_t opcode_timings_db_mod3[64] = { +static uint64_t opcode_timings_p6_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -726,7 +726,7 @@ static uint64_t opcode_timings_db_mod3[64] = { // clang-format on }; -static uint64_t opcode_timings_dc[8] = { +static uint64_t opcode_timings_p6_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -734,7 +734,7 @@ static uint64_t opcode_timings_dc[8] = { PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) // clang-format on }; -static uint64_t opcode_timings_dc_mod3[8] = { +static uint64_t opcode_timings_p6_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID, @@ -743,7 +743,7 @@ static uint64_t opcode_timings_dc_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_dd[8] = { +static uint64_t opcode_timings_p6_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), @@ -751,7 +751,7 @@ static uint64_t opcode_timings_dd[8] = { PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0) // clang-format on }; -static uint64_t opcode_timings_dd_mod3[8] = { +static uint64_t opcode_timings_p6_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), @@ -760,7 +760,7 @@ static uint64_t opcode_timings_dd_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_de[8] = { +static uint64_t opcode_timings_p6_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), @@ -768,7 +768,7 @@ static uint64_t opcode_timings_de[8] = { PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) // clang-format on }; -static uint64_t opcode_timings_de_mod3[8] = { +static uint64_t opcode_timings_p6_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0), @@ -777,7 +777,7 @@ static uint64_t opcode_timings_de_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_df[8] = { +static uint64_t opcode_timings_p6_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), @@ -785,7 +785,7 @@ static uint64_t opcode_timings_df[8] = { INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0) // clang-format on }; -static uint64_t opcode_timings_df_mod3[8] = { +static uint64_t opcode_timings_p6_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -793,25 +793,25 @@ static uint64_t opcode_timings_df_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_81[8] = { +static uint64_t opcode_timings_p6_81[8] = { // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632 // clang-format on }; -static uint64_t opcode_timings_81_mod3[8] = { +static uint64_t opcode_timings_p6_81_mod3[8] = { // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG // clang-format on }; -static uint64_t opcode_timings_8x[8] = { +static uint64_t opcode_timings_p6_8x[8] = { // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8 // clang-format on }; -static uint64_t opcode_timings_8x_mod3[8] = { +static uint64_t opcode_timings_p6_8x_mod3[8] = { // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG @@ -1097,47 +1097,47 @@ codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_p6_0f_mod3 : opcode_timings_p6_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_p6_d8_mod3 : opcode_timings_p6_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_p6_d9_mod3 : opcode_timings_p6_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_p6_da_mod3 : opcode_timings_p6_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_p6_db_mod3 : opcode_timings_p6_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_p6_dc_mod3 : opcode_timings_p6_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_p6_dd_mod3 : opcode_timings_p6_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_p6_de_mod3 : opcode_timings_p6_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_p6_df_mod3 : opcode_timings_p6_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -1147,12 +1147,12 @@ codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_p6_8x_mod3 : opcode_timings_p6_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_p6_81_mod3 : opcode_timings_p6_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -1161,36 +1161,36 @@ codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0xc1: case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift; deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_p6_f6_mod3 : opcode_timings_p6_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_p6_f7_mod3 : opcode_timings_p6_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_p6_ff_mod3 : opcode_timings_p6_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_p6_mod3 : opcode_timings_p6; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_winchip.c b/src/cpu/codegen_timing_winchip.c index 76ea8b954..3597517ce 100644 --- a/src/cpu/codegen_timing_winchip.c +++ b/src/cpu/codegen_timing_winchip.c @@ -18,7 +18,7 @@ #define CYCLES(c) (int *) c #define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = { +static int *opcode_timings_winchip[256] = { // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -42,7 +42,7 @@ static int *opcode_timings[256] = { // clang-format on }; -static int *opcode_timings_mod3[256] = { +static int *opcode_timings_winchip_mod3[256] = { // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -66,7 +66,7 @@ static int *opcode_timings_mod3[256] = { // clang-format on }; -static int *opcode_timings_0f[256] = { +static int *opcode_timings_winchip_0f[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -89,7 +89,7 @@ static int *opcode_timings_0f[256] = { /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, // clang-format on }; -static int *opcode_timings_0f_mod3[256] = { +static int *opcode_timings_winchip_0f_mod3[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -113,68 +113,68 @@ static int *opcode_timings_0f_mod3[256] = { // clang-format on }; -static int *opcode_timings_shift[8] = { +static int *opcode_timings_winchip_shift[8] = { // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) // clang-format on }; -static int *opcode_timings_shift_mod3[8] = { +static int *opcode_timings_winchip_shift_mod3[8] = { // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) // clang-format on }; -static int *opcode_timings_f6[8] = { +static int *opcode_timings_winchip_f6[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f6_mod3[8] = { +static int *opcode_timings_winchip_f6_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f7[8] = { +static int *opcode_timings_winchip_f7[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static int *opcode_timings_f7_mod3[8] = { +static int *opcode_timings_winchip_f7_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static int *opcode_timings_ff[8] = { +static int *opcode_timings_winchip_ff[8] = { // clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL // clang-format on }; -static int *opcode_timings_ff_mod3[8] = { +static int *opcode_timings_winchip_ff_mod3[8] = { // clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL // clang-format on }; -static int *opcode_timings_d8[8] = { +static int *opcode_timings_winchip_d8[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) // clang-format on }; -static int *opcode_timings_d8_mod3[8] = { +static int *opcode_timings_winchip_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) // clang-format on }; -static int *opcode_timings_d9[8] = { +static int *opcode_timings_winchip_d9[8] = { // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(2), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) // clang-format on }; -static int *opcode_timings_d9_mod3[64] = { +static int *opcode_timings_winchip_d9_mod3[64] = { // clang-format off /*FLD*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), @@ -195,25 +195,25 @@ static int *opcode_timings_d9_mod3[64] = { // clang-format on }; -static int *opcode_timings_da[8] = { +static int *opcode_timings_winchip_da[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) // clang-format on }; -static int *opcode_timings_da_mod3[8] = { +static int *opcode_timings_winchip_da_mod3[8] = { // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL // clang-format on }; -static int *opcode_timings_db[8] = { +static int *opcode_timings_winchip_db[8] = { // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), NULL, CYCLES(8) // clang-format on }; -static int *opcode_timings_db_mod3[64] = { +static int *opcode_timings_winchip_db_mod3[64] = { // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -227,71 +227,71 @@ static int *opcode_timings_db_mod3[64] = { // clang-format on }; -static int *opcode_timings_dc[8] = { +static int *opcode_timings_winchip_dc[8] = { // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(6), CYCLES(8), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(74), CYCLES(74) // clang-format on }; -static int *opcode_timings_dc_mod3[8] = { +static int *opcode_timings_winchip_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(4), CYCLES(6), NULL, NULL, CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) // clang-format on }; -static int *opcode_timings_dd[8] = { +static int *opcode_timings_winchip_dd[8] = { // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(2), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(5) // clang-format on }; -static int *opcode_timings_dd_mod3[8] = { +static int *opcode_timings_winchip_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL // clang-format on }; -static int *opcode_timings_de[8] = { +static int *opcode_timings_winchip_de[8] = { // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) // clang-format on }; -static int *opcode_timings_de_mod3[8] = { +static int *opcode_timings_winchip_de_mod3[8] = { // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), NULL, CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) // clang-format on }; -static int *opcode_timings_df[8] = { +static int *opcode_timings_winchip_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), CYCLES(172), CYCLES(8) // clang-format on }; -static int *opcode_timings_df_mod3[8] = { +static int *opcode_timings_winchip_df_mod3[8] = { // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL // clang-format on }; -static int *opcode_timings_8x[8] = { +static int *opcode_timings_winchip_8x[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; -static int *opcode_timings_8x_mod3[8] = +static int *opcode_timings_winchip_8x_mod3[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; -static int *opcode_timings_81[8] = +static int *opcode_timings_winchip_81[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; -static int *opcode_timings_81_mod3[8] = +static int *opcode_timings_winchip_81_mod3[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on @@ -330,7 +330,7 @@ codegen_timing_winchip_start(void) void codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); + timing_count += COUNT(opcode_timings_winchip[prefix], 0); last_prefix = prefix; } @@ -344,47 +344,47 @@ codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_winchip_0f_mod3 : opcode_timings_winchip_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_winchip_d8_mod3 : opcode_timings_winchip_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_winchip_d9_mod3 : opcode_timings_winchip_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_winchip_da_mod3 : opcode_timings_winchip_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_winchip_db_mod3 : opcode_timings_winchip_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_winchip_dc_mod3 : opcode_timings_winchip_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_winchip_dd_mod3 : opcode_timings_winchip_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_winchip_de_mod3 : opcode_timings_winchip_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_winchip_df_mod3 : opcode_timings_winchip_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -394,12 +394,12 @@ codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_winchip_8x_mod3 : opcode_timings_winchip_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_winchip_81_mod3 : opcode_timings_winchip_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -410,29 +410,29 @@ codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_winchip_shift_mod3 : opcode_timings_winchip_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_winchip_f6_mod3 : opcode_timings_winchip_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_winchip_f7_mod3 : opcode_timings_winchip_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_winchip_ff_mod3 : opcode_timings_winchip_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_winchip_mod3 : opcode_timings_winchip; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_winchip2.c b/src/cpu/codegen_timing_winchip2.c index ea206f068..f37fe3366 100644 --- a/src/cpu/codegen_timing_winchip2.c +++ b/src/cpu/codegen_timing_winchip2.c @@ -63,7 +63,7 @@ #define INVALID 0 -static uint32_t opcode_timings[256] = { +static uint32_t opcode_timings_winchip2[256] = { // clang-format off /*00*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), @@ -87,7 +87,7 @@ static uint32_t opcode_timings[256] = { // clang-format on }; -static uint32_t opcode_timings_mod3[256] = { +static uint32_t opcode_timings_winchip2_mod3[256] = { // clang-format off /*00*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), @@ -111,7 +111,7 @@ static uint32_t opcode_timings_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_0f[256] = { +static uint32_t opcode_timings_winchip2_0f[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -134,7 +134,7 @@ static uint32_t opcode_timings_0f[256] = { /*f0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = { +static uint32_t opcode_timings_winchip2_0f_mod3[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -158,49 +158,49 @@ static uint32_t opcode_timings_0f_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_shift[8] = { +static uint32_t opcode_timings_winchip2_shift[8] = { // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = { +static uint32_t opcode_timings_winchip2_shift_mod3[8] = { // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) // clang-format on }; -static uint32_t opcode_timings_f6[8] = { +static uint32_t opcode_timings_winchip2_f6[8] = { // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = { +static uint32_t opcode_timings_winchip2_f6_mod3[8] = { // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static uint32_t opcode_timings_f7[8] = { +static uint32_t opcode_timings_winchip2_f7[8] = { // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = { +static uint32_t opcode_timings_winchip2_f7_mod3[8] = { // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static uint32_t opcode_timings_ff[8] = { +static uint32_t opcode_timings_winchip2_ff[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = { +static uint32_t opcode_timings_winchip2_ff_mod3[8] = { // clang-format off CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID // clang-format on }; -static uint32_t opcode_timings_d8[8] = { +static uint32_t opcode_timings_winchip2_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -208,7 +208,7 @@ static uint32_t opcode_timings_d8[8] = { FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = { +static uint32_t opcode_timings_winchip2_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -217,7 +217,7 @@ static uint32_t opcode_timings_d8_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_d9[8] = { +static uint32_t opcode_timings_winchip2_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), @@ -225,7 +225,7 @@ static uint32_t opcode_timings_d9[8] = { FPU_CYCLES(32,0,0), FPU_CYCLES(8,0,0), FPU_CYCLES(48,0,0), FPU_CYCLES(2,0,0) // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = { +static uint32_t opcode_timings_winchip2_d9_mod3[64] = { // clang-format off /*FLD*/ FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -258,7 +258,7 @@ static uint32_t opcode_timings_d9_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_da[8] = { +static uint32_t opcode_timings_winchip2_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), @@ -266,7 +266,7 @@ static uint32_t opcode_timings_da[8] = { FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = { +static uint32_t opcode_timings_winchip2_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -274,7 +274,7 @@ static uint32_t opcode_timings_da_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_db[8] = { +static uint32_t opcode_timings_winchip2_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), @@ -282,7 +282,7 @@ static uint32_t opcode_timings_db[8] = { INVALID, FPU_CYCLES(3,0,0), INVALID, FPU_CYCLES(3,0,0) // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = { +static uint32_t opcode_timings_winchip2_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -312,7 +312,7 @@ static uint32_t opcode_timings_db_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_dc[8] = { +static uint32_t opcode_timings_winchip2_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -320,7 +320,7 @@ static uint32_t opcode_timings_dc[8] = { FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = { +static uint32_t opcode_timings_winchip2_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),INVALID, INVALID, @@ -329,7 +329,7 @@ static uint32_t opcode_timings_dc_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_dd[8] = { +static uint32_t opcode_timings_winchip2_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), @@ -337,7 +337,7 @@ static uint32_t opcode_timings_dd[8] = { FPU_CYCLES(70,0,0), INVALID, FPU_CYCLES(127,0,0), FPU_CYCLES(6,0,0) // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = { +static uint32_t opcode_timings_winchip2_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ FPU_CYCLES(2,0,0), INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -346,7 +346,7 @@ static uint32_t opcode_timings_dd_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_de[8] = { +static uint32_t opcode_timings_winchip2_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), @@ -354,7 +354,7 @@ static uint32_t opcode_timings_de[8] = { FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) // clang-format on }; -static uint32_t opcode_timings_de_mod3[8] = { +static uint32_t opcode_timings_winchip2_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(1,0,0), @@ -363,7 +363,7 @@ static uint32_t opcode_timings_de_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_df[8] = { +static uint32_t opcode_timings_winchip2_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), @@ -371,7 +371,7 @@ static uint32_t opcode_timings_df[8] = { INVALID, FPU_CYCLES(3,2,2), FPU_CYCLES(148,0,0), FPU_CYCLES(6,0,0) // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = { +static uint32_t opcode_timings_winchip2_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -379,22 +379,22 @@ static uint32_t opcode_timings_df_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_8x[8] = { +static uint32_t opcode_timings_winchip2_8x[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = { +static uint32_t opcode_timings_winchip2_8x_mod3[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_81[8] = { +static uint32_t opcode_timings_winchip2_81[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = { +static uint32_t opcode_timings_winchip2_81_mod3[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on @@ -613,47 +613,47 @@ codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNU switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_winchip2_0f_mod3 : opcode_timings_winchip2_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_winchip2_d8_mod3 : opcode_timings_winchip2_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_winchip2_d9_mod3 : opcode_timings_winchip2_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_winchip2_da_mod3 : opcode_timings_winchip2_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_winchip2_db_mod3 : opcode_timings_winchip2_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_winchip2_dc_mod3 : opcode_timings_winchip2_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_winchip2_dd_mod3 : opcode_timings_winchip2_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_winchip2_de_mod3 : opcode_timings_winchip2_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_winchip2_df_mod3 : opcode_timings_winchip2_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -663,12 +663,12 @@ codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNU case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_winchip2_8x_mod3 : opcode_timings_winchip2_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_winchip2_81_mod3 : opcode_timings_winchip2_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -679,29 +679,29 @@ codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNU case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_winchip2_shift_mod3 : opcode_timings_winchip2_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_winchip2_f6_mod3 : opcode_timings_winchip2_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_winchip2_f7_mod3 : opcode_timings_winchip2_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_winchip2_ff_mod3 : opcode_timings_winchip2_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_winchip2_mod3 : opcode_timings_winchip2; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } From bc4e33f09f312580b791979c814e7bc61ce20192 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Mon, 12 Aug 2024 20:53:02 -0400 Subject: [PATCH 09/26] fix cmake for the no dynarec case Temp test --- src/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4b3da5791..0841fabdd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -139,8 +139,10 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) include_directories(include) if(NEW_DYNAREC) include_directories(cpu codegen_new) -else() +elseif(DYNAREC) include_directories(cpu codegen) +else() + include_directories(cpu) endif() add_subdirectory(cdrom) @@ -149,7 +151,7 @@ add_subdirectory(chipset) add_subdirectory(cpu) if(NEW_DYNAREC) add_subdirectory(codegen_new) -else() +elseif(DYNAREC) add_subdirectory(codegen) endif() From 70c622aecdd9e730b8baaf0aa420629318a5d1af Mon Sep 17 00:00:00 2001 From: TC1995 Date: Wed, 14 Aug 2024 19:24:55 +0200 Subject: [PATCH 10/26] S3 ViRGE class: Rethreading for future investigation. --- src/video/vid_s3_virge.c | 213 ++++++++++++++++++++++++--------------- 1 file changed, 133 insertions(+), 80 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index d2f99ba14..3ad183468 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -58,7 +58,7 @@ static int dither[4][4] = { #define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) #define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) -#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) +#define FIFO_NOT_EMPTY (virge->fifo_read_idx < virge->fifo_write_idx) #define FIFO_TYPE 0xff000000 #define FIFO_ADDR 0x00ffffff @@ -74,9 +74,6 @@ static int dither[4][4] = { #define ROM_DIAMOND_STEALTH3D_4000 "roms/video/s3virge/86c357.bin" #define ROM_TRIO3D2X "roms/video/s3virge/TRIO3D2X_8mbsdr.VBI" -#define FIFO_STATE_IDLE 0 -#define FIFO_STATE_RUN 1 - enum { S3_VIRGE_325, S3_DIAMOND_STEALTH3D_2000, @@ -278,18 +275,22 @@ typedef struct virge_t { uint64_t blitter_time; int fifo_slots_num; - struct { - uint32_t addr_type; - uint32_t val; - } fifo[FIFO_SIZE]; + fifo_entry_t fifo[FIFO_SIZE]; + atomic_int fifo_write_idx, fifo_read_idx; - - pc_timer_t fifo_timer; - pc_timer_t render_timer; - atomic_int virge_busy; + + uint8_t render_thread_run; + thread_t *render_thread; + event_t *wake_render_thread; + event_t *not_full_event; + + uint8_t fifo_thread_run; + thread_t *fifo_thread; + event_t *wake_fifo_thread; + event_t *fifo_not_full_event; + int local; - int fifo_state; uint8_t subsys_stat, subsys_cntl, advfunc_cntl; @@ -297,8 +298,6 @@ typedef struct virge_t { void *i2c, *ddc; - int waiting; - int onboard; } virge_t; @@ -323,6 +322,8 @@ static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv); static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv); static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv); +static void s3_virge_fifo_thread(void *priv); + enum { CMD_SET_AE = 1, CMD_SET_HC = (1 << 1), @@ -395,12 +396,42 @@ s3_virge_log(const char *fmt, ...) # define s3_virge_log(fmt, ...) #endif +static __inline void +s3_virge_wake_fifo_thread(virge_t *virge) +{ + thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + +static void +s3_virge_wait_fifo_idle(virge_t *virge) +{ + while (FIFO_NOT_EMPTY) { + s3_virge_wake_fifo_thread(virge); + thread_wait_event(virge->fifo_not_full_event, 1); + } +} + +static void +s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) +{ + if (FIFO_FULL) { + thread_reset_event(virge->fifo_not_full_event); + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } + virge->fifo[virge->fifo_write_idx].val = val; + virge->fifo[virge->fifo_write_idx].addr_type = (addr & FIFO_ADDR) | type; + + virge->fifo_write_idx++; + if ((FIFO_ENTRIES < 8) || (FIFO_ENTRIES > 0xe000)) + s3_virge_wake_fifo_thread(virge); +} + static void queue_triangle(virge_t *virge) { if (RB_FULL) { - s3_virge_log("RB FULL, TBD.\n"); - return; + thread_reset_event(virge->not_full_event); + thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/ } virge->s3d_buffer[virge->s3d_write_idx] = virge->s3d_tri; @@ -409,7 +440,7 @@ queue_triangle(virge_t *virge) virge->s3d_write_idx = 0; if (!virge->s3d_busy) - timer_set_delay_u64(&virge->render_timer, 100 * TIMER_USEC); + thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/ } static void @@ -422,25 +453,27 @@ s3_virge_update_irqs(virge_t *virge) } static void -s3_virge_render_timer(void *priv) +s3_virge_render_thread(void *priv) { virge_t *virge = (virge_t *) priv; - virge->s3d_busy = 1; - while (virge->s3d_read_idx < virge->s3d_write_idx) { - s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx]); - virge->s3d_read_idx++; - if (virge->s3d_read_idx >= RB_SIZE) - virge->s3d_read_idx = 0; + if (virge->render_thread_run) { + thread_wait_event(virge->wake_render_thread, -1); + thread_reset_event(virge->wake_render_thread); + virge->s3d_busy = 1; + while (virge->s3d_read_idx < virge->s3d_write_idx) { + s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx]); + virge->s3d_read_idx++; + if (virge->s3d_read_idx >= RB_SIZE) + virge->s3d_read_idx = 0; - if (RB_ENTRIES == RB_MASK) - timer_disable(&virge->render_timer); + if (RB_ENTRIES == RB_MASK) + thread_set_event(virge->not_full_event); + } + virge->s3d_busy = 0; + virge->subsys_stat |= INT_S3D_DONE; + s3_virge_update_irqs(virge); } - virge->s3d_busy = 0; - virge->s3d_read_idx = 0; - virge->s3d_write_idx = 0; - virge->subsys_stat |= INT_S3D_DONE; - s3_virge_update_irqs(virge); } static void @@ -1387,7 +1420,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) case 0xad00: /*2D Polygon Command Set*/ virge->s3d.cmd_set = val; if (!(val & CMD_SET_AE)) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1403,7 +1436,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) virge->s3d.rdest_x = (val >> 16) & 0x7ff; virge->s3d.rdest_y = val & 0x7ff; if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1424,7 +1457,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) virge->s3d.lycnt = val & 0x7ff; virge->s3d.line_dir = val >> 31; if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1447,7 +1480,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) case 0xad7c: /*2D Polygon Y Count*/ virge->s3d.pycnt = val & 0x300007ff; if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1619,19 +1652,19 @@ s3_virge_mmio_read(uint32_t addr, void *priv) switch (addr & 0xffff) { case 0x8504: if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + s3_virge_wake_fifo_thread(virge); ret = virge->subsys_stat; return ret; case 0x8505: ret = 0xc0; - if (virge->s3d_busy || virge->virge_busy || (virge->fifo_read_idx < virge->fifo_write_idx)) + if (virge->s3d_busy || virge->virge_busy || FIFO_NOT_EMPTY) ret |= 0x10; else ret |= 0x30; if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + s3_virge_wake_fifo_thread(virge); return ret; case 0x850c: @@ -1721,14 +1754,14 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) switch (addr & 0xfffe) { case 0x8504: ret = 0xc000; - if (virge->s3d_busy || virge->virge_busy || (virge->fifo_read_idx < virge->fifo_write_idx)) + if (virge->s3d_busy || virge->virge_busy || FIFO_NOT_EMPTY) ret |= 0x1000; else ret |= 0x3000; ret |= virge->subsys_stat; if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + s3_virge_wake_fifo_thread(virge); return ret; case 0x850c: @@ -1822,14 +1855,15 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) case 0x8504: ret = 0x0000c000; - if (virge->s3d_busy || virge->virge_busy || (virge->fifo_read_idx < virge->fifo_write_idx)) + if (virge->s3d_busy || virge->virge_busy || FIFO_NOT_EMPTY) ret |= 0x00001000; else ret |= 0x00003000; ret |= virge->subsys_stat; if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + s3_virge_wake_fifo_thread(virge); + pclog("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx); break; case 0x850c: @@ -1852,96 +1886,122 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) case 0xa4d4: case 0xa8d4: case 0xacd4: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.src_base; break; case 0xa4d8: case 0xa8d8: case 0xacd8: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.dest_base; break; case 0xa4dc: case 0xa8dc: case 0xacdc: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r; break; case 0xa4e0: case 0xa8e0: case 0xace0: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b; break; case 0xa4e4: case 0xa8e4: case 0xace4: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str; break; case 0xa4e8: case 0xace8: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.mono_pat_0; break; case 0xa4ec: case 0xacec: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.mono_pat_1; break; case 0xa4f0: case 0xacf0: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pat_bg_clr; break; case 0xa4f4: case 0xa8f4: case 0xacf4: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pat_fg_clr; break; case 0xa4f8: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.src_bg_clr; break; case 0xa4fc: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.src_fg_clr; break; case 0xa500: case 0xa900: case 0xad00: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.cmd_set; break; case 0xa504: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.r_width << 16) | virge->s3d.r_height; break; case 0xa508: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y; break; case 0xa50c: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y; break; case 0xa96c: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.lxend0 << 16) | virge->s3d.lxend1; break; case 0xa970: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.ldx; break; case 0xa974: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.lxstart; break; case 0xa978: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.lystart; break; case 0xa97c: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.line_dir << 31) | virge->s3d.lycnt; break; case 0xad68: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.prdx; break; case 0xad6c: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.prxstart; break; case 0xad70: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pldx; break; case 0xad74: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.plxstart; break; case 0xad78: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pystart; break; case 0xad7c: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pycnt; break; @@ -1955,15 +2015,17 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) } static void -s3_virge_fifo_timer(void *priv) +s3_virge_fifo_thread(void *priv) { virge_t *virge = (virge_t *)priv; - timer_advance_u64(&virge->fifo_timer, 100 * TIMER_USEC); - - while (virge->fifo_state == FIFO_STATE_RUN) { + while (virge->fifo_thread_run) { + thread_set_event(virge->fifo_not_full_event); + thread_wait_event(virge->wake_fifo_thread, -1); + thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; - while (virge->fifo_read_idx < virge->fifo_write_idx) { + while (FIFO_NOT_EMPTY) { + s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR); switch (virge->fifo[virge->fifo_read_idx].addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: s3_virge_mmio_fifo_write(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); @@ -1980,38 +2042,14 @@ s3_virge_fifo_timer(void *priv) virge->fifo[virge->fifo_read_idx].addr_type = FIFO_INVALID; virge->fifo_read_idx++; - if (virge->fifo_read_idx >= FIFO_SIZE) - virge->fifo_read_idx = 0; - - s3_virge_log("ReadIDX=%d.\n", virge->fifo_read_idx); } virge->virge_busy = 0; + s3_virge_log("Complete.\n"); virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; s3_virge_update_irqs(virge); - virge->fifo_read_idx = 0; - virge->fifo_write_idx = 0; - virge->fifo_state = FIFO_STATE_IDLE; } } -static void -s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) -{ - if (FIFO_FULL) { - s3_virge_log("FIFO FULL, TBD.\n"); - return; - } - - virge->fifo[virge->fifo_write_idx].val = val; - virge->fifo[virge->fifo_write_idx].addr_type = (addr & FIFO_ADDR) | type; - virge->fifo_write_idx++; - if (virge->fifo_write_idx >= FIFO_SIZE) - virge->fifo_write_idx = 0; - - s3_virge_log("WriteIDX=%d.\n", virge->fifo_write_idx); - virge->fifo_state = FIFO_STATE_RUN; -} - static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) { @@ -2105,7 +2143,7 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) virge_t *virge = (virge_t *) priv; svga_t *svga = &virge->svga; - s3_virge_log("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); + pclog("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); if (((addr & 0xfffc) < 0x8000) || ((addr & 0xe000) == 0xa000)) s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); else { @@ -4397,9 +4435,6 @@ s3_virge_disable_handlers(virge_t *dev) reset_state->svga.timer = dev->svga.timer; reset_state->svga.timer8514 = dev->svga.timer8514; - - reset_state->fifo_timer = dev->fifo_timer; - reset_state->render_timer = dev->render_timer; } static void @@ -4412,7 +4447,6 @@ s3_virge_reset(void *priv) dev->virge_busy = 0; dev->fifo_write_idx = 0; dev->fifo_read_idx = 0; - dev->fifo_state = FIFO_STATE_IDLE; dev->s3d_busy = 0; dev->s3d_write_idx = 0; dev->s3d_read_idx = 0; @@ -4657,8 +4691,15 @@ s3_virge_init(const device_t *info) virge->svga.force_old_addr = 1; - timer_add(&virge->fifo_timer, s3_virge_fifo_timer, virge, 1); - timer_add(&virge->render_timer, s3_virge_render_timer, virge, 0); + virge->wake_render_thread = thread_create_event(); + virge->not_full_event = thread_create_event(); + virge->render_thread_run = 1; + virge->render_thread = thread_create(s3_virge_render_thread, virge); + + virge->wake_fifo_thread = thread_create_event(); + virge->fifo_not_full_event = thread_create_event(); + virge->fifo_thread_run = 1; + virge->fifo_thread = thread_create(s3_virge_fifo_thread, virge); virge->local = info->local; @@ -4672,6 +4713,18 @@ s3_virge_close(void *priv) { virge_t *virge = (virge_t *) priv; + virge->render_thread_run = 0; + thread_set_event(virge->wake_render_thread); + thread_wait(virge->render_thread); + thread_destroy_event(virge->not_full_event); + thread_destroy_event(virge->wake_render_thread); + + virge->fifo_thread_run = 0; + thread_set_event(virge->wake_fifo_thread); + thread_wait(virge->fifo_thread); + thread_destroy_event(virge->fifo_not_full_event); + thread_destroy_event(virge->wake_fifo_thread); + svga_close(&virge->svga); ddc_close(virge->ddc); From a7b7b1af679469c849be9b45727a603ca14ea840 Mon Sep 17 00:00:00 2001 From: TC1995 Date: Wed, 14 Aug 2024 20:33:32 +0200 Subject: [PATCH 11/26] Attempt at fixing the threading of the ViRGE. --- src/video/vid_s3_virge.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 3ad183468..9c581ea39 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -275,9 +275,13 @@ typedef struct virge_t { uint64_t blitter_time; int fifo_slots_num; - fifo_entry_t fifo[FIFO_SIZE]; + struct { + uint32_t addr_type; + uint32_t val; + } fifo[FIFO_SIZE]; atomic_int fifo_write_idx, fifo_read_idx; + atomic_int fifo_write_idx2; atomic_int virge_busy; uint8_t render_thread_run; @@ -422,7 +426,8 @@ s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) virge->fifo[virge->fifo_write_idx].addr_type = (addr & FIFO_ADDR) | type; virge->fifo_write_idx++; - if ((FIFO_ENTRIES < 8) || (FIFO_ENTRIES > 0xe000)) + + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) s3_virge_wake_fifo_thread(virge); } @@ -1420,7 +1425,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) case 0xad00: /*2D Polygon Command Set*/ virge->s3d.cmd_set = val; if (!(val & CMD_SET_AE)) { - pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1436,7 +1441,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) virge->s3d.rdest_x = (val >> 16) & 0x7ff; virge->s3d.rdest_y = val & 0x7ff; if (virge->s3d.cmd_set & CMD_SET_AE) { - pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1457,7 +1462,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) virge->s3d.lycnt = val & 0x7ff; virge->s3d.line_dir = val >> 31; if (virge->s3d.cmd_set & CMD_SET_AE) { - pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1480,7 +1485,7 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) case 0xad7c: /*2D Polygon Y Count*/ virge->s3d.pycnt = val & 0x300007ff; if (virge->s3d.cmd_set & CMD_SET_AE) { - pclog("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); + s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); s3_virge_bitblt(virge, -1, 0); } break; @@ -1863,7 +1868,8 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) ret |= virge->subsys_stat; if (!virge->virge_busy) s3_virge_wake_fifo_thread(virge); - pclog("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx); + + s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx2); break; case 0x850c: @@ -2025,7 +2031,7 @@ s3_virge_fifo_thread(void *priv) thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; while (FIFO_NOT_EMPTY) { - s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR); + s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x, val=%08x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val); switch (virge->fifo[virge->fifo_read_idx].addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: s3_virge_mmio_fifo_write(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); @@ -2042,11 +2048,18 @@ s3_virge_fifo_thread(void *priv) virge->fifo[virge->fifo_read_idx].addr_type = FIFO_INVALID; virge->fifo_read_idx++; + + if (FIFO_ENTRIES > 0xe000) + thread_set_event(virge->fifo_not_full_event); + } + if (virge->fifo_read_idx >= virge->fifo_write_idx) { + virge->fifo_read_idx = 0; + virge->fifo_write_idx = 0; + virge->virge_busy = 0; + s3_virge_log("Complete.\n"); + virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; + s3_virge_update_irqs(virge); } - virge->virge_busy = 0; - s3_virge_log("Complete.\n"); - virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; - s3_virge_update_irqs(virge); } } @@ -2143,7 +2156,7 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) virge_t *virge = (virge_t *) priv; svga_t *svga = &virge->svga; - pclog("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); + s3_virge_log("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); if (((addr & 0xfffc) < 0x8000) || ((addr & 0xe000) == 0xa000)) s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); else { From 08fc20bcc5a099a67e30bf56039f50f7e99755e7 Mon Sep 17 00:00:00 2001 From: OBattler Date: Wed, 14 Aug 2024 22:47:06 +0200 Subject: [PATCH 12/26] S3 Virge: Bring the threading completely up to par with PCem. --- src/video/vid_s3_virge.c | 952 ++++++++++++++++++++------------------- 1 file changed, 482 insertions(+), 470 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 9c581ea39..c5ed474bf 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -54,11 +54,13 @@ static int dither[4][4] = { #define RB_FULL (RB_ENTRIES == RB_SIZE) #define RB_EMPTY (!RB_ENTRIES) -#define FIFO_SIZE (16 * 4096) +#define FIFO_SIZE 65536 +#define FIFO_MASK (FIFO_SIZE - 1) +#define FIFO_ENTRY_SIZE (1 << 31) #define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) #define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) -#define FIFO_NOT_EMPTY (virge->fifo_read_idx < virge->fifo_write_idx) +#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) #define FIFO_TYPE 0xff000000 #define FIFO_ADDR 0x00ffffff @@ -231,7 +233,7 @@ typedef struct virge_t { uint32_t dest_l, dest_r; } s3d; - s3d_t s3d_tri; + s3d_t s3d_tri; s3d_t s3d_buffer[RB_SIZE]; atomic_int s3d_read_idx, s3d_write_idx; @@ -275,21 +277,18 @@ typedef struct virge_t { uint64_t blitter_time; int fifo_slots_num; - struct { - uint32_t addr_type; - uint32_t val; - } fifo[FIFO_SIZE]; + fifo_entry_t fifo[FIFO_SIZE]; - atomic_int fifo_write_idx, fifo_read_idx; - atomic_int fifo_write_idx2; - atomic_int virge_busy; + atomic_int fifo_write_idx, fifo_read_idx; + atomic_int virge_busy; + + atomic_int fifo_thread_run, render_thread_run; - uint8_t render_thread_run; thread_t *render_thread; event_t *wake_render_thread; + event_t *wake_main_thread; event_t *not_full_event; - uint8_t fifo_thread_run; thread_t *fifo_thread; event_t *wake_fifo_thread; event_t *fifo_not_full_event; @@ -312,6 +311,10 @@ static video_timings_t timing_diamond_stealth3d_3000_pci = { .type = VIDEO_PCI, static video_timings_t timing_virge_dx_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; static video_timings_t timing_virge_agp = { .type = VIDEO_AGP, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; +static inline void wake_fifo_thread(virge_t *virge) { + thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri); static void s3_virge_recalctimings(svga_t *svga); @@ -326,7 +329,8 @@ static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv); static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv); static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv); -static void s3_virge_fifo_thread(void *priv); +static void queue_triangle(virge_t *virge); +static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type); enum { CMD_SET_AE = 1, @@ -406,48 +410,6 @@ s3_virge_wake_fifo_thread(virge_t *virge) thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ } -static void -s3_virge_wait_fifo_idle(virge_t *virge) -{ - while (FIFO_NOT_EMPTY) { - s3_virge_wake_fifo_thread(virge); - thread_wait_event(virge->fifo_not_full_event, 1); - } -} - -static void -s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) -{ - if (FIFO_FULL) { - thread_reset_event(virge->fifo_not_full_event); - thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ - } - virge->fifo[virge->fifo_write_idx].val = val; - virge->fifo[virge->fifo_write_idx].addr_type = (addr & FIFO_ADDR) | type; - - virge->fifo_write_idx++; - - if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) - s3_virge_wake_fifo_thread(virge); -} - -static void -queue_triangle(virge_t *virge) -{ - if (RB_FULL) { - thread_reset_event(virge->not_full_event); - thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/ - } - - virge->s3d_buffer[virge->s3d_write_idx] = virge->s3d_tri; - virge->s3d_write_idx++; - if (virge->s3d_write_idx >= RB_SIZE) - virge->s3d_write_idx = 0; - - if (!virge->s3d_busy) - thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/ -} - static void s3_virge_update_irqs(virge_t *virge) { @@ -457,30 +419,6 @@ s3_virge_update_irqs(virge_t *virge) pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } -static void -s3_virge_render_thread(void *priv) -{ - virge_t *virge = (virge_t *) priv; - - if (virge->render_thread_run) { - thread_wait_event(virge->wake_render_thread, -1); - thread_reset_event(virge->wake_render_thread); - virge->s3d_busy = 1; - while (virge->s3d_read_idx < virge->s3d_write_idx) { - s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx]); - virge->s3d_read_idx++; - if (virge->s3d_read_idx >= RB_SIZE) - virge->s3d_read_idx = 0; - - if (RB_ENTRIES == RB_MASK) - thread_set_event(virge->not_full_event); - } - virge->s3d_busy = 0; - virge->subsys_stat |= INT_S3D_DONE; - s3_virge_update_irqs(virge); - } -} - static void s3_virge_out(uint16_t addr, uint8_t val, void *priv) { @@ -1152,6 +1090,388 @@ s3_virge_vblank_start(svga_t *svga) s3_virge_update_irqs(virge); } +static void s3_virge_wait_fifo_idle(virge_t *virge) { + while (!FIFO_EMPTY) { + wake_fifo_thread(virge); + thread_wait_event(virge->fifo_not_full_event, 1); + } +} + +static uint8_t +s3_virge_mmio_read(uint32_t addr, void *priv) +{ + virge_t *virge = (virge_t *) priv; + uint8_t ret = 0xff; + + s3_virge_log("[%04X:%08X]: MMIO ReadB addr = %04x\n", CS, cpu_state.pc, addr & 0xffff); + + switch (addr & 0xffff) { + case 0x8504: + if (!virge->virge_busy) + s3_virge_wake_fifo_thread(virge); + + ret = virge->subsys_stat; + return ret; + case 0x8505: + ret = 0xc0; + if (virge->s3d_busy || virge->virge_busy || !FIFO_EMPTY) + ret |= 0x10; + else + ret |= 0x30; + + if (!virge->virge_busy) + s3_virge_wake_fifo_thread(virge); + return ret; + + case 0x850c: + ret = virge->advfunc_cntl & 0x3f; + ret |= virge->fifo_slots_num << 6; + ret &= 0xff; + return ret; + case 0x850d: + ret = virge->fifo_slots_num >> 2; + return ret; + + case 0x83b0: + case 0x83b1: + case 0x83b2: + case 0x83b3: + case 0x83b4: + case 0x83b5: + case 0x83b6: + case 0x83b7: + case 0x83b8: + case 0x83b9: + case 0x83ba: + case 0x83bb: + case 0x83bc: + case 0x83bd: + case 0x83be: + case 0x83bf: + case 0x83c0: + case 0x83c1: + case 0x83c2: + case 0x83c3: + case 0x83c4: + case 0x83c5: + case 0x83c6: + case 0x83c7: + case 0x83c8: + case 0x83c9: + case 0x83ca: + case 0x83cb: + case 0x83cc: + case 0x83cd: + case 0x83ce: + case 0x83cf: + case 0x83d0: + case 0x83d1: + case 0x83d2: + case 0x83d3: + case 0x83d4: + case 0x83d5: + case 0x83d6: + case 0x83d7: + case 0x83d8: + case 0x83d9: + case 0x83da: + case 0x83db: + case 0x83dc: + case 0x83dd: + case 0x83de: + case 0x83df: + return s3_virge_in(addr & 0x3ff, virge); + + case 0x859c: + return virge->cmd_dma; + + case 0xff20: + case 0xff21: + ret = virge->serialport & ~(SERIAL_PORT_SCR | SERIAL_PORT_SDR); + if ((virge->serialport & SERIAL_PORT_SCW) && i2c_gpio_get_scl(virge->i2c)) + ret |= SERIAL_PORT_SCR; + if ((virge->serialport & SERIAL_PORT_SDW) && i2c_gpio_get_sda(virge->i2c)) + ret |= SERIAL_PORT_SDR; + return ret; + + default: + break; + } + return 0xff; +} + +static uint16_t +s3_virge_mmio_read_w(uint32_t addr, void *priv) +{ + virge_t *virge = (virge_t *) priv; + uint16_t ret = 0xffff; + + s3_virge_log("[%04X:%08X]: MMIO ReadW addr = %04x\n", CS, cpu_state.pc, addr & 0xfffe); + + switch (addr & 0xfffe) { + case 0x8504: + ret = 0xc000; + if (virge->s3d_busy || virge->virge_busy || !FIFO_EMPTY) + ret |= 0x1000; + else + ret |= 0x3000; + + ret |= virge->subsys_stat; + if (!virge->virge_busy) + s3_virge_wake_fifo_thread(virge); + return ret; + + case 0x850c: + ret = virge->advfunc_cntl & 0x3f; + ret |= virge->fifo_slots_num << 6; + return ret; + + case 0x859c: + return virge->cmd_dma; + + default: + return s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8); + } + + return 0xffff; +} + +static uint32_t +s3_virge_mmio_read_l(uint32_t addr, void *priv) +{ + virge_t *virge = (virge_t *) priv; + uint32_t ret = 0xffffffff; + + switch (addr & 0xfffc) { + case 0x8180: + ret = virge->streams.pri_ctrl; + break; + case 0x8184: + ret = virge->streams.chroma_ctrl; + break; + case 0x8190: + ret = virge->streams.sec_ctrl; + break; + case 0x8194: + ret = virge->streams.chroma_upper_bound; + break; + case 0x8198: + ret = virge->streams.sec_filter; + break; + case 0x81a0: + ret = virge->streams.blend_ctrl; + break; + case 0x81c0: + ret = virge->streams.pri_fb0; + break; + case 0x81c4: + ret = virge->streams.pri_fb1; + break; + case 0x81c8: + ret = virge->streams.pri_stride; + break; + case 0x81cc: + ret = virge->streams.buffer_ctrl; + break; + case 0x81d0: + ret = virge->streams.sec_fb0; + break; + case 0x81d4: + ret = virge->streams.sec_fb1; + break; + case 0x81d8: + ret = virge->streams.sec_stride; + break; + case 0x81dc: + ret = virge->streams.overlay_ctrl; + break; + case 0x81e0: + ret = virge->streams.k1_vert_scale; + break; + case 0x81e4: + ret = virge->streams.k2_vert_scale; + break; + case 0x81e8: + ret = virge->streams.dda_vert_accumulator; + break; + case 0x81ec: + ret = virge->streams.fifo_ctrl; + break; + case 0x81f0: + ret = virge->streams.pri_start; + break; + case 0x81f4: + ret = virge->streams.pri_size; + break; + case 0x81f8: + ret = virge->streams.sec_start; + break; + case 0x81fc: + ret = virge->streams.sec_size; + break; + + case 0x8504: + ret = 0x0000c000; + if (virge->s3d_busy || virge->virge_busy || !FIFO_EMPTY) + ret |= 0x00001000; + else + ret |= 0x00003000; + + ret |= virge->subsys_stat; + if (!virge->virge_busy) + s3_virge_wake_fifo_thread(virge); + + s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx2); + break; + + case 0x850c: + ret = virge->advfunc_cntl & 0x3f; + ret |= virge->fifo_slots_num << 6; + break; + + case 0x8590: + ret = virge->cmd_dma_base; + break; + case 0x8594: + break; + case 0x8598: + ret = virge->dma_ptr; + break; + case 0x859c: + ret = virge->cmd_dma; + break; + + case 0xa4d4: + case 0xa8d4: + case 0xacd4: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.src_base; + break; + case 0xa4d8: + case 0xa8d8: + case 0xacd8: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.dest_base; + break; + case 0xa4dc: + case 0xa8dc: + case 0xacdc: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r; + break; + case 0xa4e0: + case 0xa8e0: + case 0xace0: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b; + break; + case 0xa4e4: + case 0xa8e4: + case 0xace4: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str; + break; + case 0xa4e8: + case 0xace8: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.mono_pat_0; + break; + case 0xa4ec: + case 0xacec: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.mono_pat_1; + break; + case 0xa4f0: + case 0xacf0: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.pat_bg_clr; + break; + case 0xa4f4: + case 0xa8f4: + case 0xacf4: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.pat_fg_clr; + break; + case 0xa4f8: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.src_bg_clr; + break; + case 0xa4fc: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.src_fg_clr; + break; + case 0xa500: + case 0xa900: + case 0xad00: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.cmd_set; + break; + case 0xa504: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.r_width << 16) | virge->s3d.r_height; + break; + case 0xa508: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y; + break; + case 0xa50c: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y; + break; + case 0xa96c: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.lxend0 << 16) | virge->s3d.lxend1; + break; + case 0xa970: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.ldx; + break; + case 0xa974: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.lxstart; + break; + case 0xa978: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.lystart; + break; + case 0xa97c: + s3_virge_wait_fifo_idle(virge); + ret = (virge->s3d.line_dir << 31) | virge->s3d.lycnt; + break; + case 0xad68: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.prdx; + break; + case 0xad6c: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.prxstart; + break; + case 0xad70: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.pldx; + break; + case 0xad74: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.plxstart; + break; + case 0xad78: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.pystart; + break; + case 0xad7c: + s3_virge_wait_fifo_idle(virge); + ret = virge->s3d.pycnt; + break; + + default: + ret = s3_virge_mmio_read_w(addr, priv) | (s3_virge_mmio_read_w(addr + 2, priv) << 16); + break; + } + + s3_virge_log("MMIO ReadL addr = %04x, val = %08x.\n", addr & 0xfffc, ret); + return ret; +} + static void s3_virge_mmio_fifo_write(uint32_t addr, uint8_t val, virge_t *virge) { @@ -1646,380 +1966,6 @@ s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) } } -static uint8_t -s3_virge_mmio_read(uint32_t addr, void *priv) -{ - virge_t *virge = (virge_t *) priv; - uint8_t ret = 0xff; - - s3_virge_log("[%04X:%08X]: MMIO ReadB addr = %04x\n", CS, cpu_state.pc, addr & 0xffff); - - switch (addr & 0xffff) { - case 0x8504: - if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - - ret = virge->subsys_stat; - return ret; - case 0x8505: - ret = 0xc0; - if (virge->s3d_busy || virge->virge_busy || FIFO_NOT_EMPTY) - ret |= 0x10; - else - ret |= 0x30; - - if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - return ret; - - case 0x850c: - ret = virge->advfunc_cntl & 0x3f; - ret |= virge->fifo_slots_num << 6; - ret &= 0xff; - return ret; - case 0x850d: - ret = virge->fifo_slots_num >> 2; - return ret; - - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - return s3_virge_in(addr & 0x3ff, virge); - - case 0x859c: - return virge->cmd_dma; - - case 0xff20: - case 0xff21: - ret = virge->serialport & ~(SERIAL_PORT_SCR | SERIAL_PORT_SDR); - if ((virge->serialport & SERIAL_PORT_SCW) && i2c_gpio_get_scl(virge->i2c)) - ret |= SERIAL_PORT_SCR; - if ((virge->serialport & SERIAL_PORT_SDW) && i2c_gpio_get_sda(virge->i2c)) - ret |= SERIAL_PORT_SDR; - return ret; - - default: - break; - } - return 0xff; -} -static uint16_t -s3_virge_mmio_read_w(uint32_t addr, void *priv) -{ - virge_t *virge = (virge_t *) priv; - uint16_t ret = 0xffff; - - s3_virge_log("[%04X:%08X]: MMIO ReadW addr = %04x\n", CS, cpu_state.pc, addr & 0xfffe); - - switch (addr & 0xfffe) { - case 0x8504: - ret = 0xc000; - if (virge->s3d_busy || virge->virge_busy || FIFO_NOT_EMPTY) - ret |= 0x1000; - else - ret |= 0x3000; - - ret |= virge->subsys_stat; - if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - return ret; - - case 0x850c: - ret = virge->advfunc_cntl & 0x3f; - ret |= virge->fifo_slots_num << 6; - return ret; - - case 0x859c: - return virge->cmd_dma; - - default: - return s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8); - } - - return 0xffff; -} - -static uint32_t -s3_virge_mmio_read_l(uint32_t addr, void *priv) -{ - virge_t *virge = (virge_t *) priv; - uint32_t ret = 0xffffffff; - - switch (addr & 0xfffc) { - case 0x8180: - ret = virge->streams.pri_ctrl; - break; - case 0x8184: - ret = virge->streams.chroma_ctrl; - break; - case 0x8190: - ret = virge->streams.sec_ctrl; - break; - case 0x8194: - ret = virge->streams.chroma_upper_bound; - break; - case 0x8198: - ret = virge->streams.sec_filter; - break; - case 0x81a0: - ret = virge->streams.blend_ctrl; - break; - case 0x81c0: - ret = virge->streams.pri_fb0; - break; - case 0x81c4: - ret = virge->streams.pri_fb1; - break; - case 0x81c8: - ret = virge->streams.pri_stride; - break; - case 0x81cc: - ret = virge->streams.buffer_ctrl; - break; - case 0x81d0: - ret = virge->streams.sec_fb0; - break; - case 0x81d4: - ret = virge->streams.sec_fb1; - break; - case 0x81d8: - ret = virge->streams.sec_stride; - break; - case 0x81dc: - ret = virge->streams.overlay_ctrl; - break; - case 0x81e0: - ret = virge->streams.k1_vert_scale; - break; - case 0x81e4: - ret = virge->streams.k2_vert_scale; - break; - case 0x81e8: - ret = virge->streams.dda_vert_accumulator; - break; - case 0x81ec: - ret = virge->streams.fifo_ctrl; - break; - case 0x81f0: - ret = virge->streams.pri_start; - break; - case 0x81f4: - ret = virge->streams.pri_size; - break; - case 0x81f8: - ret = virge->streams.sec_start; - break; - case 0x81fc: - ret = virge->streams.sec_size; - break; - - case 0x8504: - ret = 0x0000c000; - if (virge->s3d_busy || virge->virge_busy || FIFO_NOT_EMPTY) - ret |= 0x00001000; - else - ret |= 0x00003000; - - ret |= virge->subsys_stat; - if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - - s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx2); - break; - - case 0x850c: - ret = virge->advfunc_cntl & 0x3f; - ret |= virge->fifo_slots_num << 6; - break; - - case 0x8590: - ret = virge->cmd_dma_base; - break; - case 0x8594: - break; - case 0x8598: - ret = virge->dma_ptr; - break; - case 0x859c: - ret = virge->cmd_dma; - break; - - case 0xa4d4: - case 0xa8d4: - case 0xacd4: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.src_base; - break; - case 0xa4d8: - case 0xa8d8: - case 0xacd8: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.dest_base; - break; - case 0xa4dc: - case 0xa8dc: - case 0xacdc: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r; - break; - case 0xa4e0: - case 0xa8e0: - case 0xace0: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b; - break; - case 0xa4e4: - case 0xa8e4: - case 0xace4: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str; - break; - case 0xa4e8: - case 0xace8: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.mono_pat_0; - break; - case 0xa4ec: - case 0xacec: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.mono_pat_1; - break; - case 0xa4f0: - case 0xacf0: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.pat_bg_clr; - break; - case 0xa4f4: - case 0xa8f4: - case 0xacf4: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.pat_fg_clr; - break; - case 0xa4f8: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.src_bg_clr; - break; - case 0xa4fc: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.src_fg_clr; - break; - case 0xa500: - case 0xa900: - case 0xad00: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.cmd_set; - break; - case 0xa504: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.r_width << 16) | virge->s3d.r_height; - break; - case 0xa508: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y; - break; - case 0xa50c: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y; - break; - case 0xa96c: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.lxend0 << 16) | virge->s3d.lxend1; - break; - case 0xa970: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.ldx; - break; - case 0xa974: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.lxstart; - break; - case 0xa978: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.lystart; - break; - case 0xa97c: - s3_virge_wait_fifo_idle(virge); - ret = (virge->s3d.line_dir << 31) | virge->s3d.lycnt; - break; - case 0xad68: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.prdx; - break; - case 0xad6c: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.prxstart; - break; - case 0xad70: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.pldx; - break; - case 0xad74: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.plxstart; - break; - case 0xad78: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.pystart; - break; - case 0xad7c: - s3_virge_wait_fifo_idle(virge); - ret = virge->s3d.pycnt; - break; - - default: - ret = s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8) | (s3_virge_mmio_read(addr + 2, virge) << 16) | (s3_virge_mmio_read(addr + 3, virge) << 24); - break; - } - - s3_virge_log("MMIO ReadL addr = %04x, val = %08x.\n", addr & 0xfffc, ret); - return ret; -} - static void s3_virge_fifo_thread(void *priv) { @@ -2030,39 +1976,66 @@ s3_virge_fifo_thread(void *priv) thread_wait_event(virge->wake_fifo_thread, -1); thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; - while (FIFO_NOT_EMPTY) { + while (!FIFO_EMPTY) { s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x, val=%08x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val); - switch (virge->fifo[virge->fifo_read_idx].addr_type & FIFO_TYPE) { + + uint64_t start_time = plat_timer_read(); + uint64_t end_time; + fifo_entry_t *fifo = &virge->fifo[virge->fifo_read_idx & FIFO_MASK]; + uint32_t val = fifo->val; + + switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: - s3_virge_mmio_fifo_write(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); + s3_virge_mmio_fifo_write(fifo->addr_type & FIFO_ADDR, + val, virge); break; case FIFO_WRITE_WORD: - s3_virge_mmio_fifo_write_w(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); + s3_virge_mmio_fifo_write_w(fifo->addr_type & FIFO_ADDR, + val, virge); break; case FIFO_WRITE_DWORD: - s3_virge_mmio_fifo_write_l(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); + s3_virge_mmio_fifo_write_l(fifo->addr_type & FIFO_ADDR, + val, virge); break; default: break; } - virge->fifo[virge->fifo_read_idx].addr_type = FIFO_INVALID; virge->fifo_read_idx++; + fifo->addr_type = FIFO_INVALID; if (FIFO_ENTRIES > 0xe000) thread_set_event(virge->fifo_not_full_event); + + end_time = plat_timer_read(); + virge->blitter_time += end_time - start_time; } - if (virge->fifo_read_idx >= virge->fifo_write_idx) { - virge->fifo_read_idx = 0; - virge->fifo_write_idx = 0; - virge->virge_busy = 0; - s3_virge_log("Complete.\n"); - virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; - s3_virge_update_irqs(virge); - } + virge->virge_busy = 0; + virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; + s3_virge_update_irqs(virge); } } +static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) { + fifo_entry_t *fifo = &virge->fifo[virge->fifo_write_idx & FIFO_MASK]; + + if (FIFO_FULL) { + thread_reset_event(virge->fifo_not_full_event); + if (FIFO_FULL) + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } + + fifo->val = val; + fifo->addr_type = (addr & FIFO_ADDR) | type; + + virge->fifo_write_idx++; + + if (FIFO_ENTRIES > 0xe000) + wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) + wake_fifo_thread(virge); +} + static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) { @@ -3834,6 +3807,43 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) virge->blitter_time += end_time - start_time; } +static void +s3_virge_render_thread(void *priv) +{ + virge_t *virge = (virge_t *) priv; + + while (virge->render_thread_run) { + thread_wait_event(virge->wake_render_thread, -1); + thread_reset_event(virge->wake_render_thread); + virge->s3d_busy = 1; + while (!RB_EMPTY) { + s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]); + virge->s3d_read_idx++; + + if (RB_ENTRIES == RB_SIZE - 1) + thread_set_event(virge->not_full_event); + } + virge->s3d_busy = 0; + virge->subsys_stat |= INT_S3D_DONE; + s3_virge_update_irqs(virge); + } +} + +static void +queue_triangle(virge_t *virge) +{ + if (RB_FULL) { + thread_reset_event(virge->not_full_event); + if (RB_FULL) + thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/ + } + + virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri; + virge->s3d_write_idx++; + if (!virge->s3d_busy) + thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/ +} + static void s3_virge_hwcursor_draw(svga_t *svga, int displine) { @@ -4704,14 +4714,15 @@ s3_virge_init(const device_t *info) virge->svga.force_old_addr = 1; - virge->wake_render_thread = thread_create_event(); - virge->not_full_event = thread_create_event(); virge->render_thread_run = 1; + virge->wake_render_thread = thread_create_event(); + virge->wake_main_thread = thread_create_event(); + virge->not_full_event = thread_create_event(); virge->render_thread = thread_create(s3_virge_render_thread, virge); + virge->fifo_thread_run = 1; virge->wake_fifo_thread = thread_create_event(); virge->fifo_not_full_event = thread_create_event(); - virge->fifo_thread_run = 1; virge->fifo_thread = thread_create(s3_virge_fifo_thread, virge); virge->local = info->local; @@ -4730,6 +4741,7 @@ s3_virge_close(void *priv) thread_set_event(virge->wake_render_thread); thread_wait(virge->render_thread); thread_destroy_event(virge->not_full_event); + thread_destroy_event(virge->wake_main_thread); thread_destroy_event(virge->wake_render_thread); virge->fifo_thread_run = 0; From a0d662e9c5f0e342a5542c674c65328724a2129a Mon Sep 17 00:00:00 2001 From: TC1995 Date: Thu, 15 Aug 2024 23:04:48 +0200 Subject: [PATCH 13/26] S3 changes (and TVP3026) of the day (August 15th, 2024) 1. TVP3026: disable lut mapping on 24bpp modes (mainly the S3 Vision968 drivers which use the TVP3026 ramdac). 2. S3 Vision968 (MiroVideo 40SV Ergo): Corrected 1280x1024 24bpp resolution. 3. ViRGE class: limit the FIFO entries in the bitblt regs to less than 16 entries and make the Image Transfers FIFO writes wraparound to 1 and reads to 0 (to allow at least a read to a write in the thread without hanging the entire emulator). Fixes Win3.1 PBRUSH colors while keeping performance on par with PCem. --- src/video/vid_s3.c | 11 +++++ src/video/vid_s3_virge.c | 76 ++++++++++++++++++++++------------ src/video/vid_tvp3026_ramdac.c | 2 +- 3 files changed, 62 insertions(+), 27 deletions(-) diff --git a/src/video/vid_s3.c b/src/video/vid_s3.c index 7dc070fd3..d2b5a87fe 100644 --- a/src/video/vid_s3.c +++ b/src/video/vid_s3.c @@ -3965,6 +3965,17 @@ s3_recalctimings(svga_t *svga) svga->dots_per_clock = (svga->dots_per_clock << 1) / 3; break; + case S3_VISION968: + switch (s3->card_type) { + case S3_MIROVIDEO40SV_ERGO_968: + svga->hdisp = (svga->hdisp / 3) << 2; + svga->dots_per_clock = (svga->hdisp / 3) << 2; + break; + default: + break; + } + break; + case S3_TRIO64: case S3_TRIO32: svga->hdisp /= 3; diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index c5ed474bf..ea3a899f4 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -311,10 +311,6 @@ static video_timings_t timing_diamond_stealth3d_3000_pci = { .type = VIDEO_PCI, static video_timings_t timing_virge_dx_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; static video_timings_t timing_virge_agp = { .type = VIDEO_AGP, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; -static inline void wake_fifo_thread(virge_t *virge) { - thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ -} - static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri); static void s3_virge_recalctimings(svga_t *svga); @@ -761,21 +757,21 @@ s3_virge_recalctimings(svga_t *svga) } if (svga->crtc[0x5d] & 0x01) - svga->htotal += 0x100; + svga->htotal |= 0x100; if (svga->crtc[0x5d] & 0x02) { - svga->hdisp_time += 0x100; - svga->hdisp += 0x100 * svga->dots_per_clock; + svga->hdisp_time |= 0x100; + svga->hdisp |= (0x100 * svga->dots_per_clock); } if (svga->crtc[0x5e] & 0x01) - svga->vtotal += 0x400; + svga->vtotal |= 0x400; if (svga->crtc[0x5e] & 0x02) - svga->dispend += 0x400; + svga->dispend |= 0x400; if (svga->crtc[0x5e] & 0x04) - svga->vblankstart += 0x400; + svga->vblankstart |= 0x400; if (svga->crtc[0x5e] & 0x10) - svga->vsyncstart += 0x400; + svga->vsyncstart |= 0x400; if (svga->crtc[0x5e] & 0x40) - svga->split += 0x400; + svga->split |= 0x400; svga->interlace = svga->crtc[0x42] & 0x20; if (((svga->miscout >> 2) & 3) == 3) { @@ -821,9 +817,9 @@ s3_virge_recalctimings(svga_t *svga) { svga->ma_latch |= (virge->ma_ext << 16); if (svga->crtc[0x51] & 0x30) - svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4; + svga->rowoffset |= (svga->crtc[0x51] & 0x30) << 4; else if (svga->crtc[0x43] & 0x04) - svga->rowoffset += 0x100; + svga->rowoffset |= 0x100; if (!svga->rowoffset) svga->rowoffset = 256; @@ -1092,7 +1088,7 @@ s3_virge_vblank_start(svga_t *svga) static void s3_virge_wait_fifo_idle(virge_t *virge) { while (!FIFO_EMPTY) { - wake_fifo_thread(virge); + s3_virge_wake_fifo_thread(virge); thread_wait_event(virge->fifo_not_full_event, 1); } } @@ -1121,6 +1117,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) if (!virge->virge_busy) s3_virge_wake_fifo_thread(virge); + + s3_virge_log("Subsys status BYTE1, busy=%d, fiford=%d, fifowr=%d, ret=%x.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx, ret); return ret; case 0x850c: @@ -1321,7 +1319,7 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) if (!virge->virge_busy) s3_virge_wake_fifo_thread(virge); - s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx2); + s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx); break; case 0x850c: @@ -1972,17 +1970,19 @@ s3_virge_fifo_thread(void *priv) virge_t *virge = (virge_t *)priv; while (virge->fifo_thread_run) { + s3_virge_log("Thread, entries=%d.\n", FIFO_ENTRIES); thread_set_event(virge->fifo_not_full_event); - thread_wait_event(virge->wake_fifo_thread, -1); - thread_reset_event(virge->wake_fifo_thread); + //thread_wait_event(virge->wake_fifo_thread, -1); + //thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; + s3_virge_log("Busy.\n"); while (!FIFO_EMPTY) { s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x, val=%08x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val); - uint64_t start_time = plat_timer_read(); uint64_t end_time; fifo_entry_t *fifo = &virge->fifo[virge->fifo_read_idx & FIFO_MASK]; uint32_t val = fifo->val; + int op_type = (((fifo->addr_type & FIFO_ADDR) & 0x1c00) >> 10) - 1; switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: @@ -2004,8 +2004,14 @@ s3_virge_fifo_thread(void *priv) virge->fifo_read_idx++; fifo->addr_type = FIFO_INVALID; - if (FIFO_ENTRIES > 0xe000) + if (op_type) { + if (FIFO_ENTRIES > 0xe000) + thread_set_event(virge->fifo_not_full_event); + } else { thread_set_event(virge->fifo_not_full_event); + if (((fifo->addr_type & FIFO_ADDR) & 0xffff) >= 0x8000) + virge->fifo_read_idx &= FIFO_MASK; + } end_time = plat_timer_read(); virge->blitter_time += end_time - start_time; @@ -2016,7 +2022,10 @@ s3_virge_fifo_thread(void *priv) } } -static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) { +static void +s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) +{ + int op_type = ((addr & 0x1c00) >> 10) - 1; fifo_entry_t *fifo = &virge->fifo[virge->fifo_write_idx & FIFO_MASK]; if (FIFO_FULL) { @@ -2030,10 +2039,25 @@ static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t virge->fifo_write_idx++; - if (FIFO_ENTRIES > 0xe000) - wake_fifo_thread(virge); - if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) - wake_fifo_thread(virge); + if (op_type) { + if (FIFO_ENTRIES > 0xe000) + s3_virge_wake_fifo_thread(virge); + if ((FIFO_ENTRIES > 0xe000) || (FIFO_ENTRIES < 8)) + s3_virge_wake_fifo_thread(virge); + } else { + if (FIFO_ENTRIES < 16) { + s3_virge_wake_fifo_thread(virge); + s3_virge_log("FIFO ENTRIES=%d.\n", FIFO_ENTRIES); + } else { + s3_virge_log("FIFO FULL.\n"); + thread_reset_event(virge->fifo_not_full_event); + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } + if (((addr & 0xffff) >= 0x8000)) { + if (virge->fifo_write_idx == FIFO_SIZE) + virge->fifo_write_idx = 1; + } + } } static void @@ -3820,7 +3844,7 @@ s3_virge_render_thread(void *priv) s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]); virge->s3d_read_idx++; - if (RB_ENTRIES == RB_SIZE - 1) + if (RB_ENTRIES == RB_MASK) thread_set_event(virge->not_full_event); } virge->s3d_busy = 0; diff --git a/src/video/vid_tvp3026_ramdac.c b/src/video/vid_tvp3026_ramdac.c index b50d0406b..6c001b461 100644 --- a/src/video/vid_tvp3026_ramdac.c +++ b/src/video/vid_tvp3026_ramdac.c @@ -516,7 +516,7 @@ tvp3026_recalctimings(void *priv, svga_t *svga) svga->interlace = !!(ramdac->ccr & 0x40); /* TODO: Figure out gamma correction for 15/16 bpp color. */ - svga->lut_map = !!(svga->bpp >= 15 && (ramdac->true_color & 0xf0) != 0x00); + svga->lut_map = !!((svga->bpp >= 15 && (svga->bpp != 24)) && (ramdac->true_color & 0xf0) != 0x00); if (!(ramdac->clock_sel & 0x70)) { if (ramdac->mcr != 0x98) { From 537e6f76de2e762a9e92f5727fed96dcefd8e9af Mon Sep 17 00:00:00 2001 From: OBattler Date: Fri, 16 Aug 2024 05:43:55 +0200 Subject: [PATCH 14/26] S3 ViRGE: Replace code with re-cleaned-up PCem code with our changes manually re-applied, also revert hardware cursor offset checks, which fixes black bars in games. --- src/video/vid_s3_virge.c | 3782 ++++++++++++++++++-------------------- 1 file changed, 1756 insertions(+), 2026 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index ea3a899f4..aecd309e3 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -40,31 +40,25 @@ #include <86box/vid_svga.h> #include <86box/vid_svga_render.h> +#ifdef MIN + #undef MIN +#endif +#ifdef MAX + #undef MAX +#endif +#ifdef CLAMP + #undef CLAMP +#endif + +static uint64_t virge_time = 0; + static int dither[4][4] = { - {0, 4, 1, 5}, - { 6, 2, 7, 3}, - { 1, 5, 0, 4}, - { 7, 3, 6, 2}, + {0, 4, 1, 5}, + {6, 2, 7, 3}, + {1, 5, 0, 4}, + {7, 3, 6, 2} }; -#define RB_SIZE 256 -#define RB_MASK (RB_SIZE - 1) - -#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx) -#define RB_FULL (RB_ENTRIES == RB_SIZE) -#define RB_EMPTY (!RB_ENTRIES) - -#define FIFO_SIZE 65536 -#define FIFO_MASK (FIFO_SIZE - 1) -#define FIFO_ENTRY_SIZE (1 << 31) - -#define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) -#define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) -#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) - -#define FIFO_TYPE 0xff000000 -#define FIFO_ADDR 0x00ffffff - #define ROM_VIRGE_325 "roms/video/s3virge/86c325.bin" #define ROM_DIAMOND_STEALTH3D_2000 "roms/video/s3virge/s3virge.bin" #define ROM_DIAMOND_STEALTH3D_3000 "roms/video/s3virge/diamondstealth3000.vbi" @@ -76,6 +70,24 @@ static int dither[4][4] = { #define ROM_DIAMOND_STEALTH3D_4000 "roms/video/s3virge/86c357.bin" #define ROM_TRIO3D2X "roms/video/s3virge/TRIO3D2X_8mbsdr.VBI" +#define RB_SIZE 256 +#define RB_MASK (RB_SIZE - 1) + +#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx) +#define RB_FULL (RB_ENTRIES == RB_SIZE) +#define RB_EMPTY (!RB_ENTRIES) + +#define FIFO_SIZE 65536 +#define FIFO_MASK (FIFO_SIZE - 1) +#define FIFO_ENTRY_SIZE (1 << 31) + +#define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) +#define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) +#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) + +#define FIFO_TYPE 0xff000000 +#define FIFO_ADDR 0x00ffffff + enum { S3_VIRGE_325, S3_DIAMOND_STEALTH3D_2000, @@ -98,58 +110,80 @@ enum { }; enum { - FIFO_INVALID = (0x00 << 24), - FIFO_WRITE_BYTE = (0x01 << 24), - FIFO_WRITE_WORD = (0x02 << 24), + FIFO_INVALID = (0x00 << 24), + FIFO_WRITE_BYTE = (0x01 << 24), + FIFO_WRITE_WORD = (0x02 << 24), FIFO_WRITE_DWORD = (0x03 << 24) }; -typedef struct -{ - uint32_t addr_type; - uint32_t val; +typedef struct { + uint32_t addr_type; + uint32_t val; } fifo_entry_t; typedef struct s3d_t { - uint32_t cmd_set; - int clip_l, clip_r, clip_t, clip_b; + uint32_t cmd_set; + int clip_l; + int clip_r; + int clip_t; + int clip_b; - uint32_t dest_base; - uint32_t dest_str; + uint32_t dest_base; + uint32_t dest_str; - uint32_t z_base; - uint32_t z_str; + uint32_t z_base; + uint32_t z_str; - uint32_t tex_base; - uint32_t tex_bdr_clr; - uint32_t tbv, tbu; - int32_t TdVdX, TdUdX; - int32_t TdVdY, TdUdY; - uint32_t tus, tvs; + uint32_t tex_base; + uint32_t tex_bdr_clr; + uint32_t tbv; + uint32_t tbu; + int32_t TdVdX; + int32_t TdUdX; + int32_t TdVdY; + int32_t TdUdY; + uint32_t tus; + uint32_t tvs; - int32_t TdZdX, TdZdY; - uint32_t tzs; + int32_t TdZdX; + int32_t TdZdY; + uint32_t tzs; - int32_t TdWdX, TdWdY; - uint32_t tws; + int32_t TdWdX; + int32_t TdWdY; + uint32_t tws; - int32_t TdDdX, TdDdY; - uint32_t tds; + int32_t TdDdX; + int32_t TdDdY; + uint32_t tds; - int16_t TdGdX, TdBdX, TdRdX, TdAdX; - int16_t TdGdY, TdBdY, TdRdY, TdAdY; - uint32_t tgs, tbs, trs, tas; + int16_t TdGdX; + int16_t TdBdX; + int16_t TdRdX; + int16_t TdAdX; + int16_t TdGdY; + int16_t TdBdY; + int16_t TdRdY; + int16_t TdAdY; + uint32_t tgs; + uint32_t tbs; + uint32_t trs; + uint32_t tas; - uint32_t TdXdY12; - uint32_t txend12; - uint32_t TdXdY01; - uint32_t txend01; - uint32_t TdXdY02; - uint32_t txs; - uint32_t tys; - int ty01, ty12, tlr; + uint32_t TdXdY12; + uint32_t txend12; + uint32_t TdXdY01; + uint32_t txend01; + uint32_t TdXdY02; + uint32_t txs; + uint32_t tys; + int ty01; + int ty12; + int tlr; - uint8_t fog_r, fog_g, fog_b; + uint8_t fog_r; + uint8_t fog_g; + uint8_t fog_b; } s3d_t; typedef struct virge_t { @@ -157,43 +191,51 @@ typedef struct virge_t { mem_mapping_t mmio_mapping; mem_mapping_t new_mmio_mapping; - rom_t bios_rom; + rom_t bios_rom; - svga_t svga; + svga_t svga; - uint8_t bank; - uint8_t ma_ext; - uint8_t reg6b, lfb_bios; + uint8_t bank; + uint8_t ma_ext; - uint8_t virge_id, virge_id_high, virge_id_low, virge_rev; + uint8_t virge_id; + uint8_t virge_id_high; + uint8_t virge_id_low; + uint8_t virge_rev; - uint8_t int_line; + uint32_t linear_base; + uint32_t linear_size; - uint32_t linear_base, linear_size; + uint8_t pci_regs[256]; + uint8_t pci_slot; - uint8_t pci_regs[256]; + int chip; - uint8_t pci_slot; - uint8_t irq_state; + int bilinear_enabled; + int dithering_enabled; + int memory_size; - int pci; - int chip; - int is_agp; + int pixel_count; + int tri_count; - int bilinear_enabled; - int dithering_enabled; - uint32_t memory_size; - uint32_t vram_mask; + thread_t * render_thread; + event_t * wake_render_thread; + event_t * wake_main_thread; + event_t * not_full_event; - uint32_t hwc_fg_col, hwc_bg_col; - int hwc_col_stack_pos; + uint32_t hwc_fg_col; + uint32_t hwc_bg_col; + int hwc_col_stack_pos; - struct - { + struct { uint32_t src_base; uint32_t dest_base; - int clip_l, clip_r, clip_t, clip_b; - int dest_str, src_str; + int clip_l; + int clip_r; + int clip_t; + int clip_b; + int dest_str; + int src_str; uint32_t mono_pat_0; uint32_t mono_pat_1; uint32_t pat_bg_clr; @@ -201,20 +243,28 @@ typedef struct virge_t { uint32_t src_bg_clr; uint32_t src_fg_clr; uint32_t cmd_set; - int r_width, r_height; - int rsrc_x, rsrc_y; - int rdest_x, rdest_y; + int r_width; + int r_height; + int rsrc_x; + int rsrc_y; + int rdest_x; + int rdest_y; - int lxend0, lxend1; + int lxend0; + int lxend1; int32_t ldx; - uint32_t lxstart, lystart; + uint32_t lxstart; + uint32_t lystart; int lycnt; int line_dir; - int src_x, src_y; - int dest_x, dest_y; - int w, h; - uint8_t rop; + int src_x; + int src_y; + int dest_x; + int dest_y; + int w; + int h; + uint8_t rop; int data_left_count; uint32_t data_left; @@ -233,14 +283,14 @@ typedef struct virge_t { uint32_t dest_l, dest_r; } s3d; - s3d_t s3d_tri; + s3d_t s3d_tri; - s3d_t s3d_buffer[RB_SIZE]; - atomic_int s3d_read_idx, s3d_write_idx; - atomic_int s3d_busy; + s3d_t s3d_buffer[RB_SIZE]; + int s3d_read_idx; + int s3d_write_idx; + int s3d_busy; - struct - { + struct { uint32_t pri_ctrl; uint32_t chroma_ctrl; uint32_t sec_ctrl; @@ -265,45 +315,64 @@ typedef struct virge_t { uint32_t sec_start; uint32_t sec_size; - int sdif; + int sdif; - int pri_x, pri_y, pri_w, pri_h; - int sec_x, sec_y, sec_w, sec_h; + int pri_x; + int pri_y; + int pri_w; + int pri_h; + int sec_x; + int sec_y; + int sec_w; + int sec_h; } streams; - uint8_t cmd_dma; - uint32_t cmd_dma_base; - uint32_t dma_ptr; - uint64_t blitter_time; + fifo_entry_t fifo[FIFO_SIZE]; + volatile int fifo_read_idx, fifo_write_idx; + + volatile int fifo_thread_run, render_thread_run; + + thread_t * fifo_thread; + event_t *wake_fifo_thread; + event_t * fifo_not_full_event; + + int virge_busy; + + uint8_t subsys_stat; + uint8_t subsys_cntl; + + int local; + + uint8_t serialport; + + uint8_t irq_state; + uint8_t advfunc_cntl; + + void *i2c, *ddc; + + int onboard; int fifo_slots_num; - fifo_entry_t fifo[FIFO_SIZE]; + uint32_t vram_mask; - atomic_int fifo_write_idx, fifo_read_idx; - atomic_int virge_busy; + uint8_t reg6b; + uint8_t lfb_bios; + uint8_t int_line; + uint8_t cmd_dma; - atomic_int fifo_thread_run, render_thread_run; + uint32_t cmd_dma_base; + uint32_t dma_ptr; - thread_t *render_thread; - event_t *wake_render_thread; - event_t *wake_main_thread; - event_t *not_full_event; - - thread_t *fifo_thread; - event_t *wake_fifo_thread; - event_t *fifo_not_full_event; - - int local; - - uint8_t subsys_stat, subsys_cntl, advfunc_cntl; - - uint8_t serialport; - - void *i2c, *ddc; - - int onboard; + int pci; + int is_agp; } virge_t; +static __inline void +wake_fifo_thread(virge_t *virge) { + /* Wake up FIFO thread if moving from idle */ + thread_set_event(virge->wake_fifo_thread); +} + static virge_t *reset_state = NULL; static video_timings_t timing_diamond_stealth3d_2000_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; @@ -311,48 +380,45 @@ static video_timings_t timing_diamond_stealth3d_3000_pci = { .type = VIDEO_PCI, static video_timings_t timing_virge_dx_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; static video_timings_t timing_virge_agp = { .type = VIDEO_AGP, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; -static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri); +static void queue_triangle(virge_t *virge); -static void s3_virge_recalctimings(svga_t *svga); -static void s3_virge_updatemapping(virge_t *virge); +static void s3_virge_recalctimings(svga_t *svga); +static void s3_virge_updatemapping(virge_t *virge); -static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat); +static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat); static uint8_t s3_virge_mmio_read(uint32_t addr, void *priv); -static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *rivp); +static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *priv); static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *priv); static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv); static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv); static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv); -static void queue_triangle(virge_t *virge); -static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type); - enum { - CMD_SET_AE = 1, - CMD_SET_HC = (1 << 1), + CMD_SET_AE = 1, + CMD_SET_HC = (1 << 1), - CMD_SET_FORMAT_MASK = (7 << 2), - CMD_SET_FORMAT_8 = (0 << 2), - CMD_SET_FORMAT_16 = (1 << 2), - CMD_SET_FORMAT_24 = (2 << 2), + CMD_SET_FORMAT_MASK = (7 << 2), + CMD_SET_FORMAT_8 = (0 << 2), + CMD_SET_FORMAT_16 = (1 << 2), + CMD_SET_FORMAT_24 = (2 << 2), - CMD_SET_MS = (1 << 6), - CMD_SET_IDS = (1 << 7), - CMD_SET_MP = (1 << 8), - CMD_SET_TP = (1 << 9), + CMD_SET_MS = (1 << 6), + CMD_SET_IDS = (1 << 7), + CMD_SET_MP = (1 << 8), + CMD_SET_TP = (1 << 9), - CMD_SET_ITA_MASK = (3 << 10), - CMD_SET_ITA_BYTE = (0 << 10), - CMD_SET_ITA_WORD = (1 << 10), - CMD_SET_ITA_DWORD = (2 << 10), + CMD_SET_ITA_MASK = (3 << 10), + CMD_SET_ITA_BYTE = (0 << 10), + CMD_SET_ITA_WORD = (1 << 10), + CMD_SET_ITA_DWORD = (2 << 10), - CMD_SET_ZUP = (1 << 23), + CMD_SET_ZUP = (1 << 23), - CMD_SET_ZB_MODE = (3 << 24), + CMD_SET_ZB_MODE = (3 << 24), - CMD_SET_XP = (1 << 25), - CMD_SET_YP = (1 << 26), + CMD_SET_XP = (1 << 25), + CMD_SET_YP = (1 << 26), CMD_SET_COMMAND_MASK = (15 << 27) }; @@ -363,11 +429,11 @@ enum { #define CMD_SET_TWE (1 << 26) enum { - CMD_SET_COMMAND_BITBLT = (0 << 27), - CMD_SET_COMMAND_RECTFILL = (2 << 27), - CMD_SET_COMMAND_LINE = (3 << 27), - CMD_SET_COMMAND_POLY = (5 << 27), - CMD_SET_COMMAND_NOP = (15 << 27) + CMD_SET_COMMAND_BITBLT = (0 << 27), + CMD_SET_COMMAND_RECTFILL = (2 << 27), + CMD_SET_COMMAND_LINE = (3 << 27), + CMD_SET_COMMAND_POLY = (5 << 27), + CMD_SET_COMMAND_NOP = (15 << 27) }; #define INT_VSY (1 << 0) @@ -382,44 +448,18 @@ enum { #define SERIAL_PORT_SCR (1 << 2) #define SERIAL_PORT_SDR (1 << 3) -#ifdef ENABLE_S3_VIRGE_LOG -int s3_virge_do_log = ENABLE_S3_VIRGE_LOG; - static void -s3_virge_log(const char *fmt, ...) -{ - va_list ap; - - if (s3_virge_do_log) { - va_start(ap, fmt); - pclog_ex(fmt, ap); - va_end(ap); - } -} -#else -# define s3_virge_log(fmt, ...) -#endif - -static __inline void -s3_virge_wake_fifo_thread(virge_t *virge) -{ - thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ -} - -static void -s3_virge_update_irqs(virge_t *virge) -{ - if ((virge->svga.crtc[0x32] & 0x10) && (virge->subsys_stat & (virge->subsys_cntl & INT_MASK))) +s3_virge_update_irqs(virge_t *virge) { + if ((virge->svga.crtc[0x32] & 0x10) && (virge->subsys_stat & virge->subsys_cntl & INT_MASK)) pci_set_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); else pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } static void -s3_virge_out(uint16_t addr, uint8_t val, void *priv) -{ +s3_virge_out(uint16_t addr, uint8_t val, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; uint8_t old; uint32_t cursoraddr; @@ -433,8 +473,7 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga_recalctimings(svga); return; } - if (svga->seqaddr == 4) /*Chain-4 - update banking*/ - { + if (svga->seqaddr == 4) { /*Chain-4 - update banking*/ if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16; else @@ -458,7 +497,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) return; if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) val = (svga->crtc[7] & ~0x10) | (val & 0x10); - if ((svga->crtcreg >= 0x20) && (svga->crtcreg < 0x40) && (svga->crtcreg != 0x36) && (svga->crtcreg != 0x38) && (svga->crtcreg != 0x39) && ((svga->crtc[0x38] & 0xcc) != 0x48)) + if ((svga->crtcreg >= 0x20) && (svga->crtcreg < 0x40) && + (svga->crtcreg != 0x36) && (svga->crtcreg != 0x38) && + (svga->crtcreg != 0x39) && ((svga->crtc[0x38] & 0xcc) != 0x48)) return; if ((svga->crtcreg >= 0x40) && ((svga->crtc[0x39] & 0xe0) != 0xa0)) return; @@ -466,11 +507,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) return; if (svga->crtcreg >= 0x80) return; - old = svga->crtc[svga->crtcreg]; - svga->crtc[svga->crtcreg] = val; - if (svga->crtcreg > 0x18) - s3_virge_log("OUTB VGA reg = %02x, val = %02x\n", svga->crtcreg, val); + old = svga->crtc[svga->crtcreg]; + svga->crtc[svga->crtcreg] = val; switch (svga->crtcreg) { case 0x31: @@ -528,7 +567,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga->hwcursor.xoff = svga->crtc[0x4e] & 0x3f; svga->hwcursor.yoff = svga->crtc[0x4f] & 0x3f; cursoraddr = (virge->memory_size == 8) ? 0x1fff : 0x0fff; - svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & cursoraddr) * 1024) + (svga->hwcursor.yoff * 16); + svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | + svga->crtc[0x4d]) & cursoraddr) * 1024) + (svga->hwcursor.yoff * 16); break; case 0x4a: @@ -552,7 +592,7 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) switch (virge->hwc_col_stack_pos) { case 0: virge->hwc_bg_col = (virge->hwc_bg_col & 0xffff00) | val; - break; + break; case 1: virge->hwc_bg_col = (virge->hwc_bg_col & 0xff00ff) | (val << 8); break; @@ -600,8 +640,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga->bpp = (virge->chip == S3_VIRGEVX) ? 24 : 32; break; default: - svga->bpp = 8; - break; + svga->bpp = 8; + break; } break; @@ -616,7 +656,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) if (svga->crtcreg < 0xe || svga->crtcreg > 0x10) { if ((svga->crtcreg == 0xc) || (svga->crtcreg == 0xd)) { svga->fullchange = 3; - svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + ((svga->crtc[8] & 0x60) >> 5); + svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + + ((svga->crtc[8] & 0x60) >> 5); if ((svga->crtc[0x67] & 0xc) != 0xc) svga->ma_latch |= (virge->ma_ext << 16); } else { @@ -634,10 +675,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) } static uint8_t -s3_virge_in(uint16_t addr, void *priv) -{ +s3_virge_in(uint16_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; uint8_t ret; if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) @@ -660,10 +700,10 @@ s3_virge_in(uint16_t addr, void *priv) ret = 0xff; break; - case 0x3D4: + case 0x3d4: ret = svga->crtcreg; break; - case 0x3D5: + case 0x3d5: switch (svga->crtcreg) { case 0x2d: ret = virge->virge_id_high; @@ -680,15 +720,15 @@ s3_virge_in(uint16_t addr, void *priv) case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break; - case 0x33: - ret = (svga->crtc[0x33] | 0x04); - break; case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break; + case 0x36: + ret = (svga->crtc[0x36] & 0xfc) | 2; + break; /*PCI bus*/ case 0x45: virge->hwc_col_stack_pos = 0; - ret = svga->crtc[0x45]; + ret = svga->crtc[0x45]; break; case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); @@ -735,13 +775,13 @@ s3_virge_in(uint16_t addr, void *priv) ret = svga_in(addr, svga); break; } + return ret; } static void -s3_virge_recalctimings(svga_t *svga) -{ - const virge_t *virge = (virge_t *) svga->priv; +s3_virge_recalctimings(svga_t *svga) { + virge_t *virge = (virge_t *) svga->priv; svga->hdisp = svga->hdisp_old; @@ -778,7 +818,7 @@ s3_virge_recalctimings(svga_t *svga) int n = svga->seqregs[0x12] & 0x1f; int r = (svga->seqregs[0x12] >> 5); - if (virge->chip == S3_VIRGEVX || virge->chip == S3_VIRGEDX) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_VIRGEDX)) r &= 7; else if (virge->chip >= S3_VIRGEGX2) r &= 10; @@ -812,9 +852,7 @@ s3_virge_recalctimings(svga_t *svga) video_force_resize_set_monitor(1, svga->monitor_index); } - /* ViRGE/GX2 and later does not use primary stream registers. */ - if ((svga->crtc[0x67] & 0xc) != 0xc || virge->chip >= S3_VIRGEGX2) /*VGA mode*/ - { + if ((svga->crtc[0x67] & 0xc) != 0xc) { /*VGA mode*/ svga->ma_latch |= (virge->ma_ext << 16); if (svga->crtc[0x51] & 0x30) svga->rowoffset |= (svga->crtc[0x51] & 0x30) << 4; @@ -824,47 +862,42 @@ s3_virge_recalctimings(svga_t *svga) svga->rowoffset = 256; svga->lowres = !((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)); - if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)) { - switch (svga->bpp) { - case 8: - svga->render = svga_render_8bpp_highres; - break; - case 15: - svga->render = svga_render_15bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) { - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - svga->hdisp >>= 1; - svga->dots_per_clock >>= 1; - } - break; - case 16: - svga->render = svga_render_16bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) { - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - svga->hdisp >>= 1; - svga->dots_per_clock >>= 1; - } - break; - case 24: - svga->render = svga_render_24bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) - svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/ - break; - case 32: - svga->render = svga_render_32bpp_highres; - break; + if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)) switch (svga->bpp) { + case 8: + svga->render = svga_render_8bpp_highres; + break; + case 15: + svga->render = svga_render_15bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) { + svga->hdisp >>= 1; + svga->dots_per_clock >>= 1; + } + break; + case 16: + svga->render = svga_render_16bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) { + svga->hdisp >>= 1; + svga->dots_per_clock >>= 1; + } + break; + case 24: + svga->render = svga_render_24bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) + svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/ + break; + case 32: + svga->render = svga_render_32bpp_highres; + break; default: break; - } } - svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && (svga->crtc[0x32] & 0x40)) ? 0x3ffff : virge->vram_mask; + + svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && + (svga->crtc[0x32] & 0x40)) ? 0x3ffff : virge->vram_mask; + svga->overlay.ena = 0; - s3_virge_log("VGA mode\n"); + if (virge->chip >= S3_VIRGEGX2 && (svga->crtc[0x67] & 0xc) == 0xc) { /* ViRGE/GX2 and later does not use primary stream registers. */ svga->overlay.x = virge->streams.sec_x; @@ -881,8 +914,7 @@ s3_virge_recalctimings(svga_t *svga) svga->rowoffset = virge->streams.pri_stride >> 3; svga->vram_display_mask = virge->vram_mask; } - } else /*Streams mode*/ - { + } else { /*Streams mode*/ if (virge->streams.buffer_ctrl & 1) svga->ma_latch = virge->streams.pri_fb1 >> 2; else @@ -892,8 +924,8 @@ s3_virge_recalctimings(svga_t *svga) if (virge->streams.pri_h < svga->dispend) svga->dispend = virge->streams.pri_h; - svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x; - svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y; + svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x; + svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y; svga->overlay.cur_ysize = virge->streams.sec_h; if (virge->streams.buffer_ctrl & 2) @@ -901,12 +933,14 @@ s3_virge_recalctimings(svga_t *svga) else svga->overlay.addr = virge->streams.sec_fb0; - svga->overlay.ena = (svga->overlay.x >= 0); + svga->overlay.ena = (svga->overlay.x >= 0); svga->overlay.v_acc = virge->streams.dda_vert_accumulator; - svga->rowoffset = virge->streams.pri_stride >> 3; + + svga->rowoffset = virge->streams.pri_stride >> 3; if (virge->chip <= S3_VIRGEDX && svga->overlay.ena) { - svga->overlay.ena = (((virge->streams.blend_ctrl >> 24) & 7) == 0b000) || (((virge->streams.blend_ctrl >> 24) & 7) == 0b101); + svga->overlay.ena = (((virge->streams.blend_ctrl >> 24) & 7) == 0b000) || + (((virge->streams.blend_ctrl >> 24) & 7) == 0b101); } else if (virge->chip == S3_VIRGEGX2 && svga->overlay.ena) { /* 0x20 = Secondary Stream enabled */ /* 0x2000 = Primary Stream enabled */ @@ -918,17 +952,9 @@ s3_virge_recalctimings(svga_t *svga) svga->render = svga_render_8bpp_highres; break; case 3: /*KRGB-16 (1.5.5.5)*/ - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - // svga->dots_per_clock >>= 1; svga->render = svga_render_15bpp_highres; break; case 5: /*RGB-16 (5.6.5)*/ - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - // svga->dots_per_clock >>= 1; svga->render = svga_render_16bpp_highres; break; case 6: /*RGB-24 (8.8.8)*/ @@ -937,9 +963,6 @@ s3_virge_recalctimings(svga_t *svga) case 7: /*XRGB-32 (X.8.8.8)*/ svga->render = svga_render_32bpp_highres; break; - - default: - break; } svga->vram_display_mask = virge->vram_mask; } @@ -976,8 +999,7 @@ s3_virge_update_buffer(virge_t *virge) } static void -s3_virge_updatemapping(virge_t *virge) -{ +s3_virge_updatemapping(virge_t *virge) { svga_t *svga = &virge->svga; if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM)) { @@ -988,10 +1010,8 @@ s3_virge_updatemapping(virge_t *virge) return; } - s3_virge_log("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc); - /*Banked framebuffer*/ - switch (svga->gdcreg[6] & 0xc) { /*VGA mapping*/ - case 0x0: /*128k at A0000*/ + switch (svga->gdcreg[6] & 0xc) { /*Banked framebuffer*/ + case 0x0: /*128k at A0000*/ mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000); svga->banked_mask = 0xffff; break; @@ -1007,16 +1027,12 @@ s3_virge_updatemapping(virge_t *virge) mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000); svga->banked_mask = 0x7fff; break; - - default: - break; } virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24); - s3_virge_log("Linear framebuffer %02X, linear base = %08x, display mask = %08x\n", svga->crtc[0x58] & 0x17, virge->linear_base, svga->vram_display_mask); if ((svga->crtc[0x58] & 0x10) || (virge->advfunc_cntl & 0x10)) { /*Linear framebuffer*/ - switch (svga->crtc[0x58] & 7) { + switch (svga->crtc[0x58] & 3) { case 0: /*64k*/ virge->linear_size = 0x10000; break; @@ -1027,7 +1043,7 @@ s3_virge_updatemapping(virge_t *virge) virge->linear_size = 0x200000; break; case 3: /*4mb on other than ViRGE/VX, 8mb on ViRGE/VX*/ - if (virge->chip == S3_VIRGEVX || virge->chip == S3_TRIO3D2X) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_TRIO3D2X)) virge->linear_size = 0x800000; else virge->linear_size = 0x400000; @@ -1035,17 +1051,13 @@ s3_virge_updatemapping(virge_t *virge) case 7: virge->linear_size = 0x800000; break; - - default: - break; } virge->linear_base &= ~(virge->linear_size - 1); - s3_virge_log("Linear framebuffer at %08X size %08X, mask = %08x, CRTC58 sel = %02x\n", virge->linear_base, virge->linear_size, virge->vram_mask, svga->crtc[0x58] & 7); if (virge->linear_base == 0xa0000) { mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000); mem_mapping_disable(&virge->linear_mapping); } else { - if (virge->chip == S3_VIRGEVX || virge->chip == S3_TRIO3D2X) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_TRIO3D2X)) virge->linear_base &= 0xfe000000; else virge->linear_base &= 0xfc000000; @@ -1058,11 +1070,7 @@ s3_virge_updatemapping(virge_t *virge) svga->fb_only = 0; } - s3_virge_log("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x38); - - /* Memory mapped I/O. */ - /* Old MMIO. */ - if ((svga->crtc[0x53] & 0x10) || (virge->advfunc_cntl & 0x20)) { + if ((svga->crtc[0x53] & 0x10) || (virge->advfunc_cntl & 0x20)) { /*Old MMIO*/ if (svga->crtc[0x53] & 0x20) mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000); else @@ -1070,41 +1078,37 @@ s3_virge_updatemapping(virge_t *virge) } else mem_mapping_disable(&virge->mmio_mapping); - /* New MMIO. */ - if (svga->crtc[0x53] & 0x08) + if (svga->crtc[0x53] & 0x08) /*New MMIO*/ mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000); else mem_mapping_disable(&virge->new_mmio_mapping); } static void -s3_virge_vblank_start(svga_t *svga) -{ +s3_virge_vblank_start(svga_t *svga) { virge_t *virge = (virge_t *) svga->priv; virge->subsys_stat |= INT_VSY; s3_virge_update_irqs(virge); } -static void s3_virge_wait_fifo_idle(virge_t *virge) { +static void +s3_virge_wait_fifo_idle(virge_t *virge) { while (!FIFO_EMPTY) { - s3_virge_wake_fifo_thread(virge); + wake_fifo_thread(virge); thread_wait_event(virge->fifo_not_full_event, 1); } } static uint8_t -s3_virge_mmio_read(uint32_t addr, void *priv) -{ +s3_virge_mmio_read(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint8_t ret = 0xff; - - s3_virge_log("[%04X:%08X]: MMIO ReadB addr = %04x\n", CS, cpu_state.pc, addr & 0xffff); + uint8_t ret; switch (addr & 0xffff) { case 0x8504: if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); + wake_fifo_thread(virge); ret = virge->subsys_stat; return ret; @@ -1114,11 +1118,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) ret |= 0x10; else ret |= 0x30; - if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - - s3_virge_log("Subsys status BYTE1, busy=%d, fiford=%d, fifowr=%d, ret=%x.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx, ret); + wake_fifo_thread(virge); return ret; case 0x850c: @@ -1130,55 +1131,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) ret = virge->fifo_slots_num >> 2; return ret; - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - return s3_virge_in(addr & 0x3ff, virge); + case 0x83b0 ... 0x83df: + return s3_virge_in(addr & 0x3ff, priv); case 0x859c: return virge->cmd_dma; @@ -1191,20 +1145,14 @@ s3_virge_mmio_read(uint32_t addr, void *priv) if ((virge->serialport & SERIAL_PORT_SDW) && i2c_gpio_get_sda(virge->i2c)) ret |= SERIAL_PORT_SDR; return ret; - - default: - break; } return 0xff; } static uint16_t -s3_virge_mmio_read_w(uint32_t addr, void *priv) -{ +s3_virge_mmio_read_w(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint16_t ret = 0xffff; - - s3_virge_log("[%04X:%08X]: MMIO ReadW addr = %04x\n", CS, cpu_state.pc, addr & 0xfffe); + uint16_t ret; switch (addr & 0xfffe) { case 0x8504: @@ -1216,7 +1164,7 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) ret |= virge->subsys_stat; if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); + wake_fifo_thread(virge); return ret; case 0x850c: @@ -1228,17 +1176,16 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) return virge->cmd_dma; default: - return s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8); + return s3_virge_mmio_read(addr, priv) | (s3_virge_mmio_read(addr + 1, priv) << 8); } - return 0xffff; } static uint32_t -s3_virge_mmio_read_l(uint32_t addr, void *priv) -{ +s3_virge_mmio_read_l(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint32_t ret = 0xffffffff; + svga_t *svga = &virge->svga; + uint32_t ret = 0xffffffff; switch (addr & 0xfffc) { case 0x8180: @@ -1258,30 +1205,46 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) break; case 0x81a0: ret = virge->streams.blend_ctrl; + svga_recalctimings(svga); break; case 0x81c0: ret = virge->streams.pri_fb0; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81c4: ret = virge->streams.pri_fb1; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81c8: ret = virge->streams.pri_stride; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81cc: ret = virge->streams.buffer_ctrl; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d0: ret = virge->streams.sec_fb0; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d4: ret = virge->streams.sec_fb1; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d8: ret = virge->streams.sec_stride; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81dc: ret = virge->streams.overlay_ctrl; + svga->fullchange = changeframecount; break; case 0x81e0: ret = virge->streams.k1_vert_scale; @@ -1314,12 +1277,9 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) ret |= 0x00001000; else ret |= 0x00003000; - ret |= virge->subsys_stat; if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - - s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx); + wake_fifo_thread(virge); break; case 0x850c: @@ -1463,569 +1423,387 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) default: ret = s3_virge_mmio_read_w(addr, priv) | (s3_virge_mmio_read_w(addr + 2, priv) << 16); - break; } - s3_virge_log("MMIO ReadL addr = %04x, val = %08x.\n", addr & 0xfffc, ret); return ret; } static void -s3_virge_mmio_fifo_write(uint32_t addr, uint8_t val, virge_t *virge) -{ - if ((addr & 0xffff) < 0x8000) - s3_virge_bitblt(virge, 8, val); - else { - switch (addr & 0xffff) { - case 0x859c: - virge->cmd_dma = val; - break; - - default: - break; - } - } -} - -static void -s3_virge_mmio_fifo_write_w(uint32_t addr, uint16_t val, virge_t *virge) -{ - if ((addr & 0xfffe) < 0x8000) { - if (virge->s3d.cmd_set & CMD_SET_MS) - s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); - else - s3_virge_bitblt(virge, 16, val); - } else { - if ((addr & 0xfffe) == 0x859c) - virge->cmd_dma = val; - } -} - -static void -s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) -{ - if ((addr & 0xfffc) < 0x8000) { - if (virge->s3d.cmd_set & CMD_SET_MS) - s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); - else - s3_virge_bitblt(virge, 32, val); - } else { - switch (addr & 0xfffc) { - case 0x8590: - virge->cmd_dma_base = val; - break; - - case 0x8594: - virge->dma_ptr = val; - break; - - case 0x8598: - break; - - case 0x859c: - virge->cmd_dma = val; - break; - - case 0xa000: - case 0xa004: - case 0xa008: - case 0xa00c: - case 0xa010: - case 0xa014: - case 0xa018: - case 0xa01c: - case 0xa020: - case 0xa024: - case 0xa028: - case 0xa02c: - case 0xa030: - case 0xa034: - case 0xa038: - case 0xa03c: - case 0xa040: - case 0xa044: - case 0xa048: - case 0xa04c: - case 0xa050: - case 0xa054: - case 0xa058: - case 0xa05c: - case 0xa060: - case 0xa064: - case 0xa068: - case 0xa06c: - case 0xa070: - case 0xa074: - case 0xa078: - case 0xa07c: - case 0xa080: - case 0xa084: - case 0xa088: - case 0xa08c: - case 0xa090: - case 0xa094: - case 0xa098: - case 0xa09c: - case 0xa0a0: - case 0xa0a4: - case 0xa0a8: - case 0xa0ac: - case 0xa0b0: - case 0xa0b4: - case 0xa0b8: - case 0xa0bc: - case 0xa0c0: - case 0xa0c4: - case 0xa0c8: - case 0xa0cc: - case 0xa0d0: - case 0xa0d4: - case 0xa0d8: - case 0xa0dc: - case 0xa0e0: - case 0xa0e4: - case 0xa0e8: - case 0xa0ec: - case 0xa0f0: - case 0xa0f4: - case 0xa0f8: - case 0xa0fc: - case 0xa100: - case 0xa104: - case 0xa108: - case 0xa10c: - case 0xa110: - case 0xa114: - case 0xa118: - case 0xa11c: - case 0xa120: - case 0xa124: - case 0xa128: - case 0xa12c: - case 0xa130: - case 0xa134: - case 0xa138: - case 0xa13c: - case 0xa140: - case 0xa144: - case 0xa148: - case 0xa14c: - case 0xa150: - case 0xa154: - case 0xa158: - case 0xa15c: - case 0xa160: - case 0xa164: - case 0xa168: - case 0xa16c: - case 0xa170: - case 0xa174: - case 0xa178: - case 0xa17c: - case 0xa180: - case 0xa184: - case 0xa188: - case 0xa18c: - case 0xa190: - case 0xa194: - case 0xa198: - case 0xa19c: - case 0xa1a0: - case 0xa1a4: - case 0xa1a8: - case 0xa1ac: - case 0xa1b0: - case 0xa1b4: - case 0xa1b8: - case 0xa1bc: - case 0xa1c0: - case 0xa1c4: - case 0xa1c8: - case 0xa1cc: - case 0xa1d0: - case 0xa1d4: - case 0xa1d8: - case 0xa1dc: - case 0xa1e0: - case 0xa1e4: - case 0xa1e8: - case 0xa1ec: - case 0xa1f0: - case 0xa1f4: - case 0xa1f8: - case 0xa1fc: - { - int x = addr & 4; - int y = (addr >> 3) & 7; - int color; - int byte; - virge->s3d.pattern_8[y * 8 + x] = val & 0xff; - virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; - virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; - virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; - - x = (addr >> 1) & 6; - y = (addr >> 4) & 7; - virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; - virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; - - addr &= 0x00ff; - for (uint8_t xx = 0; xx < 4; xx++) { - x = ((addr + xx) / 3) % 8; - y = ((addr + xx) / 24) % 8; - color = ((addr + xx) % 3) << 3; - byte = (xx << 3); - virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); - virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; - } - - x = (addr >> 2) & 7; - y = (addr >> 5) & 7; - virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; - } - break; - - case 0xa4d4: /*2D BitBLT SRC Base*/ - case 0xa8d4: /*2D Line SRC Base*/ - case 0xacd4: /*2D Polygon SRC Base*/ - virge->s3d.src_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - s3_virge_log("PortWrite = %04x, SRC Base = %08x, memsize = %i\n", addr & 0xfffc, val, virge->memory_size); - break; - case 0xa4d8: /*2D BitBLT DEST Base*/ - case 0xa8d8: /*2D Line DEST Base*/ - case 0xacd8: /*2D Polygon DEST Base*/ - virge->s3d.dest_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - s3_virge_log("PortWrite = %04x, DST Base = %08x, memsize = %i\n", addr & 0xfffc, val, virge->memory_size); - break; - case 0xa4dc: /*2D BitBLT Left/Right Clipping*/ - case 0xa8dc: /*2D Line Left/Right Clipping*/ - case 0xacdc: /*2D Polygon Left/Right Clipping*/ - virge->s3d.clip_l = (val >> 16) & 0x7ff; - virge->s3d.clip_r = val & 0x7ff; - break; - case 0xa4e0: /*2D BitBLT Top/Bottom Clipping*/ - case 0xa8e0: /*2D Line Top/Bottom Clipping*/ - case 0xace0: /*2D Polygon Top/Bottom Clipping*/ - virge->s3d.clip_t = (val >> 16) & 0x7ff; - virge->s3d.clip_b = val & 0x7ff; - break; - case 0xa4e4: /*2D BitBLT DEST/SRC Stride*/ - case 0xa8e4: /*2D Line DEST/SRC Stride*/ - case 0xace4: /*2D Polygon DEST/SRC Stride*/ - virge->s3d.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xa4e8: /*2D BitBLT Mono Pattern 0*/ - case 0xace8: /*2D Polygon Mono Pattern 0*/ - virge->s3d.mono_pat_0 = val; - break; - case 0xa4ec: /*2D BitBLT Mono Pattern 1*/ - case 0xacec: /*2D Polygon Mono Pattern 1*/ - virge->s3d.mono_pat_1 = val; - break; - case 0xa4f0: /*2D BitBLT Mono Pattern Background*/ - case 0xacf0: /*2D Polygon Mono Pattern Background*/ - virge->s3d.pat_bg_clr = val; - break; - case 0xa4f4: /*2D BitBLT Mono Pattern Foreground*/ - case 0xa8f4: /*2D Line Mono Pattern Foreground*/ - case 0xacf4: /*2D Polygon Mono Pattern Foreground*/ - virge->s3d.pat_fg_clr = val; - break; - case 0xa4f8: /*2D BitBLT SRC Background*/ - virge->s3d.src_bg_clr = val; - break; - case 0xa4fc: /*2D BitBLT SRC Foreground*/ - virge->s3d.src_fg_clr = val; - break; - case 0xa500: /*2D BitBLT Command Set*/ - case 0xa900: /*2D Line Command Set*/ - case 0xad00: /*2D Polygon Command Set*/ - virge->s3d.cmd_set = val; - if (!(val & CMD_SET_AE)) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - case 0xa504: /*2D BitBLT Rectangle Width/Height*/ - virge->s3d.r_width = (val >> 16) & 0x7ff; - virge->s3d.r_height = val & 0x7ff; - break; - case 0xa508: /*2D BitBLT Rectangle SRC X/Y*/ - virge->s3d.rsrc_x = (val >> 16) & 0x7ff; - virge->s3d.rsrc_y = val & 0x7ff; - break; - case 0xa50c: /*2D BitBLT Rectangle DEST X/Y*/ - virge->s3d.rdest_x = (val >> 16) & 0x7ff; - virge->s3d.rdest_y = val & 0x7ff; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - case 0xa96c: /*2D Line Draw Endpoints*/ - virge->s3d.lxend0 = (val >> 16) & 0x7ff; - virge->s3d.lxend1 = val & 0x7ff; - break; - case 0xa970: /*2D Line X Delta*/ - virge->s3d.ldx = (int32_t) val; - break; - case 0xa974: /*2D Line X Start*/ - virge->s3d.lxstart = val; - break; - case 0xa978: /*2D Line Y Start*/ - virge->s3d.lystart = val & 0x7ff; - break; - case 0xa97c: /*2D Line Y Count*/ - virge->s3d.lycnt = val & 0x7ff; - virge->s3d.line_dir = val >> 31; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - - case 0xad68: /*2D Polygon Right X Delta*/ - virge->s3d.prdx = val; - break; - case 0xad6c: /*2D Polygon Right X Start*/ - virge->s3d.prxstart = val; - break; - case 0xad70: /*2D Polygon Left X Delta*/ - virge->s3d.pldx = val; - break; - case 0xad74: /*2D Polygon Left X Start*/ - virge->s3d.plxstart = val; - break; - case 0xad78: /*2D Polygon Y Start*/ - virge->s3d.pystart = val & 0x7ff; - break; - case 0xad7c: /*2D Polygon Y Count*/ - virge->s3d.pycnt = val & 0x300007ff; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - - case 0xb0d4: - case 0xb4d4: - virge->s3d_tri.z_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb0d8: - case 0xb4d8: - virge->s3d_tri.dest_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb0dc: - case 0xb4dc: - virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_r = val & 0x7ff; - break; - case 0xb0e0: - case 0xb4e0: - virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_b = val & 0x7ff; - break; - case 0xb0e4: - case 0xb4e4: - virge->s3d_tri.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xb0e8: - case 0xb4e8: - virge->s3d_tri.z_str = val & 0xff8; - break; - case 0xb4ec: - virge->s3d_tri.tex_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb4f0: - virge->s3d_tri.tex_bdr_clr = val & 0xffffff; - break; - case 0xb0f4: - case 0xb4f4: - virge->s3d_tri.fog_b = val & 0xff; - virge->s3d_tri.fog_g = (val >> 8) & 0xff; - virge->s3d_tri.fog_r = (val >> 16) & 0xff; - break; - case 0xb100: - case 0xb500: - virge->s3d_tri.cmd_set = val; - if (!(val & CMD_SET_AE)) - queue_triangle(virge); - break; - case 0xb504: - virge->s3d_tri.tbv = val & 0xfffff; - break; - case 0xb508: - virge->s3d_tri.tbu = val & 0xfffff; - break; - case 0xb50c: - virge->s3d_tri.TdWdX = val; - break; - case 0xb510: - virge->s3d_tri.TdWdY = val; - break; - case 0xb514: - virge->s3d_tri.tws = val; - break; - case 0xb518: - virge->s3d_tri.TdDdX = val; - break; - case 0xb51c: - virge->s3d_tri.TdVdX = val; - break; - case 0xb520: - virge->s3d_tri.TdUdX = val; - break; - case 0xb524: - virge->s3d_tri.TdDdY = val; - break; - case 0xb528: - virge->s3d_tri.TdVdY = val; - break; - case 0xb52c: - virge->s3d_tri.TdUdY = val; - break; - case 0xb530: - virge->s3d_tri.tds = val; - break; - case 0xb534: - virge->s3d_tri.tvs = val; - break; - case 0xb538: - virge->s3d_tri.tus = val; - break; - case 0xb53c: - virge->s3d_tri.TdGdX = val >> 16; - virge->s3d_tri.TdBdX = val & 0xffff; - break; - case 0xb540: - virge->s3d_tri.TdAdX = val >> 16; - virge->s3d_tri.TdRdX = val & 0xffff; - break; - case 0xb544: - virge->s3d_tri.TdGdY = val >> 16; - virge->s3d_tri.TdBdY = val & 0xffff; - break; - case 0xb548: - virge->s3d_tri.TdAdY = val >> 16; - virge->s3d_tri.TdRdY = val & 0xffff; - break; - case 0xb54c: - virge->s3d_tri.tgs = (val >> 16) & 0xffff; - virge->s3d_tri.tbs = val & 0xffff; - break; - case 0xb550: - virge->s3d_tri.tas = (val >> 16) & 0xffff; - virge->s3d_tri.trs = val & 0xffff; - break; - - case 0xb554: - virge->s3d_tri.TdZdX = val; - break; - case 0xb558: - virge->s3d_tri.TdZdY = val; - break; - case 0xb55c: - virge->s3d_tri.tzs = val; - break; - case 0xb560: - virge->s3d_tri.TdXdY12 = val; - break; - case 0xb564: - virge->s3d_tri.txend12 = val; - break; - case 0xb568: - virge->s3d_tri.TdXdY01 = val; - break; - case 0xb56c: - virge->s3d_tri.txend01 = val; - break; - case 0xb570: - virge->s3d_tri.TdXdY02 = val; - break; - case 0xb574: - virge->s3d_tri.txs = val; - break; - case 0xb578: - virge->s3d_tri.tys = val; - break; - case 0xb57c: - virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; - virge->s3d_tri.ty12 = val & 0x7ff; - virge->s3d_tri.tlr = val >> 31; - if (virge->s3d_tri.cmd_set & CMD_SET_AE) - queue_triangle(virge); - break; - - default: - break; - } - } -} - -static void -s3_virge_fifo_thread(void *priv) -{ - virge_t *virge = (virge_t *)priv; +fifo_thread(void *param) { + virge_t *virge = (virge_t *)param; while (virge->fifo_thread_run) { - s3_virge_log("Thread, entries=%d.\n", FIFO_ENTRIES); thread_set_event(virge->fifo_not_full_event); - //thread_wait_event(virge->wake_fifo_thread, -1); - //thread_reset_event(virge->wake_fifo_thread); + thread_wait_event(virge->wake_fifo_thread, -1); + thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; - s3_virge_log("Busy.\n"); while (!FIFO_EMPTY) { - s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x, val=%08x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val); uint64_t start_time = plat_timer_read(); uint64_t end_time; fifo_entry_t *fifo = &virge->fifo[virge->fifo_read_idx & FIFO_MASK]; uint32_t val = fifo->val; - int op_type = (((fifo->addr_type & FIFO_ADDR) & 0x1c00) >> 10) - 1; switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: - s3_virge_mmio_fifo_write(fifo->addr_type & FIFO_ADDR, - val, virge); + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) + s3_virge_bitblt(virge, 8, val); + else if (((fifo->addr_type & FIFO_ADDR) & 0xffff) == 0x859c) + virge->cmd_dma = val; break; case FIFO_WRITE_WORD: - s3_virge_mmio_fifo_write_w(fifo->addr_type & FIFO_ADDR, - val, virge); + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (virge->s3d.cmd_set & CMD_SET_MS) + s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); + else + s3_virge_bitblt(virge, 16, val); + } else if (((fifo->addr_type & FIFO_ADDR) & 0xfffe) == 0x859c) + virge->cmd_dma = val; break; case FIFO_WRITE_DWORD: - s3_virge_mmio_fifo_write_l(fifo->addr_type & FIFO_ADDR, - val, virge); - break; - default: + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (virge->s3d.cmd_set & CMD_SET_MS) + s3_virge_bitblt(virge, 32, + ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | + ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); + else + s3_virge_bitblt(virge, 32, val); + } else { + switch ((fifo->addr_type & FIFO_ADDR) & 0xfffc) { + case 0x8590: + virge->cmd_dma_base = val; + break; + + case 0x8594: + virge->dma_ptr = val; + break; + + case 0x8598: + break; + + case 0x859c: + virge->cmd_dma = val; + break; + + case 0xa000 ... 0xa1fc: + { + int x = (fifo->addr_type & FIFO_ADDR) & 4; + int y = ((fifo->addr_type & FIFO_ADDR) >> 3) & 7; + int color; + int byte; + uint32_t addr = (fifo->addr_type & FIFO_ADDR); + virge->s3d.pattern_8[y * 8 + x] = val & 0xff; + virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; + virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; + virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; + + x = ((fifo->addr_type & FIFO_ADDR) >> 1) & 6; + y = ((fifo->addr_type & FIFO_ADDR) >> 4) & 7; + virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; + virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; + + addr &= 0x00ff; + for (uint8_t xx = 0; xx < 4; xx++) { + x = ((addr + xx) / 3) % 8; + y = ((addr + xx) / 24) % 8; + color = ((addr + xx) % 3) << 3; + byte = (xx << 3); + virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); + virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; + } + + x = ((fifo->addr_type & FIFO_ADDR) >> 2) & 7; + y = ((fifo->addr_type & FIFO_ADDR) >> 5) & 7; + virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; + } break; + + case 0xa4d4: + case 0xa8d4: + case 0xacd4: + virge->s3d.src_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4d8: + case 0xa8d8: + case 0xacd8: + virge->s3d.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4dc: + case 0xa8dc: + case 0xacdc: + virge->s3d.clip_l = (val >> 16) & 0x7ff; + virge->s3d.clip_r = val & 0x7ff; + break; + case 0xa4e0: + case 0xa8e0: + case 0xace0: + virge->s3d.clip_t = (val >> 16) & 0x7ff; + virge->s3d.clip_b = val & 0x7ff; + break; + case 0xa4e4: + case 0xa8e4: + case 0xace4: + virge->s3d.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xa4e8: + case 0xace8: + virge->s3d.mono_pat_0 = val; + break; + case 0xa4ec: + case 0xacec: + virge->s3d.mono_pat_1 = val; + break; + case 0xa4f0: + case 0xacf0: + virge->s3d.pat_bg_clr = val; + break; + case 0xa4f4: + case 0xa8f4: + case 0xacf4: + virge->s3d.pat_fg_clr = val; + break; + case 0xa4f8: + virge->s3d.src_bg_clr = val; + break; + case 0xa4fc: + virge->s3d.src_fg_clr = val; + break; + case 0xa500: + case 0xa900: + case 0xad00: + virge->s3d.cmd_set = val; + if (!(val & CMD_SET_AE)) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa504: + virge->s3d.r_width = (val >> 16) & 0x7ff; + virge->s3d.r_height = val & 0x7ff; + break; + case 0xa508: + virge->s3d.rsrc_x = (val >> 16) & 0x7ff; + virge->s3d.rsrc_y = val & 0x7ff; + break; + case 0xa50c: + virge->s3d.rdest_x = (val >> 16) & 0x7ff; + virge->s3d.rdest_y = val & 0x7ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa96c: + virge->s3d.lxend0 = (val >> 16) & 0x7ff; + virge->s3d.lxend1 = val & 0x7ff; + break; + case 0xa970: + virge->s3d.ldx = (int32_t)val; + break; + case 0xa974: + virge->s3d.lxstart = val; + break; + case 0xa978: + virge->s3d.lystart = val & 0x7ff; + break; + case 0xa97c: + virge->s3d.lycnt = val & 0x7ff; + virge->s3d.line_dir = val >> 31; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xad68: + virge->s3d.prdx = val; + break; + case 0xad6c: + virge->s3d.prxstart = val; + break; + case 0xad70: + virge->s3d.pldx = val; + break; + case 0xad74: + virge->s3d.plxstart = val; + break; + case 0xad78: + virge->s3d.pystart = val & 0x7ff; + break; + case 0xad7c: + virge->s3d.pycnt = val & 0x300007ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xb0d4: + case 0xb4d4: + virge->s3d_tri.z_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0d8: + case 0xb4d8: + virge->s3d_tri.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0dc: + case 0xb4dc: + virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_r = val & 0x7ff; + break; + case 0xb0e0: + case 0xb4e0: + virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_b = val & 0x7ff; + break; + case 0xb0e4: + case 0xb4e4: + virge->s3d_tri.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xb0e8: + case 0xb4e8: + virge->s3d_tri.z_str = val & 0xff8; + break; + case 0xb4ec: + virge->s3d_tri.tex_base = val & 0x3ffff8; + break; + case 0xb4f0: + virge->s3d_tri.tex_bdr_clr = val & 0xffffff; + break; + case 0xb0f4: + case 0xb4f4: + virge->s3d_tri.fog_b = val & 0xff; + virge->s3d_tri.fog_g = (val >> 8) & 0xff; + virge->s3d_tri.fog_r = (val >> 16) & 0xff; + break; + case 0xb100: + case 0xb500: + virge->s3d_tri.cmd_set = val; + if (!(val & CMD_SET_AE)) + queue_triangle(virge); + break; + case 0xb504: + virge->s3d_tri.tbv = val & 0xfffff; + break; + case 0xb508: + virge->s3d_tri.tbu = val & 0xfffff; + break; + case 0xb50c: + virge->s3d_tri.TdWdX = val; + break; + case 0xb510: + virge->s3d_tri.TdWdY = val; + break; + case 0xb514: + virge->s3d_tri.tws = val; + break; + case 0xb518: + virge->s3d_tri.TdDdX = val; + break; + case 0xb51c: + virge->s3d_tri.TdVdX = val; + break; + case 0xb520: + virge->s3d_tri.TdUdX = val; + break; + case 0xb524: + virge->s3d_tri.TdDdY = val; + break; + case 0xb528: + virge->s3d_tri.TdVdY = val; + break; + case 0xb52c: + virge->s3d_tri.TdUdY = val; + break; + case 0xb530: + virge->s3d_tri.tds = val; + break; + case 0xb534: + virge->s3d_tri.tvs = val; + break; + case 0xb538: + virge->s3d_tri.tus = val; + break; + case 0xb53c: + virge->s3d_tri.TdGdX = val >> 16; + virge->s3d_tri.TdBdX = val & 0xffff; + break; + case 0xb540: + virge->s3d_tri.TdAdX = val >> 16; + virge->s3d_tri.TdRdX = val & 0xffff; + break; + case 0xb544: + virge->s3d_tri.TdGdY = val >> 16; + virge->s3d_tri.TdBdY = val & 0xffff; + break; + case 0xb548: + virge->s3d_tri.TdAdY = val >> 16; + virge->s3d_tri.TdRdY = val & 0xffff; + break; + case 0xb54c: + virge->s3d_tri.tgs = (val >> 16) & 0xffff; + virge->s3d_tri.tbs = val & 0xffff; + break; + case 0xb550: + virge->s3d_tri.tas = (val >> 16) & 0xffff; + virge->s3d_tri.trs = val & 0xffff; + break; + + case 0xb554: + virge->s3d_tri.TdZdX = val; + break; + case 0xb558: + virge->s3d_tri.TdZdY = val; + break; + case 0xb55c: + virge->s3d_tri.tzs = val; + break; + case 0xb560: + virge->s3d_tri.TdXdY12 = val; + break; + case 0xb564: + virge->s3d_tri.txend12 = val; + break; + case 0xb568: + virge->s3d_tri.TdXdY01 = val; + break; + case 0xb56c: + virge->s3d_tri.txend01 = val; + break; + case 0xb570: + virge->s3d_tri.TdXdY02 = val; + break; + case 0xb574: + virge->s3d_tri.txs = val; + break; + case 0xb578: + virge->s3d_tri.tys = val; + break; + case 0xb57c: + virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; + virge->s3d_tri.ty12 = val & 0x7ff; + virge->s3d_tri.tlr = val >> 31; + if (virge->s3d_tri.cmd_set & CMD_SET_AE) + queue_triangle(virge); + break; + } + } break; } virge->fifo_read_idx++; fifo->addr_type = FIFO_INVALID; - if (op_type) { - if (FIFO_ENTRIES > 0xe000) - thread_set_event(virge->fifo_not_full_event); - } else { - thread_set_event(virge->fifo_not_full_event); - if (((fifo->addr_type & FIFO_ADDR) & 0xffff) >= 0x8000) - virge->fifo_read_idx &= FIFO_MASK; - } + if (FIFO_ENTRIES > 0xe000) + thread_set_event(virge->fifo_not_full_event); - end_time = plat_timer_read(); - virge->blitter_time += end_time - start_time; - } - virge->virge_busy = 0; - virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; - s3_virge_update_irqs(virge); + end_time = plat_timer_read(); + virge_time += end_time - start_time; + } + virge->virge_busy = 0; + virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; + s3_virge_update_irqs(virge); } } static void -s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) -{ - int op_type = ((addr & 0x1c00) >> 10) - 1; +s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) { fifo_entry_t *fifo = &virge->fifo[virge->fifo_write_idx & FIFO_MASK]; if (FIFO_FULL) { @@ -2039,373 +1817,357 @@ s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) virge->fifo_write_idx++; - if (op_type) { - if (FIFO_ENTRIES > 0xe000) - s3_virge_wake_fifo_thread(virge); - if ((FIFO_ENTRIES > 0xe000) || (FIFO_ENTRIES < 8)) - s3_virge_wake_fifo_thread(virge); - } else { - if (FIFO_ENTRIES < 16) { - s3_virge_wake_fifo_thread(virge); - s3_virge_log("FIFO ENTRIES=%d.\n", FIFO_ENTRIES); - } else { - s3_virge_log("FIFO FULL.\n"); - thread_reset_event(virge->fifo_not_full_event); - thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ - } - if (((addr & 0xffff) >= 0x8000)) { - if (virge->fifo_write_idx == FIFO_SIZE) - virge->fifo_write_idx = 1; - } - } + if (FIFO_ENTRIES > 0xe000) + wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) + wake_fifo_thread(virge); } static void -s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) -{ +s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) { virge_t *virge = (virge_t *) priv; - s3_virge_log("MMIO WriteB addr = %04x, val = %02x\n", addr & 0xffff, val); - if ((addr & 0xffff) < 0x8000) + + if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_BYTE); - else { - switch (addr & 0xffff) { - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - s3_virge_out(addr & 0x3ff, val, virge); - break; + else switch (addr & 0xffff) { + case 0x83b0 ... 0x83df: + s3_virge_out(addr & 0x3ff, val, priv); + break; - case 0xff20: - virge->serialport = val; - i2c_gpio_set(virge->i2c, !!(val & SERIAL_PORT_SCW), !!(val & SERIAL_PORT_SDW)); - break; - - default: - break; - } + case 0xff20: + virge->serialport = val; + i2c_gpio_set(virge->i2c, !!(val & SERIAL_PORT_SCW), !!(val & SERIAL_PORT_SDW)); + break; } } static void -s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) -{ +s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) { virge_t *virge = (virge_t *) priv; - s3_virge_log("[%04X:%08X]: MMIO WriteW addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffe, val); - if ((addr & 0xfffe) < 0x8000) + + if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_WORD); - else { - if ((addr & 0xfffe) == 0x83d4) { - s3_virge_mmio_write(addr, val, virge); - s3_virge_mmio_write(addr + 1, val >> 8, virge); - } else if ((addr & 0xfffe) == 0xff20) - s3_virge_mmio_write(addr, val, virge); + else switch (addr & 0xfffe) { + case 0x83d4: + s3_virge_mmio_write(addr, val, priv); + s3_virge_mmio_write(addr + 1, val >> 8, priv); + break; + + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; } } static void -s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) -{ +s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; - s3_virge_log("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); - if (((addr & 0xfffc) < 0x8000) || ((addr & 0xe000) == 0xa000)) + if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); - else { - switch (addr & 0xfffc) { - case 0x8180: - virge->streams.pri_ctrl = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x8184: - virge->streams.chroma_ctrl = val; - break; - case 0x8190: - virge->streams.sec_ctrl = val; - virge->streams.dda_horiz_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_horiz_accumulator |= 0xfffff800; - virge->streams.sdif = (val >> 24) & 7; - break; - case 0x8194: - virge->streams.chroma_upper_bound = val; - break; - case 0x8198: - virge->streams.sec_filter = val; - virge->streams.k1_horiz_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_horiz_scale |= 0xfffff800; - virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; - if ((val >> 16) & (1 << 10)) - virge->streams.k2_horiz_scale |= 0xfffff800; - break; - case 0x81a0: - virge->streams.blend_ctrl = val; - svga_recalctimings(svga); - break; - case 0x81c0: - virge->streams.pri_fb0 = val & 0x7fffff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81c4: - virge->streams.pri_fb1 = val & 0x7fffff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81c8: - virge->streams.pri_stride = val & 0xfff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81cc: - virge->streams.buffer_ctrl = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d0: - virge->streams.sec_fb0 = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d4: - virge->streams.sec_fb1 = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d8: - virge->streams.sec_stride = val; - svga->fullchange = changeframecount; - break; - case 0x81dc: - virge->streams.overlay_ctrl = val; - break; - case 0x81e0: - virge->streams.k1_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_vert_scale |= 0xfffff800; - break; - case 0x81e4: - virge->streams.k2_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k2_vert_scale |= 0xfffff800; - break; - case 0x81e8: - virge->streams.dda_vert_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_vert_accumulator |= 0xfffff800; - break; - case 0x81ec: - virge->streams.fifo_ctrl = val; - break; - case 0x81f0: - virge->streams.pri_start = val; - virge->streams.pri_x = (val >> 16) & 0x7ff; - virge->streams.pri_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f4: - virge->streams.pri_size = val; - virge->streams.pri_w = (val >> 16) & 0x7ff; - virge->streams.pri_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f8: - virge->streams.sec_start = val; - virge->streams.sec_x = (val >> 16) & 0x7ff; - virge->streams.sec_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81fc: - virge->streams.sec_size = val; - virge->streams.sec_w = (val >> 16) & 0x7ff; - virge->streams.sec_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; + else if ((addr & 0xe000) == 0xa000) + s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); + else switch (addr & 0xfffc) { + case 0x8180: + virge->streams.pri_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x8184: + virge->streams.chroma_ctrl = val; + break; + case 0x8190: + virge->streams.sec_ctrl = val; + virge->streams.dda_horiz_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_horiz_accumulator |= 0xfffff800; + virge->streams.sdif = (val >> 24) & 7; + break; + case 0x8194: + virge->streams.chroma_upper_bound = val; + break; + case 0x8198: + virge->streams.sec_filter = val; + virge->streams.k1_horiz_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_horiz_scale |= 0xfffff800; + virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; + if ((val >> 16) & (1 << 10)) + virge->streams.k2_horiz_scale |= 0xfffff800; + break; + case 0x81a0: + virge->streams.blend_ctrl = val; + break; + case 0x81c0: + virge->streams.pri_fb0 = val & 0x3fffff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81c4: + virge->streams.pri_fb1 = val & 0x3fffff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81c8: + virge->streams.pri_stride = val & 0xfff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81cc: + virge->streams.buffer_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d0: + virge->streams.sec_fb0 = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d4: + virge->streams.sec_fb1 = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d8: + virge->streams.sec_stride = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81dc: + virge->streams.overlay_ctrl = val; + break; + case 0x81e0: + virge->streams.k1_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_vert_scale |= 0xfffff800; + break; + case 0x81e4: + virge->streams.k2_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k2_vert_scale |= 0xfffff800; + break; + case 0x81e8: + virge->streams.dda_vert_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_vert_accumulator |= 0xfffff800; + break; + case 0x81ec: + virge->streams.fifo_ctrl = val; + break; + case 0x81f0: + virge->streams.pri_start = val; + virge->streams.pri_x = (val >> 16) & 0x7ff; + virge->streams.pri_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f4: + virge->streams.pri_size = val; + virge->streams.pri_w = (val >> 16) & 0x7ff; + virge->streams.pri_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f8: + virge->streams.sec_start = val; + virge->streams.sec_x = (val >> 16) & 0x7ff; + virge->streams.sec_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81fc: + virge->streams.sec_size = val; + virge->streams.sec_w = (val >> 16) & 0x7ff; + virge->streams.sec_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; - case 0x8504: - virge->subsys_stat &= ~(val & 0xff); - virge->subsys_cntl = (val >> 8); - s3_virge_update_irqs(virge); - break; + case 0x8504: + virge->subsys_stat &= ~(val & 0xff); + virge->subsys_cntl = (val >> 8); + s3_virge_update_irqs(virge); + break; - case 0x850c: - virge->advfunc_cntl = val & 0xff; - s3_virge_updatemapping(virge); - break; + case 0x850c: + virge->advfunc_cntl = val & 0xff; + s3_virge_updatemapping(virge); + break; - case 0xff20: - s3_virge_mmio_write(addr, val, virge); - break; - - default: - s3_virge_log("Actual WriteL=%04x.\n", addr & 0xfffc); - break; - } + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; } } -#define READ(addr, val) \ - { \ - switch (bpp) { \ - case 0: /*8 bpp*/ \ - val = vram[addr & virge->vram_mask]; \ - break; \ - case 1: /*16 bpp*/ \ - val = *(uint16_t *) &vram[addr & virge->vram_mask]; \ - break; \ - case 2: /*24 bpp*/ \ - val = (*(uint32_t *) &vram[addr & virge->vram_mask]) & 0xffffff; \ - break; \ - } \ - } +#define READ(addr, val) \ + do { \ + switch (bpp) { \ + case 0: /*8 bpp*/ \ + val = vram[addr & svga->vram_mask]; \ + break; \ + case 1: /*16 bpp*/ \ + val = *(uint16_t *)&vram[addr & svga->vram_mask]; \ + break; \ + case 2: /*24 bpp*/ \ + val = (*(uint32_t *)&vram[addr & svga->vram_mask]) & 0xffffff; \ + break; \ + } \ + } while (0) -#define CLIP(x, y) \ - { \ - if ((virge->s3d.cmd_set & CMD_SET_HC) && (x < virge->s3d.clip_l || x > virge->s3d.clip_r || y < virge->s3d.clip_t || y > virge->s3d.clip_b)) \ - update = 0; \ - } +#define Z_READ(addr) *(uint16_t *)&vram[addr & svga->vram_mask] -#define CLIP_3D(x, y) \ - { \ - if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || x > s3d_tri->clip_r || y < s3d_tri->clip_t || y > s3d_tri->clip_b)) \ - update = 0; \ - } +#define Z_WRITE(addr, val) \ + if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ + *(uint16_t *)&vram[addr & svga->vram_mask] = val -#define MIX() \ - { \ - int c; \ - for (c = 0; c < 24; c++) { \ - int d = (dest & (1 << c)) ? 1 : 0; \ - if (source & (1 << c)) \ - d |= 2; \ - if (pattern & (1 << c)) \ - d |= 4; \ - if (virge->s3d.rop & (1 << d)) \ - out |= (1 << c); \ - } \ - } +#define CLIP(x, y) \ + do { \ + if ((virge->s3d.cmd_set & CMD_SET_HC) && \ + (x < virge->s3d.clip_l || x > virge->s3d.clip_r || \ + y < virge->s3d.clip_t || y > virge->s3d.clip_b)) \ + update = 0; \ + } while (0) -#define WRITE(addr, val) \ - { \ - switch (bpp) { \ - case 0: /*8 bpp*/ \ - vram[addr & virge->vram_mask] = val; \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - case 1: /*16 bpp*/ \ - *(uint16_t *) &vram[addr & virge->vram_mask] = val; \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - case 2: /*24 bpp*/ \ - *(uint32_t *) &vram[addr & virge->vram_mask] = (val & 0xffffff) | (vram[(addr + 3) & virge->vram_mask] << 24); \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - } \ - } +#define CLIP_3D(x, y) \ + do { \ + if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || \ + x > s3d_tri->clip_r || y < s3d_tri->clip_t || \ + y > s3d_tri->clip_b)) \ + update = 0; \ + } while (0) + +#define Z_CLIP(Zzb, Zs) \ + do { \ + if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ + switch ((s3d_tri->cmd_set >> 20) & 7) { \ + case 0: \ + update = 0; \ + break; \ + case 1: \ + if (Zs <= Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 2: \ + if (Zs != Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 3: \ + if (Zs < Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 4: \ + if (Zs >= Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 5: \ + if (Zs == Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 6: \ + if (Zs > Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 7: \ + update = 1; \ + Zzb = Zs; \ + break; \ + } \ + } while (0) + +#define MIX() \ + do { \ + int c; \ + for (c = 0; c < 24; c++) { \ + int d = (dest & (1 << c)) ? 1 : 0; \ + if (source & (1 << c)) \ + d |= 2; \ + if (pattern & (1 << c)) \ + d |= 4; \ + if (virge->s3d.rop & (1 << d)) \ + out |= (1 << c); \ + } \ + } while (0) + +#define WRITE(addr, val) \ + do { \ + switch (bpp) { \ + case 0: /*8 bpp*/ \ + vram[addr & svga->vram_mask] = val; \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + case 1: /*16 bpp*/ \ + *(uint16_t *)&vram[addr & svga->vram_mask] = val; \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + case 2: /*24 bpp*/ \ + *(uint32_t *)&vram[addr & svga->vram_mask] = (val & 0xffffff) |\ + (vram[(addr + 3) & svga->vram_mask] << 24); \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + } \ + } while (0) static void -s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) -{ - svga_t *svga = &virge->svga; - uint8_t *vram = svga->vram; - uint32_t mono_pattern[64]; - int count_mask; - int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1; - int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1; - int bpp; - int x_mul; - int cpu_dat_shift; - const uint32_t *pattern_data; - uint32_t src_fg_clr; - uint32_t src_bg_clr; - uint32_t src_addr; - uint32_t dest_addr; - uint32_t source = 0; - uint32_t dest = 0; - uint32_t pattern; - uint32_t out = 0; - int update; +s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { + svga_t *svga = &virge->svga; + uint8_t *vram = virge->svga.vram; + uint32_t mono_pattern[64]; + int count_mask; + int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1; + int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1; + int bpp; + int x_mul; + int cpu_dat_shift; + uint32_t *pattern_data; + uint32_t src_fg_clr; + uint32_t src_bg_clr; switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK) { case CMD_SET_FORMAT_8: - bpp = 0; - x_mul = 1; + bpp = 0; + x_mul = 1; cpu_dat_shift = 8; - pattern_data = virge->s3d.pattern_8; - src_fg_clr = virge->s3d.src_fg_clr & 0xff; - src_bg_clr = virge->s3d.src_bg_clr & 0xff; + pattern_data = virge->s3d.pattern_8; + src_fg_clr = virge->s3d.src_fg_clr & 0xff; + src_bg_clr = virge->s3d.src_bg_clr & 0xff; break; case CMD_SET_FORMAT_16: - bpp = 1; - x_mul = 2; + bpp = 1; + x_mul = 2; cpu_dat_shift = 16; - pattern_data = virge->s3d.pattern_16; - src_fg_clr = virge->s3d.src_fg_clr & 0xffff; - src_bg_clr = virge->s3d.src_bg_clr & 0xffff; + pattern_data = virge->s3d.pattern_16; + src_fg_clr = virge->s3d.src_fg_clr & 0xffff; + src_bg_clr = virge->s3d.src_bg_clr & 0xffff; break; case CMD_SET_FORMAT_24: default: - bpp = 2; - x_mul = 3; + bpp = 2; + x_mul = 3; cpu_dat_shift = 24; - pattern_data = virge->s3d.pattern_24; - src_fg_clr = virge->s3d.src_fg_clr; - src_bg_clr = virge->s3d.src_bg_clr; + pattern_data = virge->s3d.pattern_24; + src_fg_clr = virge->s3d.src_fg_clr; + src_bg_clr = virge->s3d.src_bg_clr; break; - } - if (virge->s3d.cmd_set & CMD_SET_MP) - pattern_data = mono_pattern; + } + if (virge->s3d.cmd_set & CMD_SET_MP) + pattern_data = mono_pattern; switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK) { case CMD_SET_ITA_BYTE: @@ -2420,55 +2182,51 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) break; } if (virge->s3d.cmd_set & CMD_SET_MP) { - for (uint8_t y = 0; y < 4; y++) { - for (uint8_t x = 0; x < 8; x++) { - if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) - mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_fg_clr; - else - mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_bg_clr; - if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) - mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_fg_clr; - else - mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_bg_clr; - } + int x; + int y; + for (y = 0; y < 4; y++) { + for (x = 0; x < 8; x++) { + if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) + mono_pattern[y * 8 + x] = virge->s3d.pat_fg_clr; + else + mono_pattern[y * 8 + x] = virge->s3d.pat_bg_clr; + if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) + mono_pattern[(y + 4) * 8 + x] = virge->s3d.pat_fg_clr; + else + mono_pattern[(y + 4) * 8 + x] = virge->s3d.pat_bg_clr; + } } } - switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK) { + case CMD_SET_COMMAND_NOP: + break; + case CMD_SET_COMMAND_BITBLT: if (count == -1) { - virge->s3d.src_x = virge->s3d.rsrc_x; - virge->s3d.src_y = virge->s3d.rsrc_y; - virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.dest_y = virge->s3d.rdest_y; - virge->s3d.w = virge->s3d.r_width; - virge->s3d.h = virge->s3d.r_height; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; + virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_y = virge->s3d.rsrc_y; + virge->s3d.dest_x = virge->s3d.rdest_x; + virge->s3d.dest_y = virge->s3d.rdest_y; + virge->s3d.w = virge->s3d.r_width; + virge->s3d.h = virge->s3d.r_height; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; virge->s3d.data_left_count = 0; - s3_virge_log("BitBlt start src_x=%i,src_y=%i,dest_x=%i,dest_y=%i,w=%i,h=%i,rop=%02X,src_base=%x,dest_base=%x\n", - virge->s3d.src_x, - virge->s3d.src_y, - virge->s3d.dest_x, - virge->s3d.dest_y, - virge->s3d.w, - virge->s3d.h, - virge->s3d.rop, - virge->s3d.src_base, - virge->s3d.dest_base); - if (virge->s3d.cmd_set & CMD_SET_IDS) return; } - if (!virge->s3d.h) return; - while (count) { - src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str); - dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - out = 0; - update = 1; + uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + + (virge->s3d.src_y * virge->s3d.src_str); + uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern; + uint32_t out = 0; + int update = 1; switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS)) { case 0: @@ -2486,18 +2244,18 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) count -= (cpu_dat_shift - virge->s3d.data_left_count); virge->s3d.data_left_count = 0; if (count < cpu_dat_shift) { - virge->s3d.data_left = cpu_dat; + virge->s3d.data_left = cpu_dat; virge->s3d.data_left_count = count; - count = 0; + count = 0; } } else { source = cpu_dat; cpu_dat >>= cpu_dat_shift; count -= cpu_dat_shift; if (count < cpu_dat_shift) { - virge->s3d.data_left = cpu_dat; + virge->s3d.data_left = cpu_dat; virge->s3d.data_left_count = count; - count = 0; + count = 0; } } if ((virge->s3d.cmd_set & CMD_SET_TP) && source == src_fg_clr) @@ -2510,9 +2268,6 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) cpu_dat <<= 1; count--; break; - - default: - break; } CLIP(virge->s3d.dest_x, virge->s3d.dest_y); @@ -2530,9 +2285,9 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) virge->s3d.dest_x += x_inc; virge->s3d.dest_x &= 0x7ff; if (!virge->s3d.w) { - virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_x = virge->s3d.rsrc_x; virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.w = virge->s3d.r_width; + virge->s3d.w = virge->s3d.r_width; virge->s3d.src_y += y_inc; virge->s3d.dest_y += y_inc; @@ -2549,9 +2304,6 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) cpu_dat <<= (count - (count & count_mask)); count &= count_mask; break; - - default: - break; } if (!virge->s3d.h) return; @@ -2563,27 +2315,23 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) case CMD_SET_COMMAND_RECTFILL: /*No source, pattern = pat_fg_clr*/ if (count == -1) { - virge->s3d.src_x = virge->s3d.rsrc_x; - virge->s3d.src_y = virge->s3d.rsrc_y; + virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_y = virge->s3d.rsrc_y; virge->s3d.dest_x = virge->s3d.rdest_x; virge->s3d.dest_y = virge->s3d.rdest_y; - virge->s3d.w = virge->s3d.r_width; - virge->s3d.h = virge->s3d.r_height; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; - - s3_virge_log("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x, - virge->s3d.dest_y, - virge->s3d.w, - virge->s3d.h, - virge->s3d.rop, virge->s3d.dest_base); + virge->s3d.w = virge->s3d.r_width; + virge->s3d.h = virge->s3d.r_height; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; } while (count && virge->s3d.h) { - source = virge->s3d.pat_fg_clr; - dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - pattern = virge->s3d.pat_fg_clr; - out = 0; - update = 1; + uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern = virge->s3d.pat_fg_clr; + uint32_t out = 0; + int update = 1; CLIP(virge->s3d.dest_x, virge->s3d.dest_y); @@ -2600,9 +2348,9 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) virge->s3d.dest_x += x_inc; virge->s3d.dest_x &= 0x7ff; if (!virge->s3d.w) { - virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_x = virge->s3d.rsrc_x; virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.w = virge->s3d.r_width; + virge->s3d.w = virge->s3d.r_width; virge->s3d.src_y += y_inc; virge->s3d.dest_y += y_inc; @@ -2610,8 +2358,7 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) if (!virge->s3d.h) return; } else - virge->s3d.w--; - + virge->s3d.w--; count--; } break; @@ -2620,8 +2367,8 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) if (count == -1) { virge->s3d.dest_x = virge->s3d.lxstart; virge->s3d.dest_y = virge->s3d.lystart; - virge->s3d.h = virge->s3d.lycnt; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; + virge->s3d.h = virge->s3d.lycnt; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; } while (virge->s3d.h) { int x; @@ -2630,7 +2377,8 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) x = virge->s3d.dest_x >> 20; - if (virge->s3d.h == virge->s3d.lycnt && ((virge->s3d.line_dir && x > virge->s3d.lxend0) || (!virge->s3d.line_dir && x < virge->s3d.lxend0))) + if (virge->s3d.h == virge->s3d.lycnt && ((virge->s3d.line_dir && x > virge->s3d.lxend0) || + (!virge->s3d.line_dir && x < virge->s3d.lxend0))) x = virge->s3d.lxend0; if (virge->s3d.h == 1) @@ -2642,17 +2390,22 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) goto skip_line; do { - uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - uint32_t source = 0; - uint32_t dest = 0; + uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; uint32_t pattern; - uint32_t out = 0; - int update = 1; + uint32_t out = 0; + int update = 1; - if ((virge->s3d.h == virge->s3d.lycnt || !first_pixel) && ((virge->s3d.line_dir && x < virge->s3d.lxend0) || (!virge->s3d.line_dir && x > virge->s3d.lxend0))) + if ((virge->s3d.h == virge->s3d.lycnt || !first_pixel) && + ((virge->s3d.line_dir && x < virge->s3d.lxend0) || + (!virge->s3d.line_dir && x > virge->s3d.lxend0))) update = 0; - if ((virge->s3d.h == 1 || !first_pixel) && ((virge->s3d.line_dir && x > virge->s3d.lxend1) || (!virge->s3d.line_dir && x < virge->s3d.lxend1))) + if ((virge->s3d.h == 1 || !first_pixel) && + ((virge->s3d.line_dir && x > virge->s3d.lxend1) || + (!virge->s3d.line_dir && x < virge->s3d.lxend1))) update = 0; CLIP(x, virge->s3d.dest_y); @@ -2686,68 +2439,71 @@ skip_line: virge->s3d.dest_r = virge->s3d.prxstart; if (virge->s3d.pycnt & (1 << 29)) virge->s3d.dest_l = virge->s3d.plxstart; - virge->s3d.h = virge->s3d.pycnt & 0x7ff; + virge->s3d.h = virge->s3d.pycnt & 0x7ff; virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; while (virge->s3d.h) { - int x = virge->s3d.dest_l >> 20; - int xend = virge->s3d.dest_r >> 20; - int y = virge->s3d.pystart & 0x7ff; - int xdir = (x < xend) ? 1 : -1; - do { - uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); - uint32_t source = 0; - uint32_t dest = 0; - uint32_t pattern; - uint32_t out = 0; - int update = 1; + int x = virge->s3d.dest_l >> 20; + int xend = virge->s3d.dest_r >> 20; + int y = virge->s3d.pystart & 0x7ff; + int xdir = (x < xend) ? 1 : -1; + do { + uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern; + uint32_t out = 0; + int update = 1; - CLIP(x, y); + CLIP(x, y); - if (update) { - READ(dest_addr, dest); - pattern = pattern_data[(y & 7) * 8 + (x & 7)]; - MIX(); + if (update) { + READ(dest_addr, dest); + pattern = pattern_data[(y & 7) * 8 + (x & 7)]; - WRITE(dest_addr, out); - } + MIX(); - x = (x + xdir) & 0x7ff; - } while (x != (xend + xdir)); + WRITE(dest_addr, out); + } - virge->s3d.dest_l += virge->s3d.pldx; - virge->s3d.dest_r += virge->s3d.prdx; - virge->s3d.h--; - virge->s3d.pystart = (virge->s3d.pystart - 1) & 0x7ff; + x = (x + xdir) & 0x7ff; + } while (x != (xend + xdir)); + + virge->s3d.dest_l += virge->s3d.pldx; + virge->s3d.dest_r += virge->s3d.prdx; + virge->s3d.h--; + virge->s3d.pystart = (virge->s3d.pystart - 1) & 0x7ff; } break; - case CMD_SET_COMMAND_NOP: - break; - default: - break; + fatal("s3_virge_bitblt : blit command %i %08x\n", + (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set); } } -#define RGB15_TO_24(val, r, g, b) \ - b = ((val & 0x001f) << 3) | ((val & 0x001f) >> 2); \ - g = ((val & 0x03e0) >> 2) | ((val & 0x03e0) >> 7); \ - r = ((val & 0x7c00) >> 7) | ((val & 0x7c00) >> 12); +#define RGB15_TO_24(val, r, g, b) \ + b = ((val & 0x001f) << 3) | ((val & 0x001f) >> 2); \ + g = ((val & 0x03e0) >> 2) | ((val & 0x03e0) >> 7); \ + r = ((val & 0x7c00) >> 7) | ((val & 0x7c00) >> 12); -#define RGB24_TO_24(val, r, g, b) \ - b = val & 0xff; \ - g = (val & 0xff00) >> 8; \ - r = (val & 0xff0000) >> 16 +#define RGB24_TO_24(val, r, g, b) \ + b = val & 0xff; \ + g = (val & 0xff00) >> 8; \ + r = (val & 0xff0000) >> 16 -#define RGB15(r, g, b, dest) \ - if (virge->dithering_enabled) { \ - int add = dither[_y & 3][_x & 3]; \ - int _r = (r > 248) ? 248 : r + add; \ - int _g = (g > 248) ? 248 : g + add; \ - int _b = (b > 248) ? 248 : b + add; \ - dest = ((_b >> 3) & 0x1f) | (((_g >> 3) & 0x1f) << 5) | (((_r >> 3) & 0x1f) << 10); \ - } else \ - dest = ((b >> 3) & 0x1f) | (((g >> 3) & 0x1f) << 5) | (((r >> 3) & 0x1f) << 10) +#define RGB15(r, g, b, dest) \ + if (virge->dithering_enabled) { \ + int add = dither[_y & 3][_x & 3]; \ + int _r = (r > 248) ? 248 : r + add; \ + int _g = (g > 248) ? 248 : g + add; \ + int _b = (b > 248) ? 248 : b + add; \ + dest = ((_b >> 3) & 0x1f) | \ + (((_g >> 3) & 0x1f) << 5) | \ + (((_r >> 3) & 0x1f) << 10); \ + } else \ + dest = ((b >> 3) & 0x1f) | \ + (((g >> 3) & 0x1f) << 5) | \ + (((r >> 3) & 0x1f) << 10) #define RGB24(r, g, b) ((b) | ((g) << 8) | ((r) << 16)) @@ -2756,32 +2512,50 @@ typedef struct rgba_t { } rgba_t; typedef struct s3d_state_t { - int32_t r, g, b, a, u, v, d, w; + int32_t r; + int32_t g; + int32_t b; + int32_t a; + int32_t u; + int32_t v; + int32_t d; + int32_t w; - int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w; + int32_t base_r; + int32_t base_g; + int32_t base_b; + int32_t base_a; + int32_t base_u; + int32_t base_v; + int32_t base_d; + int32_t base_w; - uint32_t base_z; + uint32_t base_z; - uint32_t tbu, tbv; + uint32_t tbu; + uint32_t tbv; - uint32_t cmd_set; - int max_d; + uint32_t cmd_set; + int max_d; uint16_t *texture[10]; - uint32_t tex_bdr_clr; + uint32_t tex_bdr_clr; - int32_t x1, x2; - int y; + int32_t x1; + int32_t x2; - rgba_t dest_rgba; + int y; + + rgba_t dest_rgba; } s3d_state_t; typedef struct s3d_texture_state_t { - int level; - int texture_shift; + int level; + int texture_shift; - int32_t u, v; + int32_t u; + int32_t v; } s3d_texture_state_t; static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out); @@ -2795,10 +2569,10 @@ static int _x; static int _y; static void -tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12); out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7); @@ -2807,10 +2581,10 @@ tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out } static void -tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2822,10 +2596,10 @@ tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8); out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4); @@ -2834,10 +2608,10 @@ tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out } static void -tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2849,21 +2623,22 @@ tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint32_t val = ((uint32_t *) state->texture[texture_state->level])[offset]; +tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; out->r = (val >> 16) & 0xff; out->g = (val >> 8) & 0xff; out->b = val & 0xff; out->a = (val >> 24) & 0xff; } + static void -tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint32_t val = ((uint32_t *) state->texture[texture_state->level])[offset]; +tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2875,21 +2650,19 @@ tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_sample_normal(s3d_state_t *state) -{ +tex_sample_normal(s3d_state_t *state) { s3d_texture_state_t texture_state; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = state->u + state->tbu; - texture_state.v = state->v + state->tbv; + texture_state.u = state->u + state->tbu; + texture_state.v = state->v + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_normal_filter(s3d_state_t *state) -{ +tex_sample_normal_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; int tex_offset; rgba_t tex_samples[4]; @@ -2897,9 +2670,9 @@ tex_sample_normal_filter(s3d_state_t *state) int dv; int d[4]; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = state->u + state->tbu; texture_state.v = state->v + state->tbv; @@ -2924,30 +2697,32 @@ tex_sample_normal_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_mipmap(s3d_state_t *state) -{ +tex_sample_mipmap(s3d_state_t *state) { s3d_texture_state_t texture_state; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = state->u + state->tbu; - texture_state.v = state->v + state->tbv; + texture_state.u = state->u + state->tbu; + texture_state.v = state->v + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_mipmap_filter(s3d_state_t *state) -{ +tex_sample_mipmap_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; int tex_offset; rgba_t tex_samples[4]; @@ -2959,7 +2734,7 @@ tex_sample_mipmap_filter(s3d_state_t *state) if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = state->u + state->tbu; texture_state.v = state->v + state->tbv; @@ -2984,34 +2759,36 @@ tex_sample_mipmap_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_normal(s3d_state_t *state) -{ +tex_sample_persp_normal(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_normal_filter(s3d_state_t *state) -{ +tex_sample_persp_normal_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3021,14 +2798,14 @@ tex_sample_persp_normal_filter(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3053,34 +2830,36 @@ tex_sample_persp_normal_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_normal_375(s3d_state_t *state) -{ +tex_sample_persp_normal_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_normal_filter_375(s3d_state_t *state) -{ +tex_sample_persp_normal_filter_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3090,14 +2869,14 @@ tex_sample_persp_normal_filter_375(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3122,36 +2901,38 @@ tex_sample_persp_normal_filter_375(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_mipmap(s3d_state_t *state) -{ +tex_sample_persp_mipmap(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_mipmap_filter(s3d_state_t *state) -{ +tex_sample_persp_mipmap_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3161,16 +2942,16 @@ tex_sample_persp_mipmap_filter(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3195,36 +2976,38 @@ tex_sample_persp_mipmap_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_mipmap_375(s3d_state_t *state) -{ +tex_sample_persp_mipmap_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_mipmap_filter_375(s3d_state_t *state) -{ +tex_sample_persp_mipmap_filter_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3234,16 +3017,16 @@ tex_sample_persp_mipmap_filter_375(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3268,47 +3051,50 @@ tex_sample_persp_mipmap_filter_375(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } -#define CLAMP(x) \ - do { \ - if ((x) & ~0xff) \ - x = ((x) < 0) ? 0 : 0xff; \ +#define CLAMP(x) \ + do { \ + if ((x) & ~0xff) \ + x = ((x) < 0) ? 0 : 0xff; \ } while (0) -#define CLAMP_RGBA(r, g, b, a) \ - if ((r) & ~0xff) \ - r = ((r) < 0) ? 0 : 0xff; \ - if ((g) & ~0xff) \ - g = ((g) < 0) ? 0 : 0xff; \ - if ((b) & ~0xff) \ - b = ((b) < 0) ? 0 : 0xff; \ - if ((a) & ~0xff) \ - a = ((a) < 0) ? 0 : 0xff; +#define CLAMP_RGBA(r, g, b, a) \ + if ((r) & ~0xff) \ + r = ((r) < 0) ? 0 : 0xff; \ + if ((g) & ~0xff) \ + g = ((g) < 0) ? 0 : 0xff; \ + if ((b) & ~0xff) \ + b = ((b) < 0) ? 0 : 0xff; \ + if ((a) & ~0xff) \ + a = ((a) < 0) ? 0 : 0xff; -#define CLAMP_RGB(r, g, b) \ - do { \ - if ((r) < 0) \ - r = 0; \ - if ((r) > 0xff) \ - r = 0xff; \ - if ((g) < 0) \ - g = 0; \ - if ((g) > 0xff) \ - g = 0xff; \ - if ((b) < 0) \ - b = 0; \ - if ((b) > 0xff) \ - b = 0xff; \ +#define CLAMP_RGB(r, g, b) \ + do { \ + if ((r) < 0) \ + r = 0; \ + if ((r) > 0xff) \ + r = 0xff; \ + if ((g) < 0) \ + g = 0; \ + if ((g) > 0xff) \ + g = 0xff; \ + if ((b) < 0) \ + b = 0; \ + if ((b) > 0xff) \ + b = 0xff; \ } while (0) static void -dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) -{ +dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) { state->dest_rgba.r = state->r >> 7; CLAMP(state->dest_rgba.r); @@ -3323,8 +3109,7 @@ dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) } static void -dest_pixel_unlit_texture_triangle(s3d_state_t *state) -{ +dest_pixel_unlit_texture_triangle(s3d_state_t *state) { tex_sample(state); if (state->cmd_set & CMD_SET_ABC_SRC) @@ -3332,8 +3117,7 @@ dest_pixel_unlit_texture_triangle(s3d_state_t *state) } static void -dest_pixel_lit_texture_decal(s3d_state_t *state) -{ +dest_pixel_lit_texture_decal(s3d_state_t *state) { tex_sample(state); if (state->cmd_set & CMD_SET_ABC_SRC) @@ -3341,8 +3125,7 @@ dest_pixel_lit_texture_decal(s3d_state_t *state) } static void -dest_pixel_lit_texture_reflection(s3d_state_t *state) -{ +dest_pixel_lit_texture_reflection(s3d_state_t *state) { tex_sample(state); state->dest_rgba.r += (state->r >> 7); @@ -3355,8 +3138,7 @@ dest_pixel_lit_texture_reflection(s3d_state_t *state) } static void -dest_pixel_lit_texture_modulate(s3d_state_t *state) -{ +dest_pixel_lit_texture_modulate(s3d_state_t *state) { int r = state->r >> 7; int g = state->g >> 7; int b = state->b >> 7; @@ -3375,39 +3157,18 @@ dest_pixel_lit_texture_modulate(s3d_state_t *state) } static void -tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) -{ - svga_t *svga = &virge->svga; - uint8_t *vram = svga->vram; +tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) { + svga_t *svga = &virge->svga; + uint8_t *vram = virge->svga.vram; + int x_dir = s3d_tri->tlr ? 1 : -1; + int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); + int y_count = yc; + int bpp = (s3d_tri->cmd_set >> 2) & 7; + uint32_t dest_offset; + uint32_t z_offset; - int x_dir = s3d_tri->tlr ? 1 : -1; - - int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); - - int y_count = yc; - - int bpp = (s3d_tri->cmd_set >> 2) & 7; - - uint32_t dest_offset = 0; - uint32_t z_offset = 0; - - uint32_t src_col; - int src_r = 0; - int src_g = 0; - int src_b = 0; - - int x; - int xe; - uint32_t z; - - uint32_t dest_addr; - uint32_t z_addr; - int dx; - int x_offset; - int xz_offset; - - int update; - uint16_t src_z = 0; + dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str); + z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str); if (s3d_tri->cmd_set & CMD_SET_HC) { if (state->y < s3d_tri->clip_t) @@ -3427,240 +3188,194 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int state->base_a += (s3d_tri->TdAdY * diff_y); state->base_d += (s3d_tri->TdDdY * diff_y); state->base_w += (s3d_tri->TdWdY * diff_y); - state->x1 += (dx1 * diff_y); - state->x2 += (dx2 * diff_y); - state->y -= diff_y; - dest_offset -= s3d_tri->dest_str * diff_y; - z_offset -= s3d_tri->z_str; - y_count -= diff_y; + state->x1 += (dx1 * diff_y); + state->x2 += (dx2 * diff_y); + state->y -= diff_y; + dest_offset -= s3d_tri->dest_str; + z_offset -= s3d_tri->z_str; + y_count -= diff_y; } if ((state->y - y_count) < s3d_tri->clip_t) y_count = (state->y - s3d_tri->clip_t) + 1; } - dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str); - z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str); + for (; y_count > 0; y_count--) { + int x = (state->x1 + ((1 << 20) - 1)) >> 20; + int xe = (state->x2 + ((1 << 20) - 1)) >> 20; + uint32_t z = (state->base_z > 0) ? (state->base_z << 1) : 0; - while (y_count > 0) { - x = (state->x1 + ((1 << 20) - 1)) >> 20; - xe = (state->x2 + ((1 << 20) - 1)) >> 20; - z = (state->base_z > 0) ? (state->base_z << 1) : 0; - if (x_dir < 0) { - x--; - xe--; + if (x_dir < 0) { + x--; + xe--; + } + + if (x != xe && ((x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))) { + uint32_t dest_addr; + uint32_t z_addr; + int dx = (x_dir > 0) ? ((31 - ((state->x1 - 1) >> 15)) & 0x1f) : + (((state->x1 - 1) >> 15) & 0x1f); + int x_offset = x_dir * (bpp + 1); + int xz_offset = x_dir << 1; + + if (x_dir > 0) + dx += 1; + + state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5); + state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5); + state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5); + state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5); + state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5); + state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5); + state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5); + state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5); + z += ((s3d_tri->TdZdX * dx) >> 5); + + if (s3d_tri->cmd_set & CMD_SET_HC) { + if (x_dir > 0) { + if (x > s3d_tri->clip_r) + goto tri_skip_line; + if (xe < s3d_tri->clip_l) + goto tri_skip_line; + if (xe > s3d_tri->clip_r) + xe = s3d_tri->clip_r + 1; + if (x < s3d_tri->clip_l) { + int diff_x = s3d_tri->clip_l - x; + + z += (s3d_tri->TdZdX * diff_x); + state->u += (s3d_tri->TdUdX * diff_x); + state->v += (s3d_tri->TdVdX * diff_x); + state->r += (s3d_tri->TdRdX * diff_x); + state->g += (s3d_tri->TdGdX * diff_x); + state->b += (s3d_tri->TdBdX * diff_x); + state->a += (s3d_tri->TdAdX * diff_x); + state->d += (s3d_tri->TdDdX * diff_x); + state->w += (s3d_tri->TdWdX * diff_x); + + x = s3d_tri->clip_l; + } + } else { + if (x < s3d_tri->clip_l) + goto tri_skip_line; + if (xe > s3d_tri->clip_r) + goto tri_skip_line; + if (xe < s3d_tri->clip_l) + xe = s3d_tri->clip_l - 1; + if (x > s3d_tri->clip_r) { + int diff_x = x - s3d_tri->clip_r; + + z += (s3d_tri->TdZdX * diff_x); + state->u += (s3d_tri->TdUdX * diff_x); + state->v += (s3d_tri->TdVdX * diff_x); + state->r += (s3d_tri->TdRdX * diff_x); + state->g += (s3d_tri->TdGdX * diff_x); + state->b += (s3d_tri->TdBdX * diff_x); + state->a += (s3d_tri->TdAdX * diff_x); + state->d += (s3d_tri->TdDdX * diff_x); + state->w += (s3d_tri->TdWdX * diff_x); + + x = s3d_tri->clip_r; + } + } + } + + virge->svga.changedvram[(dest_offset & svga->vram_mask) >> 12] = changeframecount; + + dest_addr = dest_offset + (x * (bpp + 1)); + z_addr = z_offset + (x << 1); + + x &= 0xfff; + xe &= 0xfff; + + for (; x != xe; x = (x + x_dir) & 0xfff) { + int update = 1; + uint16_t src_z = 0; + + _x = x; + _y = state->y; + + if (use_z) { + src_z = Z_READ(z_addr); + Z_CLIP(src_z, z >> 16); + } + + if (update) { + uint32_t dest_col; + + dest_pixel(state); + + if (s3d_tri->cmd_set & CMD_SET_FE) { + int a = state->a >> 7; + state->dest_rgba.r = ((state->dest_rgba.r * a) + (s3d_tri->fog_r * (255 - a))) / 255; + state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; + state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; + } + + if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { + uint32_t src_col; + int src_r = 0; + uint32_t src_g = 0; + uint32_t src_b = 0; + + switch (bpp) { + case 0: /*8 bpp*/ + /*Not implemented yet*/ + break; + case 1: /*16 bpp*/ + src_col = *(uint16_t *)&vram[dest_addr & svga->vram_mask]; + RGB15_TO_24(src_col, src_r, src_g, src_b); + break; + case 2: /*24 bpp*/ + src_col = (*(uint32_t *)&vram[dest_addr & svga->vram_mask]) & 0xffffff; + RGB24_TO_24(src_col, src_r, src_g, src_b); + break; + } + + state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + + (src_r * (255 - state->dest_rgba.a))) / 255; + state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + + (src_g * (255 - state->dest_rgba.a))) / 255; + state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + + (src_b * (255 - state->dest_rgba.a))) / 255; + } + + switch (bpp) { + case 0: /*8 bpp*/ + /*Not implemented yet*/ + break; + case 1: /*16 bpp*/ + RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col); + *(uint16_t *)&vram[dest_addr] = dest_col; + break; + case 2: /*24 bpp*/ + dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); + *(uint8_t *)&vram[dest_addr] = dest_col & 0xff; + *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff; + *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff; + break; + } + + if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP)) + Z_WRITE(z_addr, src_z); + } + + z += s3d_tri->TdZdX; + state->u += s3d_tri->TdUdX; + state->v += s3d_tri->TdVdX; + state->r += s3d_tri->TdRdX; + state->g += s3d_tri->TdGdX; + state->b += s3d_tri->TdBdX; + state->a += s3d_tri->TdAdX; + state->d += s3d_tri->TdDdX; + state->w += s3d_tri->TdWdX; + dest_addr += x_offset; + z_addr += xz_offset; + virge->pixel_count++; + } } - - if (((x != xe) && ((x_dir > 0) && (x < xe))) || ((x_dir < 0) && (x > xe))) { - dx = (x_dir > 0) ? ((31 - ((state->x1 - 1) >> 15)) & 0x1f) : (((state->x1 - 1) >> 15) & 0x1f); - x_offset = x_dir * (bpp + 1); - xz_offset = x_dir << 1; - if (x_dir > 0) - dx += 1; - state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5); - state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5); - state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5); - state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5); - state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5); - state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5); - state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5); - state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5); - z += ((s3d_tri->TdZdX * dx) >> 5); - - if (s3d_tri->cmd_set & CMD_SET_HC) { - if (x_dir > 0) { - if (x > s3d_tri->clip_r) - goto tri_skip_line; - if (xe < s3d_tri->clip_l) - goto tri_skip_line; - if (xe > s3d_tri->clip_r) - xe = s3d_tri->clip_r + 1; - if (x < s3d_tri->clip_l) { - int diff_x = s3d_tri->clip_l - x; - - z += (s3d_tri->TdZdX * diff_x); - state->u += (s3d_tri->TdUdX * diff_x); - state->v += (s3d_tri->TdVdX * diff_x); - state->r += (s3d_tri->TdRdX * diff_x); - state->g += (s3d_tri->TdGdX * diff_x); - state->b += (s3d_tri->TdBdX * diff_x); - state->a += (s3d_tri->TdAdX * diff_x); - state->d += (s3d_tri->TdDdX * diff_x); - state->w += (s3d_tri->TdWdX * diff_x); - - x = s3d_tri->clip_l; - } - } else { - if (x < s3d_tri->clip_l) - goto tri_skip_line; - if (xe > s3d_tri->clip_r) - goto tri_skip_line; - if (xe < s3d_tri->clip_l) - xe = s3d_tri->clip_l - 1; - if (x > s3d_tri->clip_r) { - int diff_x = x - s3d_tri->clip_r; - - z += (s3d_tri->TdZdX * diff_x); - state->u += (s3d_tri->TdUdX * diff_x); - state->v += (s3d_tri->TdVdX * diff_x); - state->r += (s3d_tri->TdRdX * diff_x); - state->g += (s3d_tri->TdGdX * diff_x); - state->b += (s3d_tri->TdBdX * diff_x); - state->a += (s3d_tri->TdAdX * diff_x); - state->d += (s3d_tri->TdDdX * diff_x); - state->w += (s3d_tri->TdWdX * diff_x); - - x = s3d_tri->clip_r; - } - } - } - - svga->changedvram[(dest_offset & virge->vram_mask) >> 12] = changeframecount; - - dest_addr = dest_offset + (x * (bpp + 1)); - z_addr = z_offset + (x << 1); - - x &= 0xfff; - xe &= 0xfff; - - while (x != xe) { - update = 1; - _x = x; - _y = state->y; - - if (use_z) { - src_z = *(uint16_t *) &vram[z_addr & virge->vram_mask]; - switch ((s3d_tri->cmd_set >> 20) & 7) { - case 0: - update = 0; - break; - case 1: - if ((z >> 16) > src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 2: - if ((z >> 16) == src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 3: - if ((z >> 16) >= src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 4: - if ((z >> 16) < src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 5: - if ((z >> 16) != src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 6: - if ((z >> 16) <= src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 7: - src_z = (z >> 16); - break; - - default: - break; - } - } - - if (update) { - uint32_t dest_col; - - dest_pixel(state); - - if (s3d_tri->cmd_set & CMD_SET_FE) { - int a = state->a >> 7; - state->dest_rgba.r = ((state->dest_rgba.r * a) + (s3d_tri->fog_r * (255 - a))) / 255; - state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; - state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; - } - - if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { - switch (bpp) { - case 0: /*8 bpp*/ - /*TODO: Not implemented yet*/ - break; - case 1: /*16 bpp*/ - src_col = *(uint16_t *) &vram[dest_addr & virge->vram_mask]; - RGB15_TO_24(src_col, src_r, src_g, src_b); - break; - case 2: /*24 bpp*/ - src_col = (*(uint32_t *) &vram[dest_addr & virge->vram_mask]) & 0xffffff; - RGB24_TO_24(src_col, src_r, src_g, src_b); - break; - - default: - break; - } - - state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255; - state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255; - state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255; - } - - switch (bpp) { - case 0: /*8 bpp*/ - /*TODO: Not implemented yet*/ - break; - case 1: /*16 bpp*/ - RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col); - *(uint16_t *) &vram[dest_addr & virge->vram_mask] = dest_col; - svga->changedvram[(dest_addr & virge->vram_mask) >> 12] = changeframecount; - break; - case 2: /*24 bpp*/ - dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); - *(uint8_t *) &vram[dest_addr & virge->vram_mask] = dest_col & 0xff; - *(uint8_t *) &vram[(dest_addr + 1) & virge->vram_mask] = (dest_col >> 8) & 0xff; - *(uint8_t *) &vram[(dest_addr + 2) & virge->vram_mask] = (dest_col >> 16) & 0xff; - svga->changedvram[(dest_addr & virge->vram_mask) >> 12] = changeframecount; - break; - - default: - break; - } - } - - if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP)) { - *(uint16_t *) &vram[z_addr & virge->vram_mask] = src_z; - svga->changedvram[(z_addr & virge->vram_mask) >> 12] = changeframecount; - } - - z += s3d_tri->TdZdX; - state->u += s3d_tri->TdUdX; - state->v += s3d_tri->TdVdX; - state->r += s3d_tri->TdRdX; - state->g += s3d_tri->TdGdX; - state->b += s3d_tri->TdBdX; - state->a += s3d_tri->TdAdX; - state->d += s3d_tri->TdDdX; - state->w += s3d_tri->TdWdX; - dest_addr += x_offset; - z_addr += xz_offset; - - x = (x + x_dir) & 0xfff; - } - } - - y_count--; - + tri_skip_line: - state->x1 += dx1; - state->x2 += dx2; + state->x1 += dx1; + state->x2 += dx2; state->base_u += s3d_tri->TdUdY; state->base_v += s3d_tri->TdVdY; state->base_z += s3d_tri->TdZdY; @@ -3671,31 +3386,22 @@ tri_skip_line: state->base_d += s3d_tri->TdDdY; state->base_w += s3d_tri->TdWdY; state->y--; - dest_offset -= s3d_tri->dest_str; - z_offset -= s3d_tri->z_str; + dest_offset -= s3d_tri->dest_str; + z_offset -= s3d_tri->z_str; } } -static int tex_size[8] = { - 4 * 2, - 2 * 2, - 2 * 2, - 1 * 2, - 2 / 1, - 2 / 1, - 1 * 2, - 1 * 2 -}; +static int tex_size[8] = {4 * 2, 2 * 2, 2 * 2, 1 * 2, 2 / 1, 2 / 1, 1 * 2, 1 * 2}; static void -s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) -{ +s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) { s3d_state_t state; - uint32_t tex_base; + uint32_t tex_base; + int c; - uint64_t start_time = plat_timer_read(); - uint64_t end_time; + uint64_t start_time = plat_timer_read(); + uint64_t end_time; state.tbu = s3d_tri->tbu << 11; state.tbv = s3d_tri->tbv << 11; @@ -3709,18 +3415,18 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) state.base_u = s3d_tri->tus; state.base_v = s3d_tri->tvs; state.base_z = s3d_tri->tzs; - state.base_r = (int32_t) s3d_tri->trs; - state.base_g = (int32_t) s3d_tri->tgs; - state.base_b = (int32_t) s3d_tri->tbs; - state.base_a = (int32_t) s3d_tri->tas; + state.base_r = (int32_t)s3d_tri->trs; + state.base_g = (int32_t)s3d_tri->tgs; + state.base_b = (int32_t)s3d_tri->tbs; + state.base_a = (int32_t)s3d_tri->tas; state.base_d = s3d_tri->tds; state.base_w = s3d_tri->tws; tex_base = s3d_tri->tex_base; - for (int c = 9; c >= 0; c--) { - state.texture[c] = (uint16_t *) &virge->svga.vram[tex_base]; - if (c <= state.max_d) - tex_base += ((1 << (c * 2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2; + for (c = 9; c >= 0; c--) { + state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base]; + if (c <= state.max_d) + tex_base += ((1 << (c * 2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2; } switch ((s3d_tri->cmd_set >> 27) & 0xf) { @@ -3740,7 +3446,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) dest_pixel = dest_pixel_lit_texture_decal; break; default: - s3_virge_log("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf); return; } break; @@ -3749,7 +3454,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) dest_pixel = dest_pixel_unlit_texture_triangle; break; default: - s3_virge_log("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf); return; } @@ -3772,34 +3476,35 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) break; case (0 | 8): case (1 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) tex_sample = tex_sample_persp_mipmap_375; else tex_sample = tex_sample_persp_mipmap; break; case (2 | 8): case (3 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) - tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375; + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) + tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : + tex_sample_persp_mipmap_375; else - tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap; + tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : + tex_sample_persp_mipmap; break; case (4 | 8): case (5 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) tex_sample = tex_sample_persp_normal_375; else tex_sample = tex_sample_persp_normal; break; case (6 | 8): case (7 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) - tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375; + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) + tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : + tex_sample_persp_normal_375; else - tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal; - break; - - default: + tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : + tex_sample_persp_normal; break; } @@ -3814,7 +3519,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; break; default: - s3_virge_log("bad texture type %i\n", (s3d_tri->cmd_set >> 5) & 7); tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; break; } @@ -3826,15 +3530,16 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) state.x2 = s3d_tri->txend12; tri(virge, s3d_tri, &state, s3d_tri->ty12, s3d_tri->TdXdY02, s3d_tri->TdXdY12); + virge->tri_count++; + end_time = plat_timer_read(); - virge->blitter_time += end_time - start_time; + virge_time += end_time - start_time; } static void -s3_virge_render_thread(void *priv) -{ - virge_t *virge = (virge_t *) priv; +render_thread(void *param) { + virge_t *virge = (virge_t *)param; while (virge->render_thread_run) { thread_wait_event(virge->wake_render_thread, -1); @@ -3844,7 +3549,7 @@ s3_virge_render_thread(void *priv) s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]); virge->s3d_read_idx++; - if (RB_ENTRIES == RB_MASK) + if (RB_ENTRIES == (RB_SIZE - 1)) thread_set_event(virge->not_full_event); } virge->s3d_busy = 0; @@ -3854,30 +3559,27 @@ s3_virge_render_thread(void *priv) } static void -queue_triangle(virge_t *virge) -{ +queue_triangle(virge_t *virge) { if (RB_FULL) { thread_reset_event(virge->not_full_event); if (RB_FULL) thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/ } - virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri; virge->s3d_write_idx++; if (!virge->s3d_busy) thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/ } -static void -s3_virge_hwcursor_draw(svga_t *svga, int displine) -{ - const virge_t *virge = (virge_t *) svga->priv; - uint16_t dat[2]; - int xx; - int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; - uint32_t fg; - uint32_t bg; - uint32_t vram_mask = virge->vram_mask; +static void s3_virge_hwcursor_draw(svga_t *svga, int displine) { + virge_t *virge = (virge_t *) svga->priv; + int x; + uint16_t dat[2] = { 0, 0 }; + int xx; + int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; + uint32_t fg; + uint32_t bg; + uint32_t vram_mask = virge->vram_mask; if (svga->interlace && svga->hwcursor_oddeven) svga->hwcursor_latch.addr += 16; @@ -3914,13 +3616,16 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) break; } - for (uint8_t x = 0; x < 64; x += 16) { - dat[0] = (svga->vram[svga->hwcursor_latch.addr & vram_mask] << 8) | svga->vram[(svga->hwcursor_latch.addr + 1) & vram_mask]; - dat[1] = (svga->vram[(svga->hwcursor_latch.addr + 2) & vram_mask] << 8) | svga->vram[(svga->hwcursor_latch.addr + 3) & vram_mask]; + for (x = 0; x < 64; x += 16) { + dat[0] = (svga->vram[svga->hwcursor_latch.addr & vram_mask] << 8) | + svga->vram[(svga->hwcursor_latch.addr + 1) & vram_mask]; + dat[1] = (svga->vram[(svga->hwcursor_latch.addr + 2) & vram_mask] << 8) | + svga->vram[(svga->hwcursor_latch.addr + 3) & vram_mask]; + if (svga->crtc[0x55] & 0x10) { /*X11*/ for (xx = 0; xx < 16; xx++) { - if (offset >= 0) { + if (offset >= svga->hwcursor_latch.x) { if (virge->chip == S3_VIRGEGX2) dat[0] ^= 0x8000; @@ -3929,13 +3634,14 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } offset++; + dat[0] <<= 1; dat[1] <<= 1; } } else { /*Windows*/ for (xx = 0; xx < 16; xx++) { - if (offset >= 0) { + if (offset >= svga->hwcursor_latch.x) { if (!(dat[0] & 0x8000)) buffer32->line[displine][offset + svga->x_add] = (dat[1] & 0x8000) ? fg : bg; else if (dat[1] & 0x8000) @@ -3943,101 +3649,109 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } offset++; + dat[0] <<= 1; dat[1] <<= 1; } } + svga->hwcursor_latch.addr += 4; } + if (svga->interlace && !svga->hwcursor_oddeven) svga->hwcursor_latch.addr += 16; } -#define DECODE_YCbCr() \ - do { \ - int c; \ - \ - for (c = 0; c < 2; c++) { \ - uint8_t y1, y2; \ - int8_t Cr, Cb; \ - int dR, dG, dB; \ - \ - y1 = src[0]; \ - Cr = src[1] - 0x80; \ - y2 = src[2]; \ - Cb = src[3] - 0x80; \ - src += 4; \ - \ - dR = (359 * Cr) >> 8; \ - dG = (88 * Cb + 183 * Cr) >> 8; \ - dB = (453 * Cb) >> 8; \ - \ - r[x_write] = y1 + dR; \ - CLAMP(r[x_write]); \ - g[x_write] = y1 - dG; \ - CLAMP(g[x_write]); \ - b[x_write] = y1 + dB; \ - CLAMP(b[x_write]); \ - \ - r[x_write + 1] = y2 + dR; \ - CLAMP(r[x_write + 1]); \ - g[x_write + 1] = y2 - dG; \ - CLAMP(g[x_write + 1]); \ - b[x_write + 1] = y2 + dB; \ - CLAMP(b[x_write + 1]); \ - \ - x_write = (x_write + 2) & 7; \ - } \ +#define DECODE_YCbCr() \ + do { \ + int c; \ + \ + for (c = 0; c < 2; c++) { \ + uint8_t y1, y2; \ + int8_t Cr; \ + int8_t Cb; \ + int dR; \ + int dG; \ + int dB; \ + \ + y1 = src[0]; \ + Cr = src[1] - 0x80; \ + y2 = src[2]; \ + Cb = src[3] - 0x80; \ + src += 4; \ + \ + dR = (359 * Cr) >> 8; \ + dG = (88 * Cb + 183 * Cr) >> 8; \ + dB = (453 * Cb) >> 8; \ + \ + r[x_write] = y1 + dR; \ + CLAMP(r[x_write]); \ + g[x_write] = y1 - dG; \ + CLAMP(g[x_write]); \ + b[x_write] = y1 + dB; \ + CLAMP(b[x_write]); \ + \ + r[x_write + 1] = y2 + dR; \ + CLAMP(r[x_write + 1]); \ + g[x_write + 1] = y2 - dG; \ + CLAMP(g[x_write + 1]); \ + b[x_write + 1] = y2 + dB; \ + CLAMP(b[x_write + 1]); \ + \ + x_write = (x_write + 2) & 7; \ + } \ } while (0) /*Both YUV formats are untested*/ -#define DECODE_YUV211() \ - do { \ - uint8_t y1, y2, y3, y4; \ - int8_t U, V; \ - int dR, dG, dB; \ - \ - U = src[0] - 0x80; \ - y1 = (298 * (src[1] - 16)) >> 8; \ - y2 = (298 * (src[2] - 16)) >> 8; \ - V = src[3] - 0x80; \ - y3 = (298 * (src[4] - 16)) >> 8; \ - y4 = (298 * (src[5] - 16)) >> 8; \ - src += 6; \ - \ - dR = (309 * V) >> 8; \ - dG = (100 * U + 208 * V) >> 8; \ - dB = (516 * U) >> 8; \ - \ - r[x_write] = y1 + dR; \ - CLAMP(r[x_write]); \ - g[x_write] = y1 - dG; \ - CLAMP(g[x_write]); \ - b[x_write] = y1 + dB; \ - CLAMP(b[x_write]); \ - \ - r[x_write + 1] = y2 + dR; \ - CLAMP(r[x_write + 1]); \ - g[x_write + 1] = y2 - dG; \ - CLAMP(g[x_write + 1]); \ - b[x_write + 1] = y2 + dB; \ - CLAMP(b[x_write + 1]); \ - \ - r[x_write + 2] = y3 + dR; \ - CLAMP(r[x_write + 2]); \ - g[x_write + 2] = y3 - dG; \ - CLAMP(g[x_write + 2]); \ - b[x_write + 2] = y3 + dB; \ - CLAMP(b[x_write + 2]); \ - \ - r[x_write + 3] = y4 + dR; \ - CLAMP(r[x_write + 3]); \ - g[x_write + 3] = y4 - dG; \ - CLAMP(g[x_write + 3]); \ - b[x_write + 3] = y4 + dB; \ - CLAMP(b[x_write + 3]); \ - \ - x_write = (x_write + 4) & 7; \ +#define DECODE_YUV211() \ + do { \ + uint8_t y1, y2, y3, y4; \ + int8_t U, V; \ + int dR; \ + int dG; \ + int dB; \ + \ + U = src[0] - 0x80; \ + y1 = (298 * (src[1] - 16)) >> 8; \ + y2 = (298 * (src[2] - 16)) >> 8; \ + V = src[3] - 0x80; \ + y3 = (298 * (src[4] - 16)) >> 8; \ + y4 = (298 * (src[5] - 16)) >> 8; \ + src += 6; \ + \ + dR = (309 * V) >> 8; \ + dG = (100 * U + 208 * V) >> 8; \ + dB = (516 * U) >> 8; \ + \ + r[x_write] = y1 + dR; \ + CLAMP(r[x_write]); \ + g[x_write] = y1 - dG; \ + CLAMP(g[x_write]); \ + b[x_write] = y1 + dB; \ + CLAMP(b[x_write]); \ + \ + r[x_write + 1] = y2 + dR; \ + CLAMP(r[x_write + 1]); \ + g[x_write + 1] = y2 - dG; \ + CLAMP(g[x_write + 1]); \ + b[x_write + 1] = y2 + dB; \ + CLAMP(b[x_write + 1]); \ + \ + r[x_write + 2] = y3 + dR; \ + CLAMP(r[x_write + 2]); \ + g[x_write + 2] = y3 - dG; \ + CLAMP(g[x_write + 2]); \ + b[x_write + 2] = y3 + dB; \ + CLAMP(b[x_write + 2]); \ + \ + r[x_write + 3] = y4 + dR; \ + CLAMP(r[x_write + 3]); \ + g[x_write + 3] = y4 - dG; \ + CLAMP(g[x_write + 3]); \ + b[x_write + 3] = y4 + dB; \ + CLAMP(b[x_write + 3]); \ + \ + x_write = (x_write + 4) & 7; \ } while (0) #define DECODE_YUV422() \ @@ -4045,13 +3759,17 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) int c; \ \ for (c = 0; c < 2; c++) { \ - uint8_t y1, y2; \ - int8_t U, V; \ - int dR, dG, dB; \ + uint8_t y1; \ + uint8_t y2; \ + int8_t U; \ + int8_t V; \ + int dR; \ + int dG; \ + int dB; \ \ - U = src[0] - 0x80; \ + U = src[0] - 0x80; \ y1 = (298 * (src[1] - 16)) >> 8; \ - V = src[2] - 0x80; \ + V = src[2] - 0x80; \ y2 = (298 * (src[3] - 16)) >> 8; \ src += 4; \ \ @@ -4077,110 +3795,122 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } \ } while (0) -#define DECODE_RGB555() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - uint16_t dat; \ - \ - dat = *(uint16_t *) src; \ - src += 2; \ - \ - r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \ - g[x_write + c] = ((dat & 0x03e0) >> 2) | ((dat & 0x03e0) >> 7); \ - b[x_write + c] = ((dat & 0x7c00) >> 7) | ((dat & 0x7c00) >> 12); \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB555() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + uint16_t dat; \ + \ + dat = *(uint16_t *)src; \ + src += 2; \ + \ + r[x_write + c] = \ + ((dat & 0x001f) << 3) | \ + ((dat & 0x001f) >> 2); \ + g[x_write + c] = \ + ((dat & 0x03e0) >> 2) | \ + ((dat & 0x03e0) >> 7); \ + b[x_write + c] = \ + ((dat & 0x7c00) >> 7) | \ + ((dat & 0x7c00) >> 12); \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_RGB565() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - uint16_t dat; \ - \ - dat = *(uint16_t *) src; \ - src += 2; \ - \ - r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \ - g[x_write + c] = ((dat & 0x07e0) >> 3) | ((dat & 0x07e0) >> 9); \ - b[x_write + c] = ((dat & 0xf800) >> 8) | ((dat & 0xf800) >> 13); \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB565() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + uint16_t dat; \ + \ + dat = *(uint16_t *)src; \ + src += 2; \ + \ + r[x_write + c] = \ + ((dat & 0x001f) << 3) | \ + ((dat & 0x001f) >> 2); \ + g[x_write + c] = \ + ((dat & 0x07e0) >> 3) | \ + ((dat & 0x07e0) >> 9); \ + b[x_write + c] = \ + ((dat & 0xf800) >> 8) | \ + ((dat & 0xf800) >> 13); \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_RGB888() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - r[x_write + c] = src[0]; \ - g[x_write + c] = src[1]; \ - b[x_write + c] = src[2]; \ - src += 3; \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB888() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + r[x_write + c] = src[0]; \ + g[x_write + c] = src[1]; \ + b[x_write + c] = src[2]; \ + src += 3; \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_XRGB8888() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - r[x_write + c] = src[0]; \ - g[x_write + c] = src[1]; \ - b[x_write + c] = src[2]; \ - src += 4; \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_XRGB8888() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + r[x_write + c] = src[0]; \ + g[x_write + c] = src[1]; \ + b[x_write + c] = src[2]; \ + src += 4; \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define OVERLAY_SAMPLE() \ - do { \ - switch (virge->streams.sdif) { \ - case 1: \ - DECODE_YCbCr(); \ - break; \ - case 2: \ - DECODE_YUV422(); \ - break; \ - case 3: \ - DECODE_RGB555(); \ - break; \ - case 4: \ - DECODE_YUV211(); \ - break; \ - case 5: \ - DECODE_RGB565(); \ - break; \ - case 6: \ - DECODE_RGB888(); \ - break; \ - case 7: \ - default: \ - DECODE_XRGB8888(); \ - break; \ - } \ +#define OVERLAY_SAMPLE() \ + do { \ + switch (virge->streams.sdif) { \ + case 1: \ + DECODE_YCbCr(); \ + break; \ + case 2: \ + DECODE_YUV422(); \ + break; \ + case 3: \ + DECODE_RGB555(); \ + break; \ + case 4: \ + DECODE_YUV211(); \ + break; \ + case 5: \ + DECODE_RGB565(); \ + break; \ + case 6: \ + DECODE_RGB888(); \ + break; \ + case 7: \ + default: \ + DECODE_XRGB8888(); \ + break; \ + } \ } while (0) static void -s3_virge_overlay_draw(svga_t *svga, int displine) -{ - const virge_t *virge = (virge_t *) svga->priv; - int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; - int h_acc = virge->streams.dda_horiz_accumulator; - int r[8]; - int g[8]; - int b[8]; - int x_size; - int x_read = 4; - int x_write = 4; - uint32_t *p; - uint8_t *src = &svga->vram[svga->overlay_latch.addr]; +s3_virge_overlay_draw(svga_t *svga, int displine) { + virge_t *virge = (virge_t *) svga->priv; + int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; + int h_acc = virge->streams.dda_horiz_accumulator; + int r[8]; + int g[8]; + int b[8]; + int x_size; + int x_read = 4; + int x_write = 4; + int x; + uint32_t *p; + uint8_t *src = &svga->vram[svga->overlay_latch.addr]; - p = &(buffer32->line[displine][offset + svga->x_add]); + p = &((uint32_t *)buffer32->line[displine])[offset + svga->x_add]; if ((offset + virge->streams.sec_w) > virge->streams.pri_w) x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1; @@ -4189,7 +3919,7 @@ s3_virge_overlay_draw(svga_t *svga, int displine) OVERLAY_SAMPLE(); - for (int x = 0; x < x_size; x++) { + for (x = 0; x < x_size; x++) { *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16); h_acc += virge->streams.k1_horiz_scale; @@ -4507,7 +4237,7 @@ static void * s3_virge_init(const device_t *info) { const char *bios_fn; - virge_t *virge = calloc(1, sizeof(virge_t)); + virge_t *virge = (virge_t *) calloc(1, sizeof(virge_t)); reset_state = calloc(1, sizeof(virge_t)); virge->bilinear_enabled = device_get_config_int("bilinear"); @@ -4742,12 +4472,12 @@ s3_virge_init(const device_t *info) virge->wake_render_thread = thread_create_event(); virge->wake_main_thread = thread_create_event(); virge->not_full_event = thread_create_event(); - virge->render_thread = thread_create(s3_virge_render_thread, virge); + virge->render_thread = thread_create(render_thread, virge); virge->fifo_thread_run = 1; virge->wake_fifo_thread = thread_create_event(); virge->fifo_not_full_event = thread_create_event(); - virge->fifo_thread = thread_create(s3_virge_fifo_thread, virge); + virge->fifo_thread = thread_create(fifo_thread, virge); virge->local = info->local; From 88741b021e8b67f0140fe4c01ac47aec93462d31 Mon Sep 17 00:00:00 2001 From: OBattler Date: Fri, 16 Aug 2024 05:58:24 +0200 Subject: [PATCH 15/26] S3 ViRGE: Some minor memory mask fixes. --- src/video/vid_s3_virge.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index aecd309e3..155380480 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -1661,7 +1661,8 @@ fifo_thread(void *param) { virge->s3d_tri.z_str = val & 0xff8; break; case 0xb4ec: - virge->s3d_tri.tex_base = val & 0x3ffff8; + virge->s3d_tri.tex_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); break; case 0xb4f0: virge->s3d_tri.tex_bdr_clr = val & 0xffffff; @@ -1900,12 +1901,14 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { virge->streams.blend_ctrl = val; break; case 0x81c0: - virge->streams.pri_fb0 = val & 0x3fffff; + virge->streams.pri_fb0 = val & ((virge->memory_size == 8) ? + (val & 0x7fffff) : (val & 0x3fffff)); svga_recalctimings(svga); svga->fullchange = changeframecount; break; case 0x81c4: - virge->streams.pri_fb1 = val & 0x3fffff; + virge->streams.pri_fb1 = ((virge->memory_size == 8) ? + (val & 0x7fffff) : (val & 0x3fffff)); svga_recalctimings(svga); svga->fullchange = changeframecount; break; From dba4fe373f3870f044362623907aedc15cbbcafe Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Fri, 16 Aug 2024 16:24:49 -0400 Subject: [PATCH 16/26] Add a link to the build guide to README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b4b4142be..ce38632e6 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,14 @@ We operate an IRC channel and a Discord server for discussing 86Box, its develop [![Visit our Discord server](https://discordapp.com/api/guilds/262614059009048590/embed.png)](https://discord.gg/QXK9XTv) Contributions ---------- +------------- We welcome all contributions to the project, as long as the [contribution guidelines](CONTRIBUTING.md) are followed. +Building +--------- +For instructions on how to build 86Box from source, see the [build guide](https://86box.readthedocs.io/en/latest/dev/buildguide.html). + Licensing --------- From 3611765e50d610f1626d4d59b063b3630a7988e0 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Fri, 16 Aug 2024 16:31:35 -0400 Subject: [PATCH 17/26] Delete 86Box-install-roms.sh --- scripts/86Box-install-roms.sh | 73 ----------------------------------- 1 file changed, 73 deletions(-) delete mode 100755 scripts/86Box-install-roms.sh diff --git a/scripts/86Box-install-roms.sh b/scripts/86Box-install-roms.sh deleted file mode 100755 index b13d93acc..000000000 --- a/scripts/86Box-install-roms.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/sh - -URL="https://github.com/86Box/roms/archive/refs/tags/v4.2.zip" -TMP_FILE="/tmp/86Box-ROMS.zip" -EXTRACT_DIR="/tmp/86Box-ROMS-extracted" -DEFAULT_TARGET_DIR="$HOME/.local/share/86Box/roms/" -TARGET_DIR=${TARGET_DIR:-$DEFAULT_TARGET_DIR} - -install_roms() { - if [ -d "$TARGET_DIR" ] && [ "$(ls -A $TARGET_DIR)" ]; then - echo "ROMS already installed in $TARGET_DIR" - echo "To (re)install, please first remove ROMS with -r parameter" - exit 1 - fi - fetch -o "$TMP_FILE" "$URL" - - if [ $? -ne 0 ]; then - echo "Failed to download the file from $URL" - exit 1 - fi - - mkdir -p "$EXTRACT_DIR" - unzip "$TMP_FILE" -d "$EXTRACT_DIR" - - if [ $? -ne 0 ]; then - echo "Failed to decompress the file" - rm "$TMP_FILE" - exit 1 - fi - - mkdir -p "$TARGET_DIR" - cd "$EXTRACT_DIR" - TOP_LEVEL_DIR=$(find . -mindepth 1 -maxdepth 1 -type d) - - if [ -d "$TOP_LEVEL_DIR" ]; then - mv "$TOP_LEVEL_DIR"/* "$TARGET_DIR" - fi - - rm -rf "$TMP_FILE" "$EXTRACT_DIR" - echo "ROMS installed successfully in $TARGET_DIR" -} - -remove_roms() { - if [ -d "$TARGET_DIR" ]; then - rm -rf "$TARGET_DIR" - echo "ROMS removed successfully from $TARGET_DIR" - else - echo "No ROMS directory found in $TARGET_DIR" - fi -} - -help() { - echo "" - echo "$0 [-h|-i|-r]" - echo " -h : this help" - echo " -i : install (this parameter can be omitted)" - echo " -r : remove the ROMS" - echo "" -} - -case "$1" in - -h) - help - ;; - -r) - remove_roms - ;; - -i|*) - install_roms - ;; -esac - -exit 0 From 366ebea651ba5cbdcf000f3c8734f47a37cee96c Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 17 Aug 2024 01:22:06 +0200 Subject: [PATCH 18/26] S3 ViRGE fixes by TC995 and switched threads on Windows builds back to Win32 threads since the C++ threads were breaking the ViRGE blitting in some cases (eg. CD Gamer on 24 bpp). --- src/CMakeLists.txt | 4 +- src/qt/win_thread.c | 179 +++++++++++++++++++++++++++++++++++++++ src/video/vid_s3_virge.c | 43 +++++++--- 3 files changed, 214 insertions(+), 12 deletions(-) create mode 100644 src/qt/win_thread.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0841fabdd..ad339040f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,7 +27,9 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux") add_compile_definitions(_FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE=1 _LARGEFILE64_SOURCE=1) endif() -if(CPPTHREADS) +if(WIN32) + target_sources(86Box PRIVATE qt/win_thread.c) +else() target_sources(86Box PRIVATE thread.cpp) endif() diff --git a/src/qt/win_thread.c b/src/qt/win_thread.c new file mode 100644 index 000000000..9a4f046b3 --- /dev/null +++ b/src/qt/win_thread.c @@ -0,0 +1,179 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Implement threads and mutexes for the Win32 platform. + * + * + * + * Authors: Sarah Walker, + * Fred N. van Kempen, + * + * Copyright 2008-2018 Sarah Walker. + * Copyright 2017-2018 Fred N. van Kempen. + */ +#define UNICODE +#define BITMAP WINDOWS_BITMAP +#include +#include +#include +#undef BITMAP +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/plat.h> +#include <86box/thread.h> + +typedef struct { + HANDLE handle; +} win_event_t; + +/* For compatibility with thread.h, but Win32 does not allow named threads. */ +thread_t * +thread_create_named(void (*func)(void *param), void *param, UNUSED(const char *name)) +{ + uintptr_t bt = _beginthread(func, 0, param); + return ((thread_t *) bt); +} + +int +thread_test_mutex(thread_t *arg) +{ + if (arg == NULL) + return (0); + + return (WaitForSingleObject(arg, 0) == WAIT_OBJECT_0) ? 1 : 0; +} + +int +thread_wait(thread_t *arg) +{ + if (arg == NULL) + return (0); + + if (WaitForSingleObject(arg, INFINITE)) + return (1); + + return (0); +} + +event_t * +thread_create_event(void) +{ + win_event_t *ev = malloc(sizeof(win_event_t)); + + ev->handle = CreateEvent(NULL, FALSE, FALSE, NULL); + + return ((event_t *) ev); +} + +void +thread_set_event(event_t *arg) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return; + + SetEvent(ev->handle); +} + +void +thread_reset_event(event_t *arg) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return; + + ResetEvent(ev->handle); +} + +int +thread_wait_event(event_t *arg, int timeout) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return (0); + + if (ev->handle == NULL) + return (0); + + if (timeout == -1) + timeout = INFINITE; + + if (WaitForSingleObject(ev->handle, timeout)) + return (1); + + return (0); +} + +void +thread_destroy_event(event_t *arg) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return; + + CloseHandle(ev->handle); + + free(ev); +} + +mutex_t * +thread_create_mutex(void) +{ + mutex_t *mutex = malloc(sizeof(CRITICAL_SECTION)); + + InitializeCriticalSection(mutex); + + return mutex; +} + +int +thread_wait_mutex(mutex_t *mutex) +{ + if (mutex == NULL) + return (0); + + LPCRITICAL_SECTION critsec = (LPCRITICAL_SECTION) mutex; + + EnterCriticalSection(critsec); + + return 1; +} + +int +thread_release_mutex(mutex_t *mutex) +{ + if (mutex == NULL) + return (0); + + LPCRITICAL_SECTION critsec = (LPCRITICAL_SECTION) mutex; + + LeaveCriticalSection(critsec); + + return 1; +} + +void +thread_close_mutex(mutex_t *mutex) +{ + if (mutex == NULL) + return; + + LPCRITICAL_SECTION critsec = (LPCRITICAL_SECTION) mutex; + + DeleteCriticalSection(critsec); + + free(critsec); +} diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 155380480..24f325c49 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -1804,13 +1804,34 @@ fifo_thread(void *param) { } static void -s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) { +s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) +{ fifo_entry_t *fifo = &virge->fifo[virge->fifo_write_idx & FIFO_MASK]; + int limit = 0; - if (FIFO_FULL) { - thread_reset_event(virge->fifo_not_full_event); - if (FIFO_FULL) - thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + if (type == FIFO_WRITE_DWORD) { + switch (addr & 0xfffc) { + case 0xa500: + if (val & CMD_SET_AE) + limit = 1; + break; + default: + break; + } + } + + if (limit) { + if (FIFO_ENTRIES >= 16) { + thread_reset_event(virge->fifo_not_full_event); + if (FIFO_ENTRIES >= 16) + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } + } else { + if (FIFO_FULL) { + thread_reset_event(virge->fifo_not_full_event); + if (FIFO_FULL) + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } } fifo->val = val; @@ -2190,13 +2211,13 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { for (y = 0; y < 4; y++) { for (x = 0; x < 8; x++) { if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) - mono_pattern[y * 8 + x] = virge->s3d.pat_fg_clr; + mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_fg_clr; else - mono_pattern[y * 8 + x] = virge->s3d.pat_bg_clr; + mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_bg_clr; if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) - mono_pattern[(y + 4) * 8 + x] = virge->s3d.pat_fg_clr; + mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_fg_clr; else - mono_pattern[(y + 4) * 8 + x] = virge->s3d.pat_bg_clr; + mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_bg_clr; } } } @@ -3312,7 +3333,7 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; } - + if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { uint32_t src_col; int src_r = 0; @@ -3375,7 +3396,7 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int virge->pixel_count++; } } - + tri_skip_line: state->x1 += dx1; state->x2 += dx2; From dc8d237126455a293c8d2302bd732fcd83068c86 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 17 Aug 2024 19:46:29 +0200 Subject: [PATCH 19/26] S3 ViRGE: Force bit 2 of CRTC register 33h to be set when read, fixes Windows 95 GX2 lock-ups. --- src/video/vid_s3_virge.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 24f325c49..286e459e7 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -720,6 +720,9 @@ s3_virge_in(uint16_t addr, void *priv) { case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break; + case 0x33: + ret = (svga->crtc[0x33] | 0x04); + break; case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break; From d3160e21b4686e3df6e9e1c451a0691b4b3e7e5e Mon Sep 17 00:00:00 2001 From: OBattler Date: Sun, 18 Aug 2024 03:39:39 +0200 Subject: [PATCH 20/26] Reverted an unnecessary change in floppy/fdc.c fixes #4727. --- src/floppy/fdc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/floppy/fdc.c b/src/floppy/fdc.c index 3fd492bd2..6baef8ab8 100644 --- a/src/floppy/fdc.c +++ b/src/floppy/fdc.c @@ -1635,6 +1635,8 @@ fdc_callback(void *priv) return; } if (fdd_get_head(real_drive(fdc, fdc->drive)) == 0) { + fdc->sector = 1; + fdc->head |= 1; fdd_set_head(real_drive(fdc, fdc->drive), 1); if (!fdd_is_double_sided(real_drive(fdc, fdc->drive))) { fdc_noidam(fdc); From 04b5a99ae9269994e32163df1ab2e8d3467e43a2 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sun, 18 Aug 2024 06:34:04 +0200 Subject: [PATCH 21/26] (S)VGA render: Actually use the DAC mask, fixes #4720. --- src/video/vid_svga_render.c | 250 ++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 125 deletions(-) diff --git a/src/video/vid_svga_render.c b/src/video/vid_svga_render.c index 74d9a4d0c..c05d308fa 100644 --- a/src/video/vid_svga_render.c +++ b/src/video/vid_svga_render.c @@ -178,14 +178,14 @@ svga_render_text_40(svga_t *svga) charaddr = svga->charseta + (chr * 128); if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -256,13 +256,13 @@ svga_render_text_80(svga_t *svga) charaddr = svga->charseta + (chr * 128); if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -323,13 +323,13 @@ svga_render_text_80_ksc5601(svga_t *svga) attr = svga->vram[addr + 1]; if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -378,13 +378,13 @@ svga_render_text_80_ksc5601(svga_t *svga) attr = svga->vram[((svga->ma << 1) + 1) & svga->vram_display_mask]; if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -468,14 +468,14 @@ svga_render_2bpp_s3_lowres(svga_t *svga) else svga->ma += 4; svga->ma &= svga->vram_mask; - p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 16; } } @@ -501,14 +501,14 @@ svga_render_2bpp_s3_lowres(svga_t *svga) svga->ma &= svga->vram_mask; - p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 16; } @@ -566,14 +566,14 @@ svga_render_2bpp_s3_highres(svga_t *svga) else svga->ma += 4; svga->ma &= svga->vram_mask; - p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[3] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[7] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[3] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[7] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 8; } } @@ -599,14 +599,14 @@ svga_render_2bpp_s3_highres(svga_t *svga) svga->ma &= svga->vram_mask; - p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[3] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[7] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[3] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[7] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 8; } @@ -652,17 +652,17 @@ svga_render_2bpp_headland_highres(svga_t *svga) svga->ma &= svga->vram_mask; dat = edatlookup[edat[0] >> 6][edat[1] >> 6] | (edatlookup[edat[2] >> 6][edat[3] >> 6] << 2); - p[0] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[1] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[0] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[1] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; dat = edatlookup[(edat[0] >> 4) & 3][(edat[1] >> 4) & 3] | (edatlookup[(edat[2] >> 4) & 3][(edat[3] >> 4) & 3] << 2); - p[2] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[3] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[2] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[3] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; dat = edatlookup[(edat[0] >> 2) & 3][(edat[1] >> 2) & 3] | (edatlookup[(edat[2] >> 2) & 3][(edat[3] >> 2) & 3] << 2); - p[4] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[5] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[4] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[5] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; dat = edatlookup[edat[0] & 3][edat[1] & 3] | (edatlookup[edat[2] & 3][edat[3] & 3] << 2); - p[6] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[7] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[6] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[7] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; p += 8; } @@ -868,8 +868,8 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits) } } else if (combine8bits) { if (svga->packed_4bpp) { - uint32_t p0 = svga->map8[c0]; - uint32_t p1 = svga->map8[c1]; + uint32_t p0 = svga->map8[c0 & svga->dac_mask]; + uint32_t p1 = svga->map8[c1 & svga->dac_mask]; const int outoffs = i << dwshift; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; @@ -877,14 +877,14 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits) p[outoffs + subx + dotwidth] = p1; } else { uint32_t ccombined = (c0 << 4) | c1; - uint32_t p0 = svga->map8[ccombined]; + uint32_t p0 = svga->map8[ccombined & svga->dac_mask]; const int outoffs = (i >> 1) << dwshift; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; } } else { - uint32_t p0 = svga->pallook[svga->egapal[c0]]; - uint32_t p1 = svga->pallook[svga->egapal[c1]]; + uint32_t p0 = svga->pallook[svga->egapal[c0] & svga->dac_mask]; + uint32_t p1 = svga->pallook[svga->egapal[c1] & svga->dac_mask]; const int outoffs = i << dwshift; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; @@ -934,16 +934,16 @@ svga_render_8bpp_clone_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -963,16 +963,16 @@ svga_render_8bpp_clone_highres(svga_t *svga) if (!svga->remap_required) { for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -981,10 +981,10 @@ svga_render_8bpp_clone_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 4) { addr = svga->remap_func(svga, svga->ma); dat = *(uint32_t *) (&svga->vram[addr & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 4; @@ -1043,10 +1043,10 @@ svga_render_8bpp_lowres(svga_t *svga) if (!svga->remap_required) { for (x = 0; x <= (svga->hdisp + svga->scrollcache); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = p[1] = svga->map8[dat & 0xff]; - p[2] = p[3] = svga->map8[(dat >> 8) & 0xff]; - p[4] = p[5] = svga->map8[(dat >> 16) & 0xff]; - p[6] = p[7] = svga->map8[(dat >> 24) & 0xff]; + p[0] = p[1] = svga->map8[dat & svga->dac_mask & 0xff]; + p[2] = p[3] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[4] = p[5] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[6] = p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 8; @@ -1055,10 +1055,10 @@ svga_render_8bpp_lowres(svga_t *svga) for (x = 0; x <= (svga->hdisp + svga->scrollcache); x += 8) { addr = svga->remap_func(svga, svga->ma); dat = *(uint32_t *) (&svga->vram[addr & svga->vram_display_mask]); - p[0] = p[1] = svga->map8[dat & 0xff]; - p[2] = p[3] = svga->map8[(dat >> 8) & 0xff]; - p[4] = p[5] = svga->map8[(dat >> 16) & 0xff]; - p[6] = p[7] = svga->map8[(dat >> 24) & 0xff]; + p[0] = p[1] = svga->map8[dat & svga->dac_mask & 0xff]; + p[2] = p[3] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[4] = p[5] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[6] = p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 8; @@ -1091,16 +1091,16 @@ svga_render_8bpp_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -1120,16 +1120,16 @@ svga_render_8bpp_highres(svga_t *svga) if (!svga->remap_required) { for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -1138,10 +1138,10 @@ svga_render_8bpp_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 4) { addr = svga->remap_func(svga, svga->ma); dat = *(uint32_t *) (&svga->vram[addr & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 4; @@ -1173,19 +1173,19 @@ svga_render_8bpp_tseng_lowres(svga_t *svga) dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[0] = p[1] = svga->map8[dat & 0xff]; + p[0] = p[1] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[2] = p[3] = svga->map8[dat & 0xff]; + p[2] = p[3] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[4] = p[5] = svga->map8[dat & 0xff]; + p[4] = p[5] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[6] = p[7] = svga->map8[dat & 0xff]; + p[6] = p[7] = svga->map8[dat & svga->dac_mask & 0xff]; svga->ma += 4; p += 8; @@ -1214,36 +1214,36 @@ svga_render_8bpp_tseng_highres(svga_t *svga) dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[0] = svga->map8[dat & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[1] = svga->map8[dat & 0xff]; + p[1] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[2] = svga->map8[dat & 0xff]; + p[2] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[3] = svga->map8[dat & 0xff]; + p[3] = svga->map8[dat & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[4] = svga->map8[dat & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[5] = svga->map8[dat & 0xff]; + p[5] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[6] = svga->map8[dat & 0xff]; + p[6] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[7] = svga->map8[dat & 0xff]; + p[7] = svga->map8[dat & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; From 882aabf95d198b3df2fc781aa0b1f6b5b38b9f91 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sun, 18 Aug 2024 20:35:47 +0200 Subject: [PATCH 22/26] Diamond S3 86c911: Use the correct clock chip, fixes #4730. --- src/video/vid_s3.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/video/vid_s3.c b/src/video/vid_s3.c index d2b5a87fe..8a9bedf02 100644 --- a/src/video/vid_s3.c +++ b/src/video/vid_s3.c @@ -9930,8 +9930,14 @@ s3_init(const device_t *info) s3->width = 1024; svga->ramdac = device_add(&sc11483_ramdac_device); - svga->clock_gen = device_add(&av9194_device); - svga->getclock = av9194_getclock; + if (s3->card_type == S3_ORCHID_86C911) { + svga->clock_gen = device_add(&av9194_device); + svga->getclock = av9194_getclock; + } else { + /* DCS2824-0 = Diamond ICD2061A-compatible. */ + svga->clock_gen = device_add(&icd2061_device); + svga->getclock = icd2061_getclock; + } break; case S3_AMI_86C924: From aebaabb6b8e78beedd3b8210113183a9141fde98 Mon Sep 17 00:00:00 2001 From: TC1995 Date: Tue, 20 Aug 2024 15:45:56 +0200 Subject: [PATCH 23/26] ViRGE changes of the day (August 20th, 2024) 1. ROM updated to 1.03. 2. Re-apply more fixes that were missed during the restructuring to be on par with PCem's speed. 3. Implemented Command DMA bus mastering required by Diamond Stealth 3D specific drivers (especially the GX2-based 4000). 4. Some indentation fixes. --- src/video/vid_s3_virge.c | 1079 ++++++++++++++++++++------------------ 1 file changed, 561 insertions(+), 518 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 286e459e7..d0831ff4a 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -67,7 +67,7 @@ static int dither[4][4] = { #define ROM_DIAMOND_STEALTH3D_2000PRO "roms/video/s3virge/virgedxdiamond.vbi" #define ROM_VIRGE_GX "roms/video/s3virge/86c375_4.bin" #define ROM_VIRGE_GX2 "roms/video/s3virge/flagpoint.VBI" -#define ROM_DIAMOND_STEALTH3D_4000 "roms/video/s3virge/86c357.bin" +#define ROM_DIAMOND_STEALTH3D_4000 "roms/video/s3virge/DS3D4K v1.03 Brightness bug fix.bin" #define ROM_TRIO3D2X "roms/video/s3virge/TRIO3D2X_8mbsdr.VBI" #define RB_SIZE 256 @@ -286,9 +286,9 @@ typedef struct virge_t { s3d_t s3d_tri; s3d_t s3d_buffer[RB_SIZE]; - int s3d_read_idx; - int s3d_write_idx; - int s3d_busy; + atomic_int s3d_read_idx; + atomic_int s3d_write_idx; + atomic_int s3d_busy; struct { uint32_t pri_ctrl; @@ -328,15 +328,14 @@ typedef struct virge_t { } streams; fifo_entry_t fifo[FIFO_SIZE]; - volatile int fifo_read_idx, fifo_write_idx; - - volatile int fifo_thread_run, render_thread_run; + atomic_int fifo_read_idx, fifo_write_idx; + atomic_int fifo_thread_run, render_thread_run; thread_t * fifo_thread; event_t *wake_fifo_thread; event_t * fifo_not_full_event; - int virge_busy; + atomic_int virge_busy; uint8_t subsys_stat; uint8_t subsys_cntl; @@ -361,7 +360,16 @@ typedef struct virge_t { uint8_t cmd_dma; uint32_t cmd_dma_base; - uint32_t dma_ptr; + uint32_t cmd_dma_buf_size; + uint32_t cmd_dma_buf_size_mask; + uint32_t cmd_base_addr; + uint32_t cmd_dma_write_ptr_reg; + uint32_t cmd_dma_write_ptr_update; + uint32_t cmd_dma_read_ptr_reg; + uint32_t dma_val; + uint32_t dma_dbl_words; + uint32_t dma_mmio_addr; + uint32_t dma_data_type; int pci; int is_agp; @@ -394,6 +402,8 @@ static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv); static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv); static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv); +static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type); + enum { CMD_SET_AE = 1, CMD_SET_HC = (1 << 1), @@ -440,6 +450,8 @@ enum { #define INT_S3D_DONE (1 << 1) #define INT_FIFO_OVF (1 << 2) #define INT_FIFO_EMP (1 << 3) +#define INT_HOST_DONE (1 << 4) +#define INT_CMD_DONE (1 << 5) #define INT_3DF_EMP (1 << 6) #define INT_MASK 0xff @@ -726,9 +738,6 @@ s3_virge_in(uint16_t addr, void *priv) { case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break; - case 0x36: - ret = (svga->crtc[0x36] & 0xfc) | 2; - break; /*PCI bus*/ case 0x45: virge->hwc_col_stack_pos = 0; ret = svga->crtc[0x45]; @@ -783,7 +792,8 @@ s3_virge_in(uint16_t addr, void *priv) { } static void -s3_virge_recalctimings(svga_t *svga) { +s3_virge_recalctimings(svga_t *svga) +{ virge_t *virge = (virge_t *) svga->priv; svga->hdisp = svga->hdisp_old; @@ -855,7 +865,7 @@ s3_virge_recalctimings(svga_t *svga) { video_force_resize_set_monitor(1, svga->monitor_index); } - if ((svga->crtc[0x67] & 0xc) != 0xc) { /*VGA mode*/ + if (((svga->crtc[0x67] & 0xc) != 0xc) || (virge->chip >= S3_VIRGEGX2)) { /*VGA mode*/ svga->ma_latch |= (virge->ma_ext << 16); if (svga->crtc[0x51] & 0x30) svga->rowoffset |= (svga->crtc[0x51] & 0x30) << 4; @@ -892,8 +902,8 @@ s3_virge_recalctimings(svga_t *svga) { svga->render = svga_render_32bpp_highres; break; - default: - break; + default: + break; } svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && @@ -1035,7 +1045,7 @@ s3_virge_updatemapping(virge_t *virge) { virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24); if ((svga->crtc[0x58] & 0x10) || (virge->advfunc_cntl & 0x10)) { /*Linear framebuffer*/ - switch (svga->crtc[0x58] & 3) { + switch (svga->crtc[0x58] & 7) { case 0: /*64k*/ virge->linear_size = 0x10000; break; @@ -1104,7 +1114,8 @@ s3_virge_wait_fifo_idle(virge_t *virge) { } static uint8_t -s3_virge_mmio_read(uint32_t addr, void *priv) { +s3_virge_mmio_read(uint32_t addr, void *priv) +{ virge_t *virge = (virge_t *) priv; uint8_t ret; @@ -1121,6 +1132,7 @@ s3_virge_mmio_read(uint32_t addr, void *priv) { ret |= 0x10; else ret |= 0x30; + if (!virge->virge_busy) wake_fifo_thread(virge); return ret; @@ -1153,7 +1165,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) { } static uint16_t -s3_virge_mmio_read_w(uint32_t addr, void *priv) { +s3_virge_mmio_read_w(uint32_t addr, void *priv) +{ virge_t *virge = (virge_t *) priv; uint16_t ret; @@ -1187,7 +1200,6 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) { static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; uint32_t ret = 0xffffffff; switch (addr & 0xfffc) { @@ -1208,46 +1220,30 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) { break; case 0x81a0: ret = virge->streams.blend_ctrl; - svga_recalctimings(svga); break; case 0x81c0: ret = virge->streams.pri_fb0; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81c4: ret = virge->streams.pri_fb1; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81c8: ret = virge->streams.pri_stride; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81cc: ret = virge->streams.buffer_ctrl; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81d0: ret = virge->streams.sec_fb0; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81d4: ret = virge->streams.sec_fb1; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81d8: ret = virge->streams.sec_stride; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; break; case 0x81dc: ret = virge->streams.overlay_ctrl; - svga->fullchange = changeframecount; break; case 0x81e0: ret = virge->streams.k1_vert_scale; @@ -1280,6 +1276,7 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) { ret |= 0x00001000; else ret |= 0x00003000; + ret |= virge->subsys_stat; if (!virge->virge_busy) wake_fifo_thread(virge); @@ -1294,12 +1291,15 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) { ret = virge->cmd_dma_base; break; case 0x8594: + ret = virge->cmd_dma_write_ptr_reg; break; case 0x8598: - ret = virge->dma_ptr; + ret = virge->cmd_dma_read_ptr_reg; + if (ret > virge->cmd_dma_write_ptr_reg) + ret = virge->cmd_dma_write_ptr_reg; break; case 0x859c: - ret = virge->cmd_dma; + ret = 0; /*To prevent DMA overflows.*/ break; case 0xa4d4: @@ -1426,13 +1426,322 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) { default: ret = s3_virge_mmio_read_w(addr, priv) | (s3_virge_mmio_read_w(addr + 2, priv) << 16); + break; } + //pclog("MMIO ReadL=%04x, ret=%08x.\n", addr & 0xfffc, ret); return ret; } static void -fifo_thread(void *param) { +s3_virge_mmio_write_fifo_l(virge_t *virge, uint32_t addr, uint32_t val) +{ + switch (addr) { + case 0xa000 ... 0xa1fc: + { + int x = addr & 4; + int y = (addr >> 3) & 7; + int color; + int byte; + uint32_t newaddr = addr; + virge->s3d.pattern_8[y * 8 + x] = val & 0xff; + virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; + virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; + virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; + + x = (addr >> 1) & 6; + y = (addr >> 4) & 7; + virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; + virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; + + newaddr &= 0x00ff; + for (uint8_t xx = 0; xx < 4; xx++) { + x = ((newaddr + xx) / 3) % 8; + y = ((newaddr + xx) / 24) % 8; + color = ((newaddr + xx) % 3) << 3; + byte = (xx << 3); + virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); + virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; + } + + x = (addr >> 2) & 7; + y = (addr >> 5) & 7; + virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; + } break; + + case 0xa4d4: + case 0xa8d4: + case 0xacd4: + virge->s3d.src_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4d8: + case 0xa8d8: + case 0xacd8: + virge->s3d.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4dc: + case 0xa8dc: + case 0xacdc: + virge->s3d.clip_l = (val >> 16) & 0x7ff; + virge->s3d.clip_r = val & 0x7ff; + break; + case 0xa4e0: + case 0xa8e0: + case 0xace0: + virge->s3d.clip_t = (val >> 16) & 0x7ff; + virge->s3d.clip_b = val & 0x7ff; + break; + case 0xa4e4: + case 0xa8e4: + case 0xace4: + virge->s3d.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xa4e8: + case 0xace8: + virge->s3d.mono_pat_0 = val; + break; + case 0xa4ec: + case 0xacec: + virge->s3d.mono_pat_1 = val; + break; + case 0xa4f0: + case 0xacf0: + virge->s3d.pat_bg_clr = val; + break; + case 0xa4f4: + case 0xa8f4: + case 0xacf4: + virge->s3d.pat_fg_clr = val; + break; + case 0xa4f8: + virge->s3d.src_bg_clr = val; + break; + case 0xa4fc: + virge->s3d.src_fg_clr = val; + break; + case 0xa500: + case 0xa900: + case 0xad00: + virge->s3d.cmd_set = val; + if (!(val & CMD_SET_AE)) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa504: + virge->s3d.r_width = (val >> 16) & 0x7ff; + virge->s3d.r_height = val & 0x7ff; + break; + case 0xa508: + virge->s3d.rsrc_x = (val >> 16) & 0x7ff; + virge->s3d.rsrc_y = val & 0x7ff; + break; + case 0xa50c: + virge->s3d.rdest_x = (val >> 16) & 0x7ff; + virge->s3d.rdest_y = val & 0x7ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa96c: + virge->s3d.lxend0 = (val >> 16) & 0x7ff; + virge->s3d.lxend1 = val & 0x7ff; + break; + case 0xa970: + virge->s3d.ldx = (int32_t)val; + break; + case 0xa974: + virge->s3d.lxstart = val; + break; + case 0xa978: + virge->s3d.lystart = val & 0x7ff; + break; + case 0xa97c: + virge->s3d.lycnt = val & 0x7ff; + virge->s3d.line_dir = val >> 31; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xad68: + virge->s3d.prdx = val; + break; + case 0xad6c: + virge->s3d.prxstart = val; + break; + case 0xad70: + virge->s3d.pldx = val; + break; + case 0xad74: + virge->s3d.plxstart = val; + break; + case 0xad78: + virge->s3d.pystart = val & 0x7ff; + break; + case 0xad7c: + virge->s3d.pycnt = val & 0x300007ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xb0d4: + case 0xb4d4: + virge->s3d_tri.z_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0d8: + case 0xb4d8: + virge->s3d_tri.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0dc: + case 0xb4dc: + virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_r = val & 0x7ff; + break; + case 0xb0e0: + case 0xb4e0: + virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_b = val & 0x7ff; + break; + case 0xb0e4: + case 0xb4e4: + virge->s3d_tri.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xb0e8: + case 0xb4e8: + virge->s3d_tri.z_str = val & 0xff8; + break; + case 0xb4ec: + virge->s3d_tri.tex_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb4f0: + virge->s3d_tri.tex_bdr_clr = val & 0xffffff; + break; + case 0xb0f4: + case 0xb4f4: + virge->s3d_tri.fog_b = val & 0xff; + virge->s3d_tri.fog_g = (val >> 8) & 0xff; + virge->s3d_tri.fog_r = (val >> 16) & 0xff; + break; + case 0xb100: + case 0xb500: + virge->s3d_tri.cmd_set = val; + if (!(val & CMD_SET_AE)) + queue_triangle(virge); + break; + case 0xb504: + virge->s3d_tri.tbv = val & 0xfffff; + break; + case 0xb508: + virge->s3d_tri.tbu = val & 0xfffff; + break; + case 0xb50c: + virge->s3d_tri.TdWdX = val; + break; + case 0xb510: + virge->s3d_tri.TdWdY = val; + break; + case 0xb514: + virge->s3d_tri.tws = val; + break; + case 0xb518: + virge->s3d_tri.TdDdX = val; + break; + case 0xb51c: + virge->s3d_tri.TdVdX = val; + break; + case 0xb520: + virge->s3d_tri.TdUdX = val; + break; + case 0xb524: + virge->s3d_tri.TdDdY = val; + break; + case 0xb528: + virge->s3d_tri.TdVdY = val; + break; + case 0xb52c: + virge->s3d_tri.TdUdY = val; + break; + case 0xb530: + virge->s3d_tri.tds = val; + break; + case 0xb534: + virge->s3d_tri.tvs = val; + break; + case 0xb538: + virge->s3d_tri.tus = val; + break; + case 0xb53c: + virge->s3d_tri.TdGdX = val >> 16; + virge->s3d_tri.TdBdX = val & 0xffff; + break; + case 0xb540: + virge->s3d_tri.TdAdX = val >> 16; + virge->s3d_tri.TdRdX = val & 0xffff; + break; + case 0xb544: + virge->s3d_tri.TdGdY = val >> 16; + virge->s3d_tri.TdBdY = val & 0xffff; + break; + case 0xb548: + virge->s3d_tri.TdAdY = val >> 16; + virge->s3d_tri.TdRdY = val & 0xffff; + break; + case 0xb54c: + virge->s3d_tri.tgs = (val >> 16) & 0xffff; + virge->s3d_tri.tbs = val & 0xffff; + break; + case 0xb550: + virge->s3d_tri.tas = (val >> 16) & 0xffff; + virge->s3d_tri.trs = val & 0xffff; + break; + + case 0xb554: + virge->s3d_tri.TdZdX = val; + break; + case 0xb558: + virge->s3d_tri.TdZdY = val; + break; + case 0xb55c: + virge->s3d_tri.tzs = val; + break; + case 0xb560: + virge->s3d_tri.TdXdY12 = val; + break; + case 0xb564: + virge->s3d_tri.txend12 = val; + break; + case 0xb568: + virge->s3d_tri.TdXdY01 = val; + break; + case 0xb56c: + virge->s3d_tri.txend01 = val; + break; + case 0xb570: + virge->s3d_tri.TdXdY02 = val; + break; + case 0xb574: + virge->s3d_tri.txs = val; + break; + case 0xb578: + virge->s3d_tri.tys = val; + break; + case 0xb57c: + virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; + virge->s3d_tri.ty12 = val & 0x7ff; + virge->s3d_tri.tlr = val >> 31; + if (virge->s3d_tri.cmd_set & CMD_SET_AE) + queue_triangle(virge); + break; + } +} + +static void +fifo_thread(void *param) +{ virge_t *virge = (virge_t *)param; while (virge->fifo_thread_run) { @@ -1448,19 +1757,16 @@ fifo_thread(void *param) { switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: - if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) + if (((fifo->addr_type & FIFO_ADDR) & 0xffff) < 0x8000) s3_virge_bitblt(virge, 8, val); - else if (((fifo->addr_type & FIFO_ADDR) & 0xffff) == 0x859c) - virge->cmd_dma = val; break; case FIFO_WRITE_WORD: - if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (((fifo->addr_type & FIFO_ADDR) & 0xfffe) < 0x8000) { if (virge->s3d.cmd_set & CMD_SET_MS) s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); else - s3_virge_bitblt(virge, 16, val); - } else if (((fifo->addr_type & FIFO_ADDR) & 0xfffe) == 0x859c) - virge->cmd_dma = val; + s3_virge_bitblt(virge, 16, val); + } break; case FIFO_WRITE_DWORD: if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { @@ -1470,324 +1776,8 @@ fifo_thread(void *param) { ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); else s3_virge_bitblt(virge, 32, val); - } else { - switch ((fifo->addr_type & FIFO_ADDR) & 0xfffc) { - case 0x8590: - virge->cmd_dma_base = val; - break; - - case 0x8594: - virge->dma_ptr = val; - break; - - case 0x8598: - break; - - case 0x859c: - virge->cmd_dma = val; - break; - - case 0xa000 ... 0xa1fc: - { - int x = (fifo->addr_type & FIFO_ADDR) & 4; - int y = ((fifo->addr_type & FIFO_ADDR) >> 3) & 7; - int color; - int byte; - uint32_t addr = (fifo->addr_type & FIFO_ADDR); - virge->s3d.pattern_8[y * 8 + x] = val & 0xff; - virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; - virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; - virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; - - x = ((fifo->addr_type & FIFO_ADDR) >> 1) & 6; - y = ((fifo->addr_type & FIFO_ADDR) >> 4) & 7; - virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; - virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; - - addr &= 0x00ff; - for (uint8_t xx = 0; xx < 4; xx++) { - x = ((addr + xx) / 3) % 8; - y = ((addr + xx) / 24) % 8; - color = ((addr + xx) % 3) << 3; - byte = (xx << 3); - virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); - virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; - } - - x = ((fifo->addr_type & FIFO_ADDR) >> 2) & 7; - y = ((fifo->addr_type & FIFO_ADDR) >> 5) & 7; - virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; - } break; - - case 0xa4d4: - case 0xa8d4: - case 0xacd4: - virge->s3d.src_base = val & ((virge->memory_size == 8) ? - (val & 0x7ffff8) : (val & 0x3ffff8)); - break; - case 0xa4d8: - case 0xa8d8: - case 0xacd8: - virge->s3d.dest_base = val & ((virge->memory_size == 8) ? - (val & 0x7ffff8) : (val & 0x3ffff8)); - break; - case 0xa4dc: - case 0xa8dc: - case 0xacdc: - virge->s3d.clip_l = (val >> 16) & 0x7ff; - virge->s3d.clip_r = val & 0x7ff; - break; - case 0xa4e0: - case 0xa8e0: - case 0xace0: - virge->s3d.clip_t = (val >> 16) & 0x7ff; - virge->s3d.clip_b = val & 0x7ff; - break; - case 0xa4e4: - case 0xa8e4: - case 0xace4: - virge->s3d.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xa4e8: - case 0xace8: - virge->s3d.mono_pat_0 = val; - break; - case 0xa4ec: - case 0xacec: - virge->s3d.mono_pat_1 = val; - break; - case 0xa4f0: - case 0xacf0: - virge->s3d.pat_bg_clr = val; - break; - case 0xa4f4: - case 0xa8f4: - case 0xacf4: - virge->s3d.pat_fg_clr = val; - break; - case 0xa4f8: - virge->s3d.src_bg_clr = val; - break; - case 0xa4fc: - virge->s3d.src_fg_clr = val; - break; - case 0xa500: - case 0xa900: - case 0xad00: - virge->s3d.cmd_set = val; - if (!(val & CMD_SET_AE)) - s3_virge_bitblt(virge, -1, 0); - break; - case 0xa504: - virge->s3d.r_width = (val >> 16) & 0x7ff; - virge->s3d.r_height = val & 0x7ff; - break; - case 0xa508: - virge->s3d.rsrc_x = (val >> 16) & 0x7ff; - virge->s3d.rsrc_y = val & 0x7ff; - break; - case 0xa50c: - virge->s3d.rdest_x = (val >> 16) & 0x7ff; - virge->s3d.rdest_y = val & 0x7ff; - if (virge->s3d.cmd_set & CMD_SET_AE) - s3_virge_bitblt(virge, -1, 0); - break; - case 0xa96c: - virge->s3d.lxend0 = (val >> 16) & 0x7ff; - virge->s3d.lxend1 = val & 0x7ff; - break; - case 0xa970: - virge->s3d.ldx = (int32_t)val; - break; - case 0xa974: - virge->s3d.lxstart = val; - break; - case 0xa978: - virge->s3d.lystart = val & 0x7ff; - break; - case 0xa97c: - virge->s3d.lycnt = val & 0x7ff; - virge->s3d.line_dir = val >> 31; - if (virge->s3d.cmd_set & CMD_SET_AE) - s3_virge_bitblt(virge, -1, 0); - break; - - case 0xad68: - virge->s3d.prdx = val; - break; - case 0xad6c: - virge->s3d.prxstart = val; - break; - case 0xad70: - virge->s3d.pldx = val; - break; - case 0xad74: - virge->s3d.plxstart = val; - break; - case 0xad78: - virge->s3d.pystart = val & 0x7ff; - break; - case 0xad7c: - virge->s3d.pycnt = val & 0x300007ff; - if (virge->s3d.cmd_set & CMD_SET_AE) - s3_virge_bitblt(virge, -1, 0); - break; - - case 0xb0d4: - case 0xb4d4: - virge->s3d_tri.z_base = val & ((virge->memory_size == 8) ? - (val & 0x7ffff8) : (val & 0x3ffff8)); - break; - case 0xb0d8: - case 0xb4d8: - virge->s3d_tri.dest_base = val & ((virge->memory_size == 8) ? - (val & 0x7ffff8) : (val & 0x3ffff8)); - break; - case 0xb0dc: - case 0xb4dc: - virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_r = val & 0x7ff; - break; - case 0xb0e0: - case 0xb4e0: - virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_b = val & 0x7ff; - break; - case 0xb0e4: - case 0xb4e4: - virge->s3d_tri.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xb0e8: - case 0xb4e8: - virge->s3d_tri.z_str = val & 0xff8; - break; - case 0xb4ec: - virge->s3d_tri.tex_base = val & ((virge->memory_size == 8) ? - (val & 0x7ffff8) : (val & 0x3ffff8)); - break; - case 0xb4f0: - virge->s3d_tri.tex_bdr_clr = val & 0xffffff; - break; - case 0xb0f4: - case 0xb4f4: - virge->s3d_tri.fog_b = val & 0xff; - virge->s3d_tri.fog_g = (val >> 8) & 0xff; - virge->s3d_tri.fog_r = (val >> 16) & 0xff; - break; - case 0xb100: - case 0xb500: - virge->s3d_tri.cmd_set = val; - if (!(val & CMD_SET_AE)) - queue_triangle(virge); - break; - case 0xb504: - virge->s3d_tri.tbv = val & 0xfffff; - break; - case 0xb508: - virge->s3d_tri.tbu = val & 0xfffff; - break; - case 0xb50c: - virge->s3d_tri.TdWdX = val; - break; - case 0xb510: - virge->s3d_tri.TdWdY = val; - break; - case 0xb514: - virge->s3d_tri.tws = val; - break; - case 0xb518: - virge->s3d_tri.TdDdX = val; - break; - case 0xb51c: - virge->s3d_tri.TdVdX = val; - break; - case 0xb520: - virge->s3d_tri.TdUdX = val; - break; - case 0xb524: - virge->s3d_tri.TdDdY = val; - break; - case 0xb528: - virge->s3d_tri.TdVdY = val; - break; - case 0xb52c: - virge->s3d_tri.TdUdY = val; - break; - case 0xb530: - virge->s3d_tri.tds = val; - break; - case 0xb534: - virge->s3d_tri.tvs = val; - break; - case 0xb538: - virge->s3d_tri.tus = val; - break; - case 0xb53c: - virge->s3d_tri.TdGdX = val >> 16; - virge->s3d_tri.TdBdX = val & 0xffff; - break; - case 0xb540: - virge->s3d_tri.TdAdX = val >> 16; - virge->s3d_tri.TdRdX = val & 0xffff; - break; - case 0xb544: - virge->s3d_tri.TdGdY = val >> 16; - virge->s3d_tri.TdBdY = val & 0xffff; - break; - case 0xb548: - virge->s3d_tri.TdAdY = val >> 16; - virge->s3d_tri.TdRdY = val & 0xffff; - break; - case 0xb54c: - virge->s3d_tri.tgs = (val >> 16) & 0xffff; - virge->s3d_tri.tbs = val & 0xffff; - break; - case 0xb550: - virge->s3d_tri.tas = (val >> 16) & 0xffff; - virge->s3d_tri.trs = val & 0xffff; - break; - - case 0xb554: - virge->s3d_tri.TdZdX = val; - break; - case 0xb558: - virge->s3d_tri.TdZdY = val; - break; - case 0xb55c: - virge->s3d_tri.tzs = val; - break; - case 0xb560: - virge->s3d_tri.TdXdY12 = val; - break; - case 0xb564: - virge->s3d_tri.txend12 = val; - break; - case 0xb568: - virge->s3d_tri.TdXdY01 = val; - break; - case 0xb56c: - virge->s3d_tri.txend01 = val; - break; - case 0xb570: - virge->s3d_tri.TdXdY02 = val; - break; - case 0xb574: - virge->s3d_tri.txs = val; - break; - case 0xb578: - virge->s3d_tri.tys = val; - break; - case 0xb57c: - virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; - virge->s3d_tri.ty12 = val & 0x7ff; - virge->s3d_tri.tlr = val >> 31; - if (virge->s3d_tri.cmd_set & CMD_SET_AE) - queue_triangle(virge); - break; - } - } + } else + s3_virge_mmio_write_fifo_l(virge, (fifo->addr_type & FIFO_ADDR) & 0xfffc, val); break; } @@ -1802,6 +1792,9 @@ fifo_thread(void *param) { } virge->virge_busy = 0; virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; + if (virge->cmd_dma) + virge->subsys_stat |= INT_HOST_DONE | INT_CMD_DONE; + s3_virge_update_irqs(virge); } } @@ -1852,7 +1845,7 @@ static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) { virge_t *virge = (virge_t *) priv; - if ((addr & 0xfffc) < 0x8000) + if ((addr & 0xffff) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_BYTE); else switch (addr & 0xffff) { case 0x83b0 ... 0x83df: @@ -1870,9 +1863,10 @@ static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) { virge_t *virge = (virge_t *) priv; - if ((addr & 0xfffc) < 0x8000) + if ((addr & 0xfffe) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_WORD); else switch (addr & 0xfffe) { + default: case 0x83d4: s3_virge_mmio_write(addr, val, priv); s3_virge_mmio_write(addr + 1, val >> 8, priv); @@ -1887,144 +1881,192 @@ s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) { static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t * svga = &virge->svga; + svga_t *svga = &virge->svga; if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); else if ((addr & 0xe000) == 0xa000) s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); - else switch (addr & 0xfffc) { - case 0x8180: - virge->streams.pri_ctrl = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x8184: - virge->streams.chroma_ctrl = val; - break; - case 0x8190: - virge->streams.sec_ctrl = val; - virge->streams.dda_horiz_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_horiz_accumulator |= 0xfffff800; - virge->streams.sdif = (val >> 24) & 7; - break; - case 0x8194: - virge->streams.chroma_upper_bound = val; - break; - case 0x8198: - virge->streams.sec_filter = val; - virge->streams.k1_horiz_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_horiz_scale |= 0xfffff800; - virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; - if ((val >> 16) & (1 << 10)) - virge->streams.k2_horiz_scale |= 0xfffff800; - break; - case 0x81a0: - virge->streams.blend_ctrl = val; - break; - case 0x81c0: - virge->streams.pri_fb0 = val & ((virge->memory_size == 8) ? - (val & 0x7fffff) : (val & 0x3fffff)); - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81c4: - virge->streams.pri_fb1 = ((virge->memory_size == 8) ? - (val & 0x7fffff) : (val & 0x3fffff)); - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81c8: - virge->streams.pri_stride = val & 0xfff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81cc: - virge->streams.buffer_ctrl = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81d0: - virge->streams.sec_fb0 = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81d4: - virge->streams.sec_fb1 = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81d8: - virge->streams.sec_stride = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81dc: - virge->streams.overlay_ctrl = val; - break; - case 0x81e0: - virge->streams.k1_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_vert_scale |= 0xfffff800; - break; - case 0x81e4: - virge->streams.k2_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k2_vert_scale |= 0xfffff800; - break; - case 0x81e8: - virge->streams.dda_vert_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_vert_accumulator |= 0xfffff800; - break; - case 0x81ec: - virge->streams.fifo_ctrl = val; - break; - case 0x81f0: - virge->streams.pri_start = val; - virge->streams.pri_x = (val >> 16) & 0x7ff; - virge->streams.pri_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f4: - virge->streams.pri_size = val; - virge->streams.pri_w = (val >> 16) & 0x7ff; - virge->streams.pri_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f8: - virge->streams.sec_start = val; - virge->streams.sec_x = (val >> 16) & 0x7ff; - virge->streams.sec_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81fc: - virge->streams.sec_size = val; - virge->streams.sec_w = (val >> 16) & 0x7ff; - virge->streams.sec_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; + else { + switch (addr & 0xfffc) { + case 0x8180: + virge->streams.pri_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x8184: + virge->streams.chroma_ctrl = val; + break; + case 0x8190: + virge->streams.sec_ctrl = val; + virge->streams.dda_horiz_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_horiz_accumulator |= 0xfffff800; + virge->streams.sdif = (val >> 24) & 7; + break; + case 0x8194: + virge->streams.chroma_upper_bound = val; + break; + case 0x8198: + virge->streams.sec_filter = val; + virge->streams.k1_horiz_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_horiz_scale |= 0xfffff800; + virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; + if ((val >> 16) & (1 << 10)) + virge->streams.k2_horiz_scale |= 0xfffff800; + break; + case 0x81a0: + virge->streams.blend_ctrl = val; + break; + case 0x81c0: + virge->streams.pri_fb0 = val & ((virge->memory_size == 8) ? + (val & 0x7fffff) : (val & 0x3fffff)); + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81c4: + virge->streams.pri_fb1 = ((virge->memory_size == 8) ? + (val & 0x7fffff) : (val & 0x3fffff)); + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81c8: + virge->streams.pri_stride = val & 0xfff; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81cc: + virge->streams.buffer_ctrl = val; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81d0: + virge->streams.sec_fb0 = val; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81d4: + virge->streams.sec_fb1 = val; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81d8: + virge->streams.sec_stride = val; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; + break; + case 0x81dc: + virge->streams.overlay_ctrl = val; + break; + case 0x81e0: + virge->streams.k1_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_vert_scale |= 0xfffff800; + break; + case 0x81e4: + virge->streams.k2_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k2_vert_scale |= 0xfffff800; + break; + case 0x81e8: + virge->streams.dda_vert_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_vert_accumulator |= 0xfffff800; + break; + case 0x81ec: + virge->streams.fifo_ctrl = val; + break; + case 0x81f0: + virge->streams.pri_start = val; + virge->streams.pri_x = (val >> 16) & 0x7ff; + virge->streams.pri_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f4: + virge->streams.pri_size = val; + virge->streams.pri_w = (val >> 16) & 0x7ff; + virge->streams.pri_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f8: + virge->streams.sec_start = val; + virge->streams.sec_x = (val >> 16) & 0x7ff; + virge->streams.sec_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81fc: + virge->streams.sec_size = val; + virge->streams.sec_w = (val >> 16) & 0x7ff; + virge->streams.sec_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; - case 0x8504: - virge->subsys_stat &= ~(val & 0xff); - virge->subsys_cntl = (val >> 8); - s3_virge_update_irqs(virge); - break; + case 0x8504: + virge->subsys_stat &= ~(val & 0xff); + virge->subsys_cntl = (val >> 8); + s3_virge_update_irqs(virge); + break; - case 0x850c: - virge->advfunc_cntl = val & 0xff; - s3_virge_updatemapping(virge); - break; + case 0x850c: + virge->advfunc_cntl = val & 0xff; + s3_virge_updatemapping(virge); + break; - case 0xff20: - s3_virge_mmio_write(addr, val, priv); - break; + case 0x8590: + virge->cmd_dma_base = val; + virge->cmd_dma_buf_size = (val & 2) ? 0x10000 : 0x1000; + virge->cmd_dma_buf_size_mask = virge->cmd_dma_buf_size - 1; + virge->cmd_base_addr = (val & 2) ? (val & 0xffff0000) : (val & 0xfffff000); + break; + + case 0x8594: + virge->cmd_dma_write_ptr_update = val & (1 << 16); + if (virge->cmd_dma_write_ptr_update) { + virge->cmd_dma_write_ptr_reg = (virge->cmd_dma_buf_size == 0x10000) ? (val & 0xffff) : (val & 0xfff); + virge->dma_dbl_words = 0; + virge->dma_data_type = 0; + virge->dma_val = 0; + if (virge->cmd_dma) { + while (virge->cmd_dma_read_ptr_reg != virge->cmd_dma_write_ptr_reg) { + virge->cmd_dma_write_ptr_update = 0; + dma_bm_read(virge->cmd_base_addr + virge->cmd_dma_read_ptr_reg, (uint8_t *)&virge->dma_val, 4, 4); + if (!virge->dma_dbl_words) { + virge->dma_dbl_words = (virge->dma_val & 0xffff); + virge->dma_data_type = !!(virge->dma_val & (1 << 31)); + if (virge->dma_data_type) + virge->dma_mmio_addr = 0; + else + virge->dma_mmio_addr = ((virge->dma_val >> 16) << 2) & 0xfffc; + } else { + s3_virge_mmio_write_l(virge->dma_mmio_addr, virge->dma_val, virge); + virge->dma_dbl_words--; + virge->dma_mmio_addr = (virge->dma_mmio_addr + 4) & 0xfffc; + } + virge->cmd_dma_read_ptr_reg = (virge->cmd_dma_read_ptr_reg + 4) & (virge->cmd_dma_buf_size_mask - 3); + } + } + } + break; + + case 0x8598: + virge->cmd_dma_read_ptr_reg = (virge->cmd_dma_buf_size == 0x10000) ? (val & 0xffff) : (val & 0xfff); + break; + + case 0x859c: + virge->cmd_dma = val & 1; + virge->cmd_dma_write_ptr_reg = 0; + virge->cmd_dma_read_ptr_reg = 0; + break; + + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; + } } } @@ -2032,22 +2074,22 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { do { \ switch (bpp) { \ case 0: /*8 bpp*/ \ - val = vram[addr & svga->vram_mask]; \ + val = vram[addr & virge->vram_mask]; \ break; \ case 1: /*16 bpp*/ \ - val = *(uint16_t *)&vram[addr & svga->vram_mask]; \ + val = *(uint16_t *)&vram[addr & virge->vram_mask]; \ break; \ case 2: /*24 bpp*/ \ - val = (*(uint32_t *)&vram[addr & svga->vram_mask]) & 0xffffff; \ + val = (*(uint32_t *)&vram[addr & virge->vram_mask]) & 0xffffff; \ break; \ } \ } while (0) -#define Z_READ(addr) *(uint16_t *)&vram[addr & svga->vram_mask] +#define Z_READ(addr) *(uint16_t *)&vram[addr & virge->vram_mask] #define Z_WRITE(addr, val) \ if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ - *(uint16_t *)&vram[addr & svga->vram_mask] = val + *(uint16_t *)&vram[addr & virge->vram_mask] = val #define CLIP(x, y) \ do { \ @@ -2062,7 +2104,7 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || \ x > s3d_tri->clip_r || y < s3d_tri->clip_t || \ y > s3d_tri->clip_b)) \ - update = 0; \ + update = 0; \ } while (0) #define Z_CLIP(Zzb, Zs) \ @@ -2133,19 +2175,19 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { do { \ switch (bpp) { \ case 0: /*8 bpp*/ \ - vram[addr & svga->vram_mask] = val; \ - virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + vram[addr & virge->vram_mask] = val; \ + virge->svga.changedvram[(addr & virge->vram_mask) >> 12] = \ changeframecount; \ break; \ case 1: /*16 bpp*/ \ - *(uint16_t *)&vram[addr & svga->vram_mask] = val; \ - virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + *(uint16_t *)&vram[addr & virge->vram_mask] = val; \ + virge->svga.changedvram[(addr & virge->vram_mask) >> 12] = \ changeframecount; \ break; \ case 2: /*24 bpp*/ \ - *(uint32_t *)&vram[addr & svga->vram_mask] = (val & 0xffffff) |\ - (vram[(addr + 3) & svga->vram_mask] << 24); \ - virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + *(uint32_t *)&vram[addr & virge->vram_mask] = (val & 0xffffff) |\ + (vram[(addr + 3) & virge->vram_mask] << 24); \ + virge->svga.changedvram[(addr & virge->vram_mask) >> 12] = \ changeframecount; \ break; \ } \ @@ -2153,7 +2195,6 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { - svga_t *svga = &virge->svga; uint8_t *vram = virge->svga.vram; uint32_t mono_pattern[64]; int count_mask; @@ -3185,7 +3226,6 @@ dest_pixel_lit_texture_modulate(s3d_state_t *state) { static void tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) { - svga_t *svga = &virge->svga; uint8_t *vram = virge->svga.vram; int x_dir = s3d_tri->tlr ? 1 : -1; int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); @@ -3305,7 +3345,7 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int } } - virge->svga.changedvram[(dest_offset & svga->vram_mask) >> 12] = changeframecount; + virge->svga.changedvram[(dest_offset & virge->vram_mask) >> 12] = changeframecount; dest_addr = dest_offset + (x * (bpp + 1)); z_addr = z_offset + (x << 1); @@ -3343,16 +3383,17 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int uint32_t src_g = 0; uint32_t src_b = 0; + switch (bpp) { case 0: /*8 bpp*/ /*Not implemented yet*/ break; case 1: /*16 bpp*/ - src_col = *(uint16_t *)&vram[dest_addr & svga->vram_mask]; + src_col = *(uint16_t *)&vram[dest_addr & virge->vram_mask]; RGB15_TO_24(src_col, src_r, src_g, src_b); break; case 2: /*24 bpp*/ - src_col = (*(uint32_t *)&vram[dest_addr & svga->vram_mask]) & 0xffffff; + src_col = (*(uint32_t *)&vram[dest_addr & virge->vram_mask]) & 0xffffff; RGB24_TO_24(src_col, src_r, src_g, src_b); break; } @@ -3565,7 +3606,8 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) { } static void -render_thread(void *param) { +render_thread(void *param) +{ virge_t *virge = (virge_t *)param; while (virge->render_thread_run) { @@ -3576,7 +3618,7 @@ render_thread(void *param) { s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]); virge->s3d_read_idx++; - if (RB_ENTRIES == (RB_SIZE - 1)) + if (RB_ENTRIES == RB_MASK) thread_set_event(virge->not_full_event); } virge->s3d_busy = 0; @@ -3586,7 +3628,8 @@ render_thread(void *param) { } static void -queue_triangle(virge_t *virge) { +queue_triangle(virge_t *virge) +{ if (RB_FULL) { thread_reset_event(virge->not_full_event); if (RB_FULL) @@ -4361,7 +4404,7 @@ s3_virge_init(const device_t *info) io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge); - virge->pci_regs[PCI_REG_COMMAND] = 3; + virge->pci_regs[PCI_REG_COMMAND] = 7; virge->pci_regs[0x05] = 0; virge->pci_regs[0x06] = 0; virge->pci_regs[0x07] = 2; From 71ff5c8c6326957dfadb3e1d51fb80501dc12670 Mon Sep 17 00:00:00 2001 From: OBattler Date: Wed, 21 Aug 2024 01:33:54 +0200 Subject: [PATCH 24/26] ViRGE: Fixed some warnings that only appear when compiling a debug binary. --- src/video/vid_s3_virge.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index d0831ff4a..5d91d164e 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -2291,7 +2291,7 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); uint32_t source = 0; - uint32_t dest; + uint32_t dest = 0; uint32_t pattern; uint32_t out = 0; int update = 1; @@ -2396,7 +2396,7 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); uint32_t source = 0; - uint32_t dest; + uint32_t dest = 0; uint32_t pattern = virge->s3d.pat_fg_clr; uint32_t out = 0; int update = 1; @@ -2461,7 +2461,7 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); uint32_t source = 0; - uint32_t dest; + uint32_t dest = 0; uint32_t pattern; uint32_t out = 0; int update = 1; @@ -2517,7 +2517,7 @@ skip_line: do { uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); uint32_t source = 0; - uint32_t dest; + uint32_t dest = 0; uint32_t pattern; uint32_t out = 0; int update = 1; From 91a34a2b19b52c908e65f8bea01ee3ec838e6a26 Mon Sep 17 00:00:00 2001 From: OBattler Date: Wed, 21 Aug 2024 01:34:40 +0200 Subject: [PATCH 25/26] PCjr: Correctly mask the pallette lookups to only the lower 4 bits in the remaining modes as well. --- src/machine/m_pcjr.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/machine/m_pcjr.c b/src/machine/m_pcjr.c index 9ac4ff631..e569dc674 100644 --- a/src/machine/m_pcjr.c +++ b/src/machine/m_pcjr.c @@ -443,16 +443,16 @@ vid_poll(void *priv) pcjr->ma++; buffer32->line[l][ef_x] = buffer32->line[l][ef_x + 1] = buffer32->line[l + 1][ef_x] = buffer32->line[l + 1][ef_x + 1] = - pcjr->array[((dat >> 12) & pcjr->array[1]) + 16] + 16; + pcjr->array[((dat >> 12) & pcjr->array[1] & 0x0f) + 16] + 16; buffer32->line[l][ef_x + 2] = buffer32->line[l][ef_x + 3] = buffer32->line[l + 1][ef_x + 2] = buffer32->line[l + 1][ef_x + 3] = - pcjr->array[((dat >> 8) & pcjr->array[1]) + 16] + 16; + pcjr->array[((dat >> 8) & pcjr->array[1] & 0x0f) + 16] + 16; buffer32->line[l][ef_x + 4] = buffer32->line[l][ef_x + 5] = buffer32->line[l + 1][ef_x + 4] = buffer32->line[l + 1][ef_x + 5] = - pcjr->array[((dat >> 4) & pcjr->array[1]) + 16] + 16; + pcjr->array[((dat >> 4) & pcjr->array[1] & 0x0f) + 16] + 16; buffer32->line[l][ef_x + 6] = buffer32->line[l][ef_x + 7] = buffer32->line[l + 1][ef_x + 6] = buffer32->line[l + 1][ef_x + 7] = - pcjr->array[(dat & pcjr->array[1]) + 16] + 16; + pcjr->array[(dat & pcjr->array[1] & 0x0f) + 16] + 16; } break; case 0x12: /*160x200x16*/ @@ -493,7 +493,7 @@ vid_poll(void *priv) chr = (dat >> 7) & 1; chr |= ((dat >> 14) & 2); buffer32->line[l][ef_x + c] = buffer32->line[l + 1][ef_x + c] = - pcjr->array[(chr & pcjr->array[1]) + 16] + 16; + pcjr->array[(chr & pcjr->array[1] & 0x0f) + 16] + 16; dat <<= 1; } } @@ -505,13 +505,13 @@ vid_poll(void *priv) attr = pcjr->vram[((pcjr->ma << 1) & mask) + offset + 1]; drawcursor = ((pcjr->ma == ca) && pcjr->con && pcjr->cursoron); if (pcjr->array[3] & 4) { - cols[1] = pcjr->array[((attr & 15) & pcjr->array[1]) + 16] + 16; - cols[0] = pcjr->array[(((attr >> 4) & 7) & pcjr->array[1]) + 16] + 16; + cols[1] = pcjr->array[((attr & 15) & pcjr->array[1] & 0x0f) + 16] + 16; + cols[0] = pcjr->array[(((attr >> 4) & 7) & pcjr->array[1] & 0x0f) + 16] + 16; if ((pcjr->blink & 16) && (attr & 0x80) && !drawcursor) cols[1] = cols[0]; } else { - cols[1] = pcjr->array[((attr & 15) & pcjr->array[1]) + 16] + 16; - cols[0] = pcjr->array[((attr >> 4) & pcjr->array[1]) + 16] + 16; + cols[1] = pcjr->array[((attr & 15) & pcjr->array[1] & 0x0f) + 16] + 16; + cols[0] = pcjr->array[((attr >> 4) & pcjr->array[1] & 0x0f) + 16] + 16; } if (pcjr->sc & 8) for (uint8_t c = 0; c < 8; c++) @@ -537,13 +537,13 @@ vid_poll(void *priv) attr = pcjr->vram[((pcjr->ma << 1) & mask) + offset + 1]; drawcursor = ((pcjr->ma == ca) && pcjr->con && pcjr->cursoron); if (pcjr->array[3] & 4) { - cols[1] = pcjr->array[((attr & 15) & pcjr->array[1]) + 16] + 16; - cols[0] = pcjr->array[(((attr >> 4) & 7) & pcjr->array[1]) + 16] + 16; + cols[1] = pcjr->array[((attr & 15) & pcjr->array[1] & 0x0f) + 16] + 16; + cols[0] = pcjr->array[(((attr >> 4) & 7) & pcjr->array[1] & 0x0f) + 16] + 16; if ((pcjr->blink & 16) && (attr & 0x80) && !drawcursor) cols[1] = cols[0]; } else { - cols[1] = pcjr->array[((attr & 15) & pcjr->array[1]) + 16] + 16; - cols[0] = pcjr->array[((attr >> 4) & pcjr->array[1]) + 16] + 16; + cols[1] = pcjr->array[((attr & 15) & pcjr->array[1] & 0x0f) + 16] + 16; + cols[0] = pcjr->array[((attr >> 4) & pcjr->array[1] & 0x0f) + 16] + 16; } pcjr->ma++; if (pcjr->sc & 8) From e858a4344caabc28a6f2b68d3908d20720fa5480 Mon Sep 17 00:00:00 2001 From: OBattler Date: Wed, 21 Aug 2024 02:11:41 +0200 Subject: [PATCH 26/26] PCjr cartridges: Also reset the CPU on image eject. --- src/device/cartridge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/device/cartridge.c b/src/device/cartridge.c index edabd3ed0..a40c29c9b 100644 --- a/src/device/cartridge.c +++ b/src/device/cartridge.c @@ -169,6 +169,7 @@ cart_close(int drive) cart_image_close(drive); cart_fns[drive][0] = 0; ui_sb_update_icon_state(SB_CARTRIDGE | drive, 1); + resetx86(); } void