mirror of
https://github.com/86Box/86Box.git
synced 2026-02-24 20:35:32 -07:00
Added PAE, ported K6, P6, and WinChip 2 timings to the old recompiler, added a bunch of CPUs to the old recompiler, made some x87 fixes for both recompilers, and fixed root directory entries for single-sided 5.25" DD floppies in the New Floppy Image dialog.
This commit is contained in:
@@ -11,9 +11,10 @@
|
||||
|
||||
void (*codegen_timing_start)();
|
||||
void (*codegen_timing_prefix)(uint8_t prefix, uint32_t fetchdat);
|
||||
void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32);
|
||||
void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc);
|
||||
void (*codegen_timing_block_start)();
|
||||
void (*codegen_timing_block_end)();
|
||||
int (*codegen_timing_jump_cycles)();
|
||||
|
||||
void codegen_timing_set(codegen_timing_t *timing)
|
||||
{
|
||||
@@ -22,6 +23,7 @@ void codegen_timing_set(codegen_timing_t *timing)
|
||||
codegen_timing_opcode = timing->opcode;
|
||||
codegen_timing_block_start = timing->block_start;
|
||||
codegen_timing_block_end = timing->block_end;
|
||||
codegen_timing_jump_cycles = timing->jump_cycles;
|
||||
}
|
||||
|
||||
int codegen_in_recompile;
|
||||
|
||||
@@ -319,23 +319,28 @@ extern int codegen_block_cycles;
|
||||
|
||||
extern void (*codegen_timing_start)();
|
||||
extern void (*codegen_timing_prefix)(uint8_t prefix, uint32_t fetchdat);
|
||||
extern void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32);
|
||||
extern void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc);
|
||||
extern void (*codegen_timing_block_start)();
|
||||
extern void (*codegen_timing_block_end)();
|
||||
extern int (*codegen_timing_jump_cycles)();
|
||||
|
||||
typedef struct codegen_timing_t
|
||||
{
|
||||
void (*start)();
|
||||
void (*prefix)(uint8_t prefix, uint32_t fetchdat);
|
||||
void (*opcode)(uint8_t opcode, uint32_t fetchdat, int op_32);
|
||||
void (*opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc);
|
||||
void (*block_start)();
|
||||
void (*block_end)();
|
||||
int (*jump_cycles)();
|
||||
} codegen_timing_t;
|
||||
|
||||
extern codegen_timing_t codegen_timing_pentium;
|
||||
extern codegen_timing_t codegen_timing_686;
|
||||
extern codegen_timing_t codegen_timing_486;
|
||||
extern codegen_timing_t codegen_timing_winchip;
|
||||
extern codegen_timing_t codegen_timing_winchip2;
|
||||
extern codegen_timing_t codegen_timing_k6;
|
||||
extern codegen_timing_t codegen_timing_p6;
|
||||
|
||||
void codegen_timing_set(codegen_timing_t *timing);
|
||||
|
||||
|
||||
@@ -5359,7 +5359,7 @@ static inline void MEM_CHECK_WRITE(x86seg *seg)
|
||||
load_param_1_reg_32(REG_EDI);
|
||||
load_param_2_32(&codeblock[block_current], 1);
|
||||
|
||||
call(&codeblock[block_current], (uintptr_t)mmutranslatereal);
|
||||
call(&codeblock[block_current], (uintptr_t)mmutranslatereal32);
|
||||
addbyte(0x80); /*CMP abrt, 0*/
|
||||
addbyte(0x7d);
|
||||
addbyte((uint8_t)cpu_state_offset(abrt));
|
||||
@@ -5498,7 +5498,7 @@ static inline void MEM_CHECK_WRITE_W(x86seg *seg)
|
||||
jump_pos = block_pos;
|
||||
load_param_1_reg_32(REG_EBX);
|
||||
load_param_2_32(&codeblock[block_current], 1);
|
||||
call(&codeblock[block_current], (uintptr_t)mmutranslatereal);
|
||||
call(&codeblock[block_current], (uintptr_t)mmutranslatereal32);
|
||||
addbyte(0x83); /*ADD EBX, 1*/
|
||||
addbyte(0xc3);
|
||||
addbyte(1);
|
||||
@@ -5647,7 +5647,7 @@ static inline void MEM_CHECK_WRITE_L(x86seg *seg)
|
||||
jump_pos = block_pos;
|
||||
load_param_1_reg_32(REG_EBX);
|
||||
load_param_2_32(&codeblock[block_current], 1);
|
||||
call(&codeblock[block_current], (uintptr_t)mmutranslatereal);
|
||||
call(&codeblock[block_current], (uintptr_t)mmutranslatereal32);
|
||||
addbyte(0x83); /*ADD EBX, 3*/
|
||||
addbyte(0xc3);
|
||||
addbyte(3);
|
||||
|
||||
@@ -300,7 +300,7 @@ void codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat)
|
||||
last_prefix = prefix;
|
||||
}
|
||||
|
||||
void codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
void codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
||||
{
|
||||
int **timings;
|
||||
uint64_t *deps;
|
||||
@@ -360,7 +360,7 @@ void codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
{
|
||||
case 0x80: case 0x82: case 0x83:
|
||||
timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x_mod3;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0x81:
|
||||
@@ -416,5 +416,6 @@ codegen_timing_t codegen_timing_486 =
|
||||
codegen_timing_486_prefix,
|
||||
codegen_timing_486_opcode,
|
||||
codegen_timing_486_block_start,
|
||||
codegen_timing_486_block_end
|
||||
codegen_timing_486_block_end,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -826,7 +826,7 @@ static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_3
|
||||
return 0;
|
||||
}
|
||||
|
||||
void codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
void codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
||||
{
|
||||
uint32_t *timings;
|
||||
uint64_t *deps;
|
||||
@@ -886,7 +886,7 @@ void codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
{
|
||||
case 0x80: case 0x82: case 0x83:
|
||||
timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x_mod3;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0x81:
|
||||
@@ -1052,5 +1052,6 @@ codegen_timing_t codegen_timing_686 =
|
||||
codegen_timing_686_prefix,
|
||||
codegen_timing_686_opcode,
|
||||
codegen_timing_686_block_start,
|
||||
codegen_timing_686_block_end
|
||||
codegen_timing_686_block_end,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
#include <wchar.h>
|
||||
#include <86box/86box.h>
|
||||
#include "cpu.h"
|
||||
#include "codegen_timing_common.h"
|
||||
#include <86box/mem.h>
|
||||
|
||||
#include "codegen_timing_common.h"
|
||||
|
||||
uint64_t opcode_deps[256] =
|
||||
{
|
||||
@@ -280,7 +281,7 @@ uint64_t opcode_deps_0f[256] =
|
||||
/*00*/ MODRM, MODRM, MODRM, MODRM,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, MODRM, 0, MODRM,
|
||||
|
||||
/*10*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
@@ -362,7 +363,7 @@ uint64_t opcode_deps_0f_mod3[256] =
|
||||
/*00*/ MODRM, MODRM, MODRM, MODRM,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, MODRM, 0, MODRM,
|
||||
|
||||
/*10*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
@@ -440,6 +441,171 @@ uint64_t opcode_deps_0f_mod3[256] =
|
||||
MODRM, MODRM, MODRM, 0,
|
||||
};
|
||||
|
||||
uint64_t opcode_deps_0f0f[256] =
|
||||
{
|
||||
/*00*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, MODRM, 0, 0,
|
||||
|
||||
/*10*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, MODRM, 0, 0,
|
||||
|
||||
/*20*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*30*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*40*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*50*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*60*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*70*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*80*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*90*/ MODRM, 0, 0, 0,
|
||||
MODRM, 0, MODRM, MODRM,
|
||||
0, 0, MODRM, 0,
|
||||
0, 0, MODRM, 0,
|
||||
|
||||
/*a0*/ MODRM, 0, 0, 0,
|
||||
MODRM, 0, MODRM, MODRM,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*b0*/ MODRM, 0, 0, 0,
|
||||
MODRM, 0, MODRM, MODRM,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, MODRM,
|
||||
|
||||
/*c0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*d0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*e0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*f0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
};
|
||||
uint64_t opcode_deps_0f0f_mod3[256] =
|
||||
{
|
||||
/*00*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, MODRM, 0, 0,
|
||||
|
||||
/*10*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, MODRM, 0, 0,
|
||||
|
||||
/*20*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*30*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*40*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*50*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*60*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*70*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*80*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*90*/ MODRM, 0, 0, 0,
|
||||
MODRM, 0, MODRM, MODRM,
|
||||
0, 0, MODRM, 0,
|
||||
0, 0, MODRM, 0,
|
||||
|
||||
/*a0*/ MODRM, 0, 0, 0,
|
||||
MODRM, 0, MODRM, MODRM,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*b0*/ MODRM, 0, 0, 0,
|
||||
MODRM, 0, MODRM, MODRM,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, MODRM,
|
||||
|
||||
/*c0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*d0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*e0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
||||
/*f0*/ 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
};
|
||||
|
||||
uint64_t opcode_deps_shift[8] =
|
||||
{
|
||||
MODRM, MODRM, MODRM, MODRM,
|
||||
@@ -664,21 +830,21 @@ uint64_t opcode_deps_df_mod3[8] =
|
||||
|
||||
uint64_t opcode_deps_81[8] =
|
||||
{
|
||||
MODRM, MODRM, MODRM, MODRM,
|
||||
MODRM, MODRM, MODRM, MODRM
|
||||
MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632,
|
||||
MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632
|
||||
};
|
||||
uint64_t opcode_deps_81_mod3[8] =
|
||||
{
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM,
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | MODRM
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632,
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | MODRM | HAS_IMM1632
|
||||
};
|
||||
uint64_t opcode_deps_8x[8] =
|
||||
{
|
||||
MODRM, MODRM, MODRM, MODRM,
|
||||
MODRM, MODRM, MODRM, MODRM
|
||||
MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8,
|
||||
MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8
|
||||
};
|
||||
uint64_t opcode_deps_8x_mod3[8] =
|
||||
{
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM,
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | MODRM
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8,
|
||||
SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | MODRM | HAS_IMM8
|
||||
};
|
||||
|
||||
@@ -73,6 +73,10 @@
|
||||
#define FPU_FXCH (1ull << 33)
|
||||
|
||||
|
||||
/*Dependency flag set on every opcode_deps_8x / opcode_deps_8x_mod3 entry;
  presumably marks an instruction followed by an 8-bit immediate - confirm
  against the timing code that tests it*/
#define HAS_IMM8 (1ull << 34)
|
||||
/*Dependency flag set on every opcode_deps_81 / opcode_deps_81_mod3 entry;
  presumably marks an instruction followed by a 16- or 32-bit immediate - confirm*/
#define HAS_IMM1632 (1ull << 35)
|
||||
|
||||
|
||||
/*Extra register-mask bits above the low eight bit positions*/
#define REGMASK_IMPL_ESP (1 << 8)
|
||||
#define REGMASK_SHIFTPACK (1 << 9)
|
||||
/*NOTE(review): same bit as REGMASK_SHIFTPACK - confirm the overlap is intentional*/
#define REGMASK_MULTIPLY (1 << 9)
|
||||
@@ -82,6 +86,8 @@ extern uint64_t opcode_deps[256];
|
||||
extern uint64_t opcode_deps_mod3[256];
|
||||
extern uint64_t opcode_deps_0f[256];
|
||||
extern uint64_t opcode_deps_0f_mod3[256];
|
||||
extern uint64_t opcode_deps_0f0f[256];
|
||||
extern uint64_t opcode_deps_0f0f_mod3[256];
|
||||
extern uint64_t opcode_deps_shift[8];
|
||||
extern uint64_t opcode_deps_shift_mod3[8];
|
||||
extern uint64_t opcode_deps_shift_cl[8];
|
||||
|
||||
2353
src/cpu/codegen_timing_k6.c
Normal file
2353
src/cpu/codegen_timing_k6.c
Normal file
File diff suppressed because it is too large
Load Diff
2330
src/cpu/codegen_timing_p6.c
Normal file
2330
src/cpu/codegen_timing_p6.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1076,7 +1076,7 @@ static void codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcod
|
||||
}
|
||||
}
|
||||
|
||||
void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
||||
{
|
||||
uint64_t *timings;
|
||||
uint64_t *deps;
|
||||
@@ -1137,7 +1137,7 @@ void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
{
|
||||
case 0x80: case 0x82: case 0x83:
|
||||
timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x_mod3;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0x81:
|
||||
@@ -1318,5 +1318,6 @@ codegen_timing_t codegen_timing_pentium =
|
||||
codegen_timing_pentium_prefix,
|
||||
codegen_timing_pentium_opcode,
|
||||
codegen_timing_pentium_block_start,
|
||||
codegen_timing_pentium_block_end
|
||||
codegen_timing_pentium_block_end,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -300,7 +300,7 @@ void codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat)
|
||||
last_prefix = prefix;
|
||||
}
|
||||
|
||||
void codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
void codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
||||
{
|
||||
int **timings;
|
||||
uint64_t *deps;
|
||||
@@ -360,7 +360,7 @@ void codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
{
|
||||
case 0x80: case 0x82: case 0x83:
|
||||
timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x_mod3;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0x81:
|
||||
@@ -416,5 +416,6 @@ codegen_timing_t codegen_timing_winchip =
|
||||
codegen_timing_winchip_prefix,
|
||||
codegen_timing_winchip_opcode,
|
||||
codegen_timing_winchip_block_start,
|
||||
codegen_timing_winchip_block_end
|
||||
codegen_timing_winchip_block_end,
|
||||
NULL
|
||||
};
|
||||
|
||||
743
src/cpu/codegen_timing_winchip2.c
Normal file
743
src/cpu/codegen_timing_winchip2.c
Normal file
@@ -0,0 +1,743 @@
|
||||
/*Since IDT/Centaur didn't document cycle timings in the WinChip datasheets, and
|
||||
I don't currently own a WinChip 2 to test against, most of the timing here is
|
||||
a guess. This code currently makes the following (probably wrong) assumptions:
|
||||
- FPU uses same timings as a Pentium, except for FXCH (which doesn't pair)
|
||||
- 3DNow! instructions perfectly pair
|
||||
- MMX follows mostly Pentium rules - one pipeline has shift/pack, one has
|
||||
multiply, and other instructions can execute in either pipeline
|
||||
- Instructions with prefixes can pair if both instructions are fully decoded
|
||||
when the first instruction starts execution.*/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
#include <86box/86box.h>
|
||||
#include "cpu.h"
|
||||
#include <86box/mem.h>
|
||||
|
||||
#include "x86.h"
|
||||
#include "x86_ops.h"
|
||||
#include "x87.h"
|
||||
#include "codegen.h"
|
||||
#include "codegen_ops.h"
|
||||
#include "codegen_timing_common.h"
|
||||
|
||||
/*Flag bits kept in the upper part of each 32-bit timing table entry. They are
  written as unsigned constants because the tables are uint32_t and shifting a
  signed 1 into bit 31 is undefined behaviour in C.*/

/*Instruction has different execution time for 16 and 32 bit data. Does not pair */
#define CYCLES_HAS_MULTI (1u << 31)

/*Set by FPU_CYCLES() on every entry it builds*/
#define CYCLES_FPU (1u << 30)

/*Class flags attached by the CYCLES_MMX_xxx() / CYCLES_3DNOW() wrappers below*/
#define CYCLES_IS_MMX_MUL (1u << 29)
#define CYCLES_IS_MMX_SHIFT (1u << 28)
#define CYCLES_IS_MMX_ANY (1u << 27)
#define CYCLES_IS_3DNOW (1u << 26)

/*Combine a cycle count with its class flag. The argument is parenthesised so
  that any expression may be passed safely.*/
#define CYCLES_MMX_MUL(c) (CYCLES_IS_MMX_MUL | (c))
#define CYCLES_MMX_SHIFT(c) (CYCLES_IS_MMX_SHIFT | (c))
#define CYCLES_MMX_ANY(c) (CYCLES_IS_MMX_ANY | (c))
#define CYCLES_3DNOW(c) (CYCLES_IS_3DNOW | (c))

/*Mask covering all four MMX/3DNow! class flags*/
#define CYCLES_IS_MMX (CYCLES_IS_MMX_MUL | CYCLES_IS_MMX_SHIFT | CYCLES_IS_MMX_ANY | CYCLES_IS_3DNOW)

/*Clear every flag bit, leaving only the cycle count field(s)*/
#define GET_CYCLES(c) ((c) & ~(CYCLES_HAS_MULTI | CYCLES_FPU | CYCLES_IS_MMX))

/*Plain cycle count with no flags*/
#define CYCLES(c) (c)
/*Two cycle counts in one entry: c16 in the low byte, c32 shifted left by 8,
  tagged with CYCLES_HAS_MULTI*/
#define CYCLES2(c16, c32) (CYCLES_HAS_MULTI | (c16) | ((c32) << 8))

/*comp_time = cycles until instruction complete
  i_overlap = cycles that overlap with integer
  f_overlap = cycles that overlap with subsequent FPU
  The whole expansion is parenthesised so the result can be used inside a
  larger expression without operator-precedence surprises.*/
#define FPU_CYCLES(comp_time, i_overlap, f_overlap) ((comp_time) | ((i_overlap) << 8) | ((f_overlap) << 16) | CYCLES_FPU)

/*Field extractors for a value built by FPU_CYCLES()*/
#define FPU_COMP_TIME(timing) ((timing) & 0xff)
#define FPU_I_OVERLAP(timing) (((timing) >> 8) & 0xff)
#define FPU_F_OVERLAP(timing) (((timing) >> 16) & 0xff)

/*comp_time minus i_overlap*/
#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing))

/*i_overlap minus f_overlap*/
#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing))

/*Reads the same byte as FPU_I_OVERLAP()*/
#define FPU_RESULT_LATENCY(timing) (((timing) >> 8) & 0xff)

/*Placeholder value used for timing table slots marked INVALID*/
#define INVALID 0
|
||||
|
||||
static uint32_t opcode_timings[256] =
|
||||
{
|
||||
/*00*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), INVALID,
|
||||
/*10*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3),
|
||||
/*20*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3),
|
||||
/*30*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2),
|
||||
|
||||
/*40*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES2(17,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17),
|
||||
/*70*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
|
||||
/*80*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(5), CYCLES(6),
|
||||
/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3),
|
||||
/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6),
|
||||
/*b0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
|
||||
/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(3), CYCLES(0),
|
||||
/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14),
|
||||
/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID
|
||||
};
|
||||
|
||||
/*Cycle counts for the single-byte opcodes, one entry per opcode value; each
  row below covers sixteen consecutive opcodes. Presumably the variant of
  opcode_timings used when the ModRM byte names a register (mod == 3), as the
  _mod3 tables are in the other timing back ends - confirm against the lookup
  code. Compared with opcode_timings, the ALU entries in rows 00-30 and 80 drop
  from 2 cycles to 1.*/
static uint32_t opcode_timings_mod3[256] =
|
||||
{
|
||||
/*00*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), INVALID,
|
||||
/*10*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3),
|
||||
/*20*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3),
|
||||
/*30*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2),
|
||||
|
||||
/*40*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*NOTE(review): 0x69 is CYCLES2(14,25) here but CYCLES2(17,25) in opcode_timings, while 0x6b is CYCLES2(17,20) in both - confirm that 14 is intended*/
/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES2(14,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17),
|
||||
/*70*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
|
||||
/*80*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(1), CYCLES(2), CYCLES(1),
|
||||
/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3),
|
||||
/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6),
|
||||
/*b0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
|
||||
/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(3), CYCLES(0),
|
||||
/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14),
|
||||
/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID,
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_0f[256] =
|
||||
{
|
||||
/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1),
|
||||
/*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
/*40*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*50*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*60*/ CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2),
|
||||
/*70*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES(100), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2),
|
||||
|
||||
/*80*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3),
|
||||
/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), INVALID, INVALID, CYCLES(3), CYCLES(3), INVALID, CYCLES(13), CYCLES(3), CYCLES(3), INVALID, CYCLES2(18,30),
|
||||
/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), INVALID, INVALID, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3),
|
||||
|
||||
/*c0*/ CYCLES(4), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*d0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2),
|
||||
/*e0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2),
|
||||
/*f0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID,
|
||||
};
|
||||
static uint32_t opcode_timings_0f_mod3[256] =
|
||||
{
|
||||
/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1),
|
||||
/*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
/*40*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*50*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
|
||||
/*60*/ CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1),
|
||||
/*70*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES(100), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1),
|
||||
|
||||
/*80*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3),
|
||||
/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), INVALID, INVALID, CYCLES(3), CYCLES(3), INVALID, CYCLES(13), CYCLES(3), CYCLES(3), INVALID, CYCLES2(18,30),
|
||||
/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), INVALID, INVALID, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3),
|
||||
|
||||
/*c0*/ CYCLES(4), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1),
|
||||
/*d0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1),
|
||||
/*e0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1),
|
||||
/*f0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID,
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_shift[8] =
|
||||
{
|
||||
CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7)
|
||||
};
|
||||
static uint32_t opcode_timings_shift_mod3[8] =
|
||||
{
|
||||
CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3)
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_f6[8] =
|
||||
{
|
||||
CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19)
|
||||
};
|
||||
static uint32_t opcode_timings_f6_mod3[8] =
|
||||
{
|
||||
CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19)
|
||||
};
|
||||
static uint32_t opcode_timings_f7[8] =
|
||||
{
|
||||
CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43)
|
||||
};
|
||||
static uint32_t opcode_timings_f7_mod3[8] =
|
||||
{
|
||||
CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43)
|
||||
};
|
||||
static uint32_t opcode_timings_ff[8] =
|
||||
{
|
||||
CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID
|
||||
};
|
||||
static uint32_t opcode_timings_ff_mod3[8] =
|
||||
{
|
||||
CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_d8[8] =
|
||||
{
|
||||
/* FADDs FMULs FCOMs FCOMPs*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/* FSUBs FSUBRs FDIVs FDIVRs*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2)
|
||||
};
|
||||
static uint32_t opcode_timings_d8_mod3[8] =
|
||||
{
|
||||
/* FADD FMUL FCOM FCOMP*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/* FSUB FSUBR FDIV FDIVR*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2)
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_d9[8] =
|
||||
{
|
||||
/* FLDs FSTs FSTPs*/
|
||||
FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0),
|
||||
/* FLDENV FLDCW FSTENV FSTCW*/
|
||||
FPU_CYCLES(32,0,0), FPU_CYCLES(8,0,0), FPU_CYCLES(48,0,0), FPU_CYCLES(2,0,0)
|
||||
};
|
||||
static uint32_t opcode_timings_d9_mod3[64] =
|
||||
{
|
||||
/*FLD*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/*FXCH*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/*FNOP*/
|
||||
FPU_CYCLES(3,0,0), INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
/*FSTP*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/* opFCHS opFABS*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), INVALID, INVALID,
|
||||
/* opFTST opFXAM*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(21,4,0), INVALID, INVALID,
|
||||
/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/
|
||||
FPU_CYCLES(2,0,0), FPU_CYCLES(5,2,2), FPU_CYCLES(5,2,2), FPU_CYCLES(5,2,2),
|
||||
/* opFLDEG2 opFLDLN2 opFLDZ*/
|
||||
FPU_CYCLES(5,2,2), FPU_CYCLES(5,2,2), FPU_CYCLES(2,0,0), INVALID,
|
||||
/* opF2XM1 opFYL2X opFPTAN opFPATAN*/
|
||||
FPU_CYCLES(53,2,2), FPU_CYCLES(103,2,2),FPU_CYCLES(120,36,0),FPU_CYCLES(112,2,2),
|
||||
/* opFDECSTP opFINCSTP,*/
|
||||
INVALID, INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0),
|
||||
/* opFPREM opFSQRT opFSINCOS*/
|
||||
FPU_CYCLES(64,2,2), INVALID, FPU_CYCLES(70,69,2),FPU_CYCLES(89,2,2),
|
||||
/* opFRNDINT opFSCALE opFSIN opFCOS*/
|
||||
FPU_CYCLES(9,0,0), FPU_CYCLES(20,5,0), FPU_CYCLES(65,2,2), FPU_CYCLES(65,2,2)
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_da[8] =
|
||||
{
|
||||
/* FIADDl FIMULl FICOMl FICOMPl*/
|
||||
FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0),
|
||||
/* FISUBl FISUBRl FIDIVl FIDIVRl*/
|
||||
FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2)
|
||||
};
|
||||
static uint32_t opcode_timings_da_mod3[8] =
|
||||
{
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
/* FCOMPP*/
|
||||
INVALID, FPU_CYCLES(1,0,0), INVALID, INVALID
|
||||
};
|
||||
|
||||
|
||||
static uint32_t opcode_timings_db[8] =
|
||||
{
|
||||
/* FLDil FSTil FSTPil*/
|
||||
FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0),
|
||||
/* FLDe FSTPe*/
|
||||
INVALID, FPU_CYCLES(3,0,0), INVALID, FPU_CYCLES(3,0,0)
|
||||
};
|
||||
static uint32_t opcode_timings_db_mod3[64] =
|
||||
{
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
/* opFNOP opFCLEX opFINIT*/
|
||||
INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(7,0,0), FPU_CYCLES(17,0,0),
|
||||
/* opFNOP opFNOP*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), INVALID, INVALID,
|
||||
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_dc[8] =
|
||||
{
|
||||
/* FADDd FMULd FCOMd FCOMPd*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/* FSUBd FSUBRd FDIVd FDIVRd*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2)
|
||||
};
|
||||
static uint32_t opcode_timings_dc_mod3[8] =
|
||||
{
|
||||
/* opFADDr opFMULr*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),INVALID, INVALID,
|
||||
/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2)
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_dd[8] =
|
||||
{
|
||||
/* FLDd FSTd FSTPd*/
|
||||
FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0),
|
||||
/* FRSTOR FSAVE FSTSW*/
|
||||
FPU_CYCLES(70,0,0), INVALID, FPU_CYCLES(127,0,0), FPU_CYCLES(6,0,0)
|
||||
};
|
||||
static uint32_t opcode_timings_dd_mod3[8] =
|
||||
{
|
||||
/* FFFREE FST FSTP*/
|
||||
FPU_CYCLES(2,0,0), INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),
|
||||
/* FUCOM FUCOMP*/
|
||||
FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),INVALID, INVALID
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_de[8] =
|
||||
{
|
||||
/* FIADDw FIMULw FICOMw FICOMPw*/
|
||||
FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0),
|
||||
/* FISUBw FISUBRw FIDIVw FIDIVRw*/
|
||||
FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2)
|
||||
};
|
||||
static uint32_t opcode_timings_de_mod3[8] =
|
||||
{
|
||||
/* FADDP FMULP FCOMPP*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(1,0,0),
|
||||
/* FSUBP FSUBRP FDIVP FDIVRP*/
|
||||
FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2)
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_df[8] =
|
||||
{
|
||||
/* FILDiw FISTiw FISTPiw*/
|
||||
FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0),
|
||||
/* FILDiq FBSTP FISTPiq*/
|
||||
INVALID, FPU_CYCLES(3,2,2), FPU_CYCLES(148,0,0), FPU_CYCLES(6,0,0)
|
||||
};
|
||||
static uint32_t opcode_timings_df_mod3[8] =
|
||||
{
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
/* FSTSW AX*/
|
||||
FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID
|
||||
};
|
||||
|
||||
static uint32_t opcode_timings_8x[8] =
|
||||
{
|
||||
CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2)
|
||||
};
|
||||
static uint32_t opcode_timings_8x_mod3[8] =
|
||||
{
|
||||
CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2)
|
||||
};
|
||||
static uint32_t opcode_timings_81[8] =
|
||||
{
|
||||
CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2)
|
||||
};
|
||||
static uint32_t opcode_timings_81_mod3[8] =
|
||||
{
|
||||
CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2)
|
||||
};
|
||||
|
||||
/*Per-instruction bookkeeping, reset by codegen_timing_winchip2_start()*/
static int timing_count;
static uint8_t last_prefix;

/*Registers written by the most recently issued instruction(s); used for AGI checks*/
static uint32_t regmask_modified;

/*Decode stall accounting: carried-over delay and prefix cycles for the current instruction*/
static int decode_delay;
static int decode_delay_offset;

/*Outstanding FPU latencies: overall, and per stack slot ST0-ST7*/
static int fpu_latency;
static int fpu_st_latency[8];

/*Instruction held back because it might pair with the one that follows it*/
static int u_pipe_full;
static uint32_t u_pipe_opcode;
static uint32_t *u_pipe_timings;
static uint32_t u_pipe_op_32;
static uint32_t u_pipe_regmask;
static uint32_t u_pipe_fetchdat;
static int u_pipe_decode_delay_offset;
static uint64_t *u_pipe_deps;
|
||||
|
||||
int can_pair(uint32_t timing_a, uint32_t timing_b, uint8_t regmask_b)
|
||||
{
|
||||
/*Only MMX/3DNow instructions can pair*/
|
||||
if (!(timing_b & CYCLES_IS_MMX))
|
||||
return 0;
|
||||
/*Only one MMX multiply per cycle*/
|
||||
if ((timing_a & CYCLES_IS_MMX_MUL) && (timing_b & CYCLES_IS_MMX_MUL))
|
||||
return 0;
|
||||
/*Only one MMX shift/pack per cycle*/
|
||||
if ((timing_a & CYCLES_IS_MMX_SHIFT) && (timing_b & CYCLES_IS_MMX_SHIFT))
|
||||
return 0;
|
||||
/*Second instruction can not access registers written by first*/
|
||||
if (u_pipe_regmask & regmask_b)
|
||||
return 0;
|
||||
/*Must have had enough time to decode prefixes*/
|
||||
if ((decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) > 0)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int COUNT(uint32_t c, int op_32)
|
||||
{
|
||||
if (c & CYCLES_FPU)
|
||||
return FPU_I_LATENCY(c);
|
||||
if (c & CYCLES_HAS_MULTI)
|
||||
{
|
||||
if (op_32 & 0x100)
|
||||
return (c >> 8) & 0xff;
|
||||
return c & 0xff;
|
||||
}
|
||||
return GET_CYCLES(c);
|
||||
}
|
||||
|
||||
static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32)
|
||||
{
|
||||
uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32);
|
||||
|
||||
/*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not
|
||||
cause AGIs with each other, but do with instructions that use it explicitly*/
|
||||
if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP))
|
||||
addr_regmask |= (1 << REG_ESP);
|
||||
|
||||
return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP;
|
||||
}
|
||||
|
||||
static int codegen_fpu_latencies(uint64_t deps, int reg)
|
||||
{
|
||||
int latency = fpu_latency;
|
||||
|
||||
if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency)
|
||||
latency = fpu_st_latency[0];
|
||||
if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency)
|
||||
latency = fpu_st_latency[1];
|
||||
if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency)
|
||||
latency = fpu_st_latency[reg];
|
||||
|
||||
return latency;
|
||||
}
|
||||
|
||||
/*Subtract count from latency and clamp the result at zero.
  Wrapped in do { } while (0) so the macro expands to a single statement and
  stays correct under an unbraced if/else/for. Both arguments are
  parenthesised; latency is evaluated more than once, so it must be a
  side-effect-free lvalue.*/
#define SUB_AND_CLAMP(latency, count)         \
        do                                    \
        {                                     \
                (latency) -= (count);         \
                if ((latency) < 0)            \
                        (latency) = 0;        \
        } while (0)
|
||||
|
||||
static void codegen_fpu_latency_clock(int count)
|
||||
{
|
||||
SUB_AND_CLAMP(fpu_latency, count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[0], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[1], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[2], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[3], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[4], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[5], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[6], count);
|
||||
SUB_AND_CLAMP(fpu_st_latency[7], count);
|
||||
}
|
||||
|
||||
static void codegen_instruction(uint32_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay)
|
||||
{
|
||||
int instr_cycles, latency = 0;
|
||||
|
||||
if ((timings[opcode] & CYCLES_FPU) && !(deps[opcode] & FPU_FXCH))
|
||||
instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7);
|
||||
else
|
||||
instr_cycles = 0;
|
||||
|
||||
if ((decode_delay + decode_delay_offset) > 0)
|
||||
codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles);
|
||||
else
|
||||
codegen_fpu_latency_clock(instr_cycles);
|
||||
instr_cycles += COUNT(timings[opcode], op_32);
|
||||
instr_cycles += exec_delay;
|
||||
if ((decode_delay + decode_delay_offset) > 0)
|
||||
codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset;
|
||||
else
|
||||
codegen_block_cycles += instr_cycles;
|
||||
decode_delay = (-instr_cycles) + 1;
|
||||
|
||||
if (deps[opcode] & FPU_POP)
|
||||
{
|
||||
int c;
|
||||
|
||||
for (c = 0; c < 7; c++)
|
||||
fpu_st_latency[c] = fpu_st_latency[c+1];
|
||||
fpu_st_latency[7] = 0;
|
||||
}
|
||||
if (deps[opcode] & FPU_POP2)
|
||||
{
|
||||
int c;
|
||||
|
||||
for (c = 0; c < 6; c++)
|
||||
fpu_st_latency[c] = fpu_st_latency[c+2];
|
||||
fpu_st_latency[6] = fpu_st_latency[7] = 0;
|
||||
}
|
||||
if (timings[opcode] & CYCLES_FPU)
|
||||
{
|
||||
/* if (fpu_latency)
|
||||
fatal("Bad latency FPU\n");*/
|
||||
fpu_latency = FPU_F_LATENCY(timings[opcode]);
|
||||
}
|
||||
|
||||
if (deps[opcode] & FPU_PUSH)
|
||||
{
|
||||
int c;
|
||||
|
||||
for (c = 0; c < 7; c++)
|
||||
fpu_st_latency[c+1] = fpu_st_latency[c];
|
||||
fpu_st_latency[0] = 0;
|
||||
}
|
||||
if (deps[opcode] & FPU_WRITE_ST0)
|
||||
{
|
||||
/* if (fpu_st_latency[0])
|
||||
fatal("Bad latency ST0\n");*/
|
||||
fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]);
|
||||
}
|
||||
if (deps[opcode] & FPU_WRITE_ST1)
|
||||
{
|
||||
/* if (fpu_st_latency[1])
|
||||
fatal("Bad latency ST1\n");*/
|
||||
fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]);
|
||||
}
|
||||
if (deps[opcode] & FPU_WRITE_STREG)
|
||||
{
|
||||
int reg = fetchdat & 7;
|
||||
if (deps[opcode] & FPU_POP)
|
||||
reg--;
|
||||
if (reg >= 0 &&
|
||||
!(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) &&
|
||||
!(reg == 1 && (deps[opcode] & FPU_WRITE_ST1)))
|
||||
{
|
||||
/* if (fpu_st_latency[reg])
|
||||
fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/
|
||||
fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void codegen_timing_winchip2_block_start()
|
||||
{
|
||||
regmask_modified = 0;
|
||||
decode_delay = decode_delay_offset = 0;
|
||||
u_pipe_full = 0;
|
||||
}
|
||||
|
||||
static void codegen_timing_winchip2_start()
|
||||
{
|
||||
timing_count = 0;
|
||||
last_prefix = 0;
|
||||
}
|
||||
|
||||
static void codegen_timing_winchip2_prefix(uint8_t prefix, uint32_t fetchdat)
|
||||
{
|
||||
if (prefix == 0x0f)
|
||||
{
|
||||
/*0fh prefix is 'free'*/
|
||||
last_prefix = prefix;
|
||||
return;
|
||||
}
|
||||
/*On WinChip all prefixes take 1 cycle to decode. Decode may be shadowed
|
||||
by execution of previous instructions*/
|
||||
decode_delay_offset++;
|
||||
last_prefix = prefix;
|
||||
}
|
||||
|
||||
static void codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
||||
{
|
||||
uint32_t *timings;
|
||||
uint64_t *deps;
|
||||
int mod3 = ((fetchdat & 0xc0) == 0xc0);
|
||||
int bit8 = !(opcode & 1);
|
||||
int agi_stall = 0;
|
||||
|
||||
switch (last_prefix)
|
||||
{
|
||||
case 0x0f:
|
||||
timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f;
|
||||
deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f;
|
||||
break;
|
||||
|
||||
case 0xd8:
|
||||
timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8;
|
||||
deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8;
|
||||
opcode = (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xd9:
|
||||
timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9;
|
||||
deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9;
|
||||
opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xda:
|
||||
timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da;
|
||||
deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da;
|
||||
opcode = (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xdb:
|
||||
timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db;
|
||||
deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db;
|
||||
opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xdc:
|
||||
timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc;
|
||||
deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc;
|
||||
opcode = (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xdd:
|
||||
timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd;
|
||||
deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd;
|
||||
opcode = (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xde:
|
||||
timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de;
|
||||
deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de;
|
||||
opcode = (opcode >> 3) & 7;
|
||||
break;
|
||||
case 0xdf:
|
||||
timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df;
|
||||
deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df;
|
||||
opcode = (opcode >> 3) & 7;
|
||||
break;
|
||||
|
||||
default:
|
||||
switch (opcode)
|
||||
{
|
||||
case 0x80: case 0x82: case 0x83:
|
||||
timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x;
|
||||
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0x81:
|
||||
timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81;
|
||||
deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
|
||||
case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3:
|
||||
timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift;
|
||||
deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
|
||||
case 0xf6:
|
||||
timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6;
|
||||
deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0xf7:
|
||||
timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7;
|
||||
deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
case 0xff:
|
||||
timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff;
|
||||
deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff;
|
||||
opcode = (fetchdat >> 3) & 7;
|
||||
break;
|
||||
|
||||
default:
|
||||
timings = mod3 ? opcode_timings_mod3 : opcode_timings;
|
||||
deps = mod3 ? opcode_deps_mod3 : opcode_deps;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (u_pipe_full)
|
||||
{
|
||||
uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32);
|
||||
|
||||
if (can_pair(u_pipe_timings[u_pipe_opcode], timings[opcode], regmask))
|
||||
{
|
||||
int cycles_a = u_pipe_timings[u_pipe_opcode] & 0xff;
|
||||
int cycles_b = timings[opcode] & 0xff;
|
||||
uint32_t timing = (cycles_a > cycles_b) ? u_pipe_timings[u_pipe_opcode] : timings[opcode];
|
||||
uint64_t temp_deps = 0;
|
||||
|
||||
if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
|
||||
agi_stall = 1;
|
||||
|
||||
codegen_instruction(&timing, &temp_deps, 0, 0, 0, 0, agi_stall);
|
||||
u_pipe_full = 0;
|
||||
decode_delay_offset = 0;
|
||||
regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*No pairing, run first instruction now*/
|
||||
if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
|
||||
agi_stall = 1;
|
||||
codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall);
|
||||
u_pipe_full = 0;
|
||||
regmask_modified = u_pipe_regmask;
|
||||
}
|
||||
}
|
||||
if (timings[opcode] & CYCLES_IS_MMX)
|
||||
{
|
||||
/*Might pair with next instruction*/
|
||||
u_pipe_full = 1;
|
||||
u_pipe_opcode = opcode;
|
||||
u_pipe_timings = timings;
|
||||
u_pipe_op_32 = op_32;
|
||||
u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8);
|
||||
u_pipe_fetchdat = fetchdat;
|
||||
u_pipe_decode_delay_offset = decode_delay_offset;
|
||||
u_pipe_deps = deps;
|
||||
decode_delay_offset = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_agi(deps, opcode, fetchdat, op_32))
|
||||
agi_stall = 1;
|
||||
codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall);
|
||||
decode_delay_offset = 0;
|
||||
regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8);
|
||||
}
|
||||
|
||||
static void codegen_timing_winchip2_block_end()
|
||||
{
|
||||
if (u_pipe_full)
|
||||
{
|
||||
int agi_stall = 0;
|
||||
|
||||
if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
|
||||
agi_stall = 1;
|
||||
codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall);
|
||||
u_pipe_full = 0;
|
||||
}
|
||||
}
|
||||
|
||||
codegen_timing_t codegen_timing_winchip2 =
|
||||
{
|
||||
codegen_timing_winchip2_start,
|
||||
codegen_timing_winchip2_prefix,
|
||||
codegen_timing_winchip2_opcode,
|
||||
codegen_timing_winchip2_block_start,
|
||||
codegen_timing_winchip2_block_end,
|
||||
NULL
|
||||
};
|
||||
@@ -1067,7 +1067,7 @@ void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t
|
||||
}
|
||||
|
||||
generate_call:
|
||||
codegen_timing_opcode(opcode, fetchdat, op_32);
|
||||
codegen_timing_opcode(opcode, fetchdat, op_32, op_pc);
|
||||
|
||||
if ((op_table == x86_dynarec_opcodes &&
|
||||
((opcode & 0xf0) == 0x70 || (opcode & 0xfc) == 0xe0 || opcode == 0xc2 ||
|
||||
@@ -1075,6 +1075,24 @@ generate_call:
|
||||
(opcode == 0xff && ((fetchdat & 0x38) >= 0x10 && (fetchdat & 0x38) < 0x30)))) ||
|
||||
(op_table == x86_dynarec_opcodes_0f && ((opcode & 0xf0) == 0x80)))
|
||||
{
|
||||
/*On some CPUs (eg K6), a jump/branch instruction may be able to pair with
|
||||
subsequent instructions, so no cycles may have been deducted for it yet.
|
||||
To prevent having zero cycle blocks (eg with a jump instruction pointing
|
||||
to itself), apply the cycles that would be taken if this jump is taken,
|
||||
then reverse it for subsequent instructions if the jump is not taken*/
|
||||
int jump_cycles = 0;
|
||||
|
||||
if (codegen_timing_jump_cycles != NULL)
|
||||
codegen_timing_jump_cycles();
|
||||
|
||||
if (jump_cycles)
|
||||
{
|
||||
addbyte(0x81); /*SUB $jump_cycles, cyclcs*/
|
||||
addbyte(0x6d);
|
||||
addbyte((uint8_t)cpu_state_offset(_cycles));
|
||||
addlong((uint32_t)jump_cycles);
|
||||
}
|
||||
|
||||
/*Opcode is likely to cause block to exit, update cycle count*/
|
||||
if (codegen_block_cycles)
|
||||
{
|
||||
@@ -1092,6 +1110,15 @@ generate_call:
|
||||
addlong(codegen_block_ins);
|
||||
codegen_block_ins = 0;
|
||||
}
|
||||
|
||||
if (jump_cycles)
|
||||
{
|
||||
addbyte(0x81); /*SUB $jump_cycles, cyclcs*/
|
||||
addbyte(0x6d);
|
||||
addbyte((uint8_t)cpu_state_offset(_cycles));
|
||||
addlong((uint32_t)jump_cycles);
|
||||
jump_cycles = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if ((op_table == x86_dynarec_opcodes_REPNE || op_table == x86_dynarec_opcodes_REPE) && !op_table[opcode | op_32])
|
||||
|
||||
@@ -977,8 +977,8 @@ static uint32_t gen_MEM_CHECK_WRITE()
|
||||
addbyte(0x6a); /*PUSH 1*/
|
||||
addbyte(1);
|
||||
addbyte(0x57); /*PUSH EDI*/
|
||||
addbyte(0xe8); /*CALL mmutranslatereal*/
|
||||
addlong((uint32_t)mmutranslatereal - (uint32_t)(&codeblock[block_current].data[block_pos + 4]));
|
||||
addbyte(0xe8); /*CALL mmutranslatereal32*/
|
||||
addlong((uint32_t)mmutranslatereal32 - (uint32_t)(&codeblock[block_current].data[block_pos + 4]));
|
||||
addbyte(0x83); /*ADD ESP, 8*/
|
||||
addbyte(0xc4);
|
||||
addbyte(8);
|
||||
@@ -1049,8 +1049,8 @@ static uint32_t gen_MEM_CHECK_WRITE_W()
|
||||
addbyte(0x6a); /*PUSH 1*/
|
||||
addbyte(1);
|
||||
addbyte(0x57); /*PUSH EDI*/
|
||||
addbyte(0xe8); /*CALL mmutranslatereal*/
|
||||
addlong((uint32_t)mmutranslatereal - (uint32_t)(&codeblock[block_current].data[block_pos + 4]));
|
||||
addbyte(0xe8); /*CALL mmutranslatereal32*/
|
||||
addlong((uint32_t)mmutranslatereal32 - (uint32_t)(&codeblock[block_current].data[block_pos + 4]));
|
||||
addbyte(0x5f); /*POP EDI*/
|
||||
addbyte(0x83); /*ADD ESP, 4*/
|
||||
addbyte(0xc4);
|
||||
@@ -1131,8 +1131,8 @@ static uint32_t gen_MEM_CHECK_WRITE_L()
|
||||
addbyte(0x6a); /*PUSH 1*/
|
||||
addbyte(1);
|
||||
addbyte(0x57); /*PUSH EDI*/
|
||||
addbyte(0xe8); /*CALL mmutranslatereal*/
|
||||
addlong((uint32_t)mmutranslatereal - (uint32_t)(&codeblock[block_current].data[block_pos + 4]));
|
||||
addbyte(0xe8); /*CALL mmutranslatereal32*/
|
||||
addlong((uint32_t)mmutranslatereal32 - (uint32_t)(&codeblock[block_current].data[block_pos + 4]));
|
||||
addbyte(0x5f); /*POP EDI*/
|
||||
addbyte(0x83); /*ADD ESP, 4*/
|
||||
addbyte(0xc4);
|
||||
@@ -1874,7 +1874,7 @@ void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t
|
||||
int pc_off = 0;
|
||||
int test_modrm = 1;
|
||||
int c;
|
||||
|
||||
|
||||
op_ea_seg = &cpu_state.seg_ds;
|
||||
op_ssegs = 0;
|
||||
op_old_pc = old_pc;
|
||||
@@ -2031,7 +2031,7 @@ void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t
|
||||
}
|
||||
|
||||
generate_call:
|
||||
codegen_timing_opcode(opcode, fetchdat, op_32);
|
||||
codegen_timing_opcode(opcode, fetchdat, op_32, op_pc);
|
||||
|
||||
if ((op_table == x86_dynarec_opcodes &&
|
||||
((opcode & 0xf0) == 0x70 || (opcode & 0xfc) == 0xe0 || opcode == 0xc2 ||
|
||||
@@ -2039,10 +2039,28 @@ generate_call:
|
||||
(opcode == 0xff && ((fetchdat & 0x38) >= 0x10 && (fetchdat & 0x38) < 0x30)))) ||
|
||||
(op_table == x86_dynarec_opcodes_0f && ((opcode & 0xf0) == 0x80)))
|
||||
{
|
||||
/*On some CPUs (eg K6), a jump/branch instruction may be able to pair with
|
||||
subsequent instructions, so no cycles may have been deducted for it yet.
|
||||
To prevent having zero cycle blocks (eg with a jump instruction pointing
|
||||
to itself), apply the cycles that would be taken if this jump is taken,
|
||||
then reverse it for subsequent instructions if the jump is not taken*/
|
||||
int jump_cycles = 0;
|
||||
|
||||
if (codegen_timing_jump_cycles != NULL)
|
||||
codegen_timing_jump_cycles();
|
||||
|
||||
if (jump_cycles)
|
||||
{
|
||||
addbyte(0x81); /*SUB $jump_cycles, cycles*/
|
||||
addbyte(0x6d);
|
||||
addbyte((uint8_t)cpu_state_offset(_cycles));
|
||||
addlong(jump_cycles);
|
||||
}
|
||||
|
||||
/*Opcode is likely to cause block to exit, update cycle count*/
|
||||
if (codegen_block_cycles)
|
||||
{
|
||||
addbyte(0x81); /*SUB $codegen_block_cycles, cyclcs*/
|
||||
addbyte(0x81); /*SUB $codegen_block_cycles, cycles*/
|
||||
addbyte(0x6d);
|
||||
addbyte((uint8_t)cpu_state_offset(_cycles));
|
||||
addlong(codegen_block_cycles);
|
||||
@@ -2066,6 +2084,15 @@ generate_call:
|
||||
codegen_block_full_ins = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (jump_cycles)
|
||||
{
|
||||
addbyte(0x81); /*SUB $jump_cycles, cycles*/
|
||||
addbyte(0x6d);
|
||||
addbyte((uint8_t)cpu_state_offset(_cycles));
|
||||
addlong(jump_cycles);
|
||||
jump_cycles = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if ((op_table == x86_dynarec_opcodes_REPNE || op_table == x86_dynarec_opcodes_REPE) && !op_table[opcode | op_32])
|
||||
@@ -2099,7 +2126,7 @@ generate_call:
|
||||
addbyte(0xC6); /*MOVB [ssegs],op_ssegs*/
|
||||
addbyte(0x45);
|
||||
addbyte((uint8_t)cpu_state_offset(ssegs));
|
||||
addbyte(op_pc + pc_off);
|
||||
addbyte(op_pc + pc_off);
|
||||
}
|
||||
|
||||
if (!test_modrm ||
|
||||
@@ -2140,7 +2167,7 @@ generate_call:
|
||||
addbyte(0xC7); /*MOVL pc,new_pc*/
|
||||
addbyte(0x45);
|
||||
addbyte((uint8_t)cpu_state_offset(pc));
|
||||
addlong(op_pc + pc_off);
|
||||
addlong(op_pc + pc_off);
|
||||
|
||||
addbyte(0xC7); /*MOVL $old_pc,(oldpc)*/
|
||||
addbyte(0x45);
|
||||
|
||||
161
src/cpu/x86seg.c
161
src/cpu/x86seg.c
@@ -391,12 +391,17 @@ void loadseg(uint16_t seg, x86seg *s)
|
||||
{
|
||||
if (!(seg&~3))
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadseg(): Stack segment is zero",seg&~3);
|
||||
return;
|
||||
}
|
||||
if ((seg&3)!=CPL || dpl!=CPL)
|
||||
if ((seg&3)!=CPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadseg(): Stack segment RPL != CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (dpl!=CPL)
|
||||
{
|
||||
x86gpf("loadseg(): Stack segment DPL != CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
switch ((segdat[2]>>8)&0x1F)
|
||||
@@ -404,7 +409,7 @@ void loadseg(uint16_t seg, x86seg *s)
|
||||
case 0x12: case 0x13: case 0x16: case 0x17: /*r/w*/
|
||||
break;
|
||||
default:
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadseg(): Unknown stack segment type",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (!(segdat[2]&0x8000))
|
||||
@@ -423,16 +428,21 @@ void loadseg(uint16_t seg, x86seg *s)
|
||||
case 0x10: case 0x11: case 0x12: case 0x13: /*Data segments*/
|
||||
case 0x14: case 0x15: case 0x16: case 0x17:
|
||||
case 0x1A: case 0x1B: /*Readable non-conforming code*/
|
||||
if ((seg&3)>dpl || (CPL)>dpl)
|
||||
if ((seg&3)>dpl)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadseg(): Normal segment is zero",seg&~3);
|
||||
return;
|
||||
}
|
||||
if ((CPL)>dpl)
|
||||
{
|
||||
x86gpf("loadseg(): Normal segment DPL < CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 0x1E: case 0x1F: /*Readable conforming code*/
|
||||
break;
|
||||
default:
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadseg(): Unknown normal segment type",seg&~3);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -519,7 +529,7 @@ void loadcs(uint16_t seg)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcs(): Protected mode selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -528,7 +538,7 @@ void loadcs(uint16_t seg)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcs(): Protected mode selector > GDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -544,12 +554,12 @@ void loadcs(uint16_t seg)
|
||||
{
|
||||
if ((seg&3)>CPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcs(): Non-conforming RPL > CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (CPL != DPL)
|
||||
{
|
||||
x86gpf("loadcs(): CPL != DPL",seg&~3);
|
||||
x86gpf("loadcs(): Non-conforming CPL != DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -579,13 +589,13 @@ void loadcs(uint16_t seg)
|
||||
{
|
||||
if (!(segdat[2]&0x8000))
|
||||
{
|
||||
x86np("Load CS system seg not present\n", seg & 0xfffc);
|
||||
x86np("Load CS system seg not present", seg & 0xfffc);
|
||||
return;
|
||||
}
|
||||
switch (segdat[2]&0xF00)
|
||||
{
|
||||
default:
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("Load CS system segment has bits 0-3 of access rights set",seg&~3);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -614,7 +624,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
{
|
||||
if (!(seg&~3))
|
||||
{
|
||||
x86gpf(NULL,0);
|
||||
x86gpf("loadcsjmp(): Selector is zero",0);
|
||||
return;
|
||||
}
|
||||
addr=seg&~7;
|
||||
@@ -622,7 +632,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loacsjmp(): Selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -631,7 +641,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loacsjmp(): Selector > GDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -700,9 +710,14 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
cgate16=!cgate32;
|
||||
oldcs=CS;
|
||||
cpu_state.oldpc = cpu_state.pc;
|
||||
if ((DPL < CPL) || (DPL < (seg&3)))
|
||||
if (DPL < CPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcsjmp(): Call gate DPL < CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (DPL < (seg&3))
|
||||
{
|
||||
x86gpf("loadcsjmp(): Call gate DPL< RPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (DPL < CPL)
|
||||
@@ -732,7 +747,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcsjmp(): Call gate selector > LDT limit",seg2&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -741,7 +756,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcsjmp(): Call gate selector > GDT limit",seg2&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -769,7 +784,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming code*/
|
||||
if (DPL > CPL)
|
||||
{
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcsjmp(): Non-conforming DPL > CPL",seg2&~3);
|
||||
return;
|
||||
}
|
||||
/*FALLTHROUGH*/
|
||||
@@ -789,7 +804,7 @@ void loadcsjmp(uint16_t seg, uint32_t old_pc)
|
||||
break;
|
||||
|
||||
default:
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcsjmp(): Unknown type",seg2&~3);
|
||||
return;
|
||||
}
|
||||
cycles -= timing_jmp_pm_gate;
|
||||
@@ -910,7 +925,7 @@ void loadcscall(uint16_t seg)
|
||||
if (csout) x86seg_log("Protected mode CS load! %04X\n",seg);
|
||||
if (!(seg&~3))
|
||||
{
|
||||
x86gpf(NULL,0);
|
||||
x86gpf("loadcscall(): Protected mode selector is zero",0);
|
||||
return;
|
||||
}
|
||||
addr=seg&~7;
|
||||
@@ -918,7 +933,7 @@ void loadcscall(uint16_t seg)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcscall(): Selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -927,7 +942,7 @@ void loadcscall(uint16_t seg)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcscall(): Selector > GDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -948,18 +963,18 @@ void loadcscall(uint16_t seg)
|
||||
{
|
||||
if ((seg&3)>CPL)
|
||||
{
|
||||
x86gpf("loadcscall(): segment > CPL",seg&~3);
|
||||
x86gpf("loadcscall(): Non-conforming RPL > CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (CPL != DPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcscall(): Non-conforming CPL != DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (CPL < DPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcscall(): CPL < DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (!(segdat[2]&0x8000))
|
||||
@@ -1033,7 +1048,7 @@ void loadcscall(uint16_t seg)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcscall(): ex Selector > LDT limit",seg2&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -1042,7 +1057,7 @@ void loadcscall(uint16_t seg)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcscall(): ex Selector > GDT limit",seg2&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -1241,7 +1256,7 @@ void loadcscall(uint16_t seg)
|
||||
}
|
||||
else if (DPL > CPL)
|
||||
{
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcscall(): Call PM Gate Inner DPL > CPL",seg2&~3);
|
||||
return;
|
||||
}
|
||||
/*FALLTHROUGH*/
|
||||
@@ -1261,7 +1276,7 @@ void loadcscall(uint16_t seg)
|
||||
break;
|
||||
|
||||
default:
|
||||
x86gpf(NULL,seg2&~3);
|
||||
x86gpf("loadcscall(): Unknown subtype",seg2&~3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
@@ -1275,7 +1290,7 @@ void loadcscall(uint16_t seg)
|
||||
break;
|
||||
|
||||
default:
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("loadcscall(): Unknown type",seg&~3);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -1331,7 +1346,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -1340,7 +1355,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Selector > GDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -1366,7 +1381,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if (CPL != DPL)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Non-conforming CPL != DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
@@ -1374,12 +1389,12 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if (CPL < DPL)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Conforming CPL < DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Unknown type",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (!(segdat[2]&0x8000))
|
||||
@@ -1414,7 +1429,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if ((seg&3) != DPL)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Non-conforming RPL != DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
x86seg_log("RETF non-conforming, %i %i\n",seg&3, DPL);
|
||||
@@ -1423,14 +1438,14 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if ((seg&3) < DPL)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Conforming RPL < DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
x86seg_log("RETF conforming, %i %i\n",seg&3, DPL);
|
||||
break;
|
||||
default:
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmoderetf(): Unknown type",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (!(segdat[2]&0x8000))
|
||||
@@ -1455,7 +1470,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if (!(newss&~3))
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmoderetf(): New SS selector is zero",newss&~3);
|
||||
return;
|
||||
}
|
||||
addr=newss&~7;
|
||||
@@ -1464,7 +1479,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmoderetf(): New SS selector > LDT limit",newss&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -1474,7 +1489,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmoderetf(): New SS selector > GDT limit",newss&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -1488,13 +1503,13 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if ((newss & 3) != (seg & 3))
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmoderetf(): New SS RPL > CS RPL",newss&~3);
|
||||
return;
|
||||
}
|
||||
if ((segdat2[2]&0x1A00)!=0x1200)
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmoderetf(): New SS unknown type",newss&~3);
|
||||
return;
|
||||
}
|
||||
if (!(segdat2[2]&0x8000))
|
||||
@@ -1506,7 +1521,7 @@ void pmoderetf(int is32, uint16_t off)
|
||||
if (DPL2 != (seg & 3))
|
||||
{
|
||||
ESP=oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmoderetf(): New SS DPL != CS RPL",newss&~3);
|
||||
return;
|
||||
}
|
||||
SS=newss;
|
||||
@@ -1564,7 +1579,7 @@ void pmodeint(int num, int soft)
|
||||
if (cpu_state.eflags&VM_FLAG && IOPL!=3 && soft)
|
||||
{
|
||||
x86seg_log("V86 banned int\n");
|
||||
x86gpf(NULL,0);
|
||||
x86gpf("pmodeint(): V86 banned int",0);
|
||||
return;
|
||||
}
|
||||
addr=(num<<3);
|
||||
@@ -1582,9 +1597,9 @@ void pmodeint(int num, int soft)
|
||||
}
|
||||
else
|
||||
{
|
||||
x86gpf(NULL,(num*8)+2+((soft)?0:1));
|
||||
x86gpf("pmodeint(): Vector > IDT limit",(num*8)+2+((soft)?0:1));
|
||||
}
|
||||
x86seg_log("addr >= IDT.limit\n");
|
||||
x86seg_log("addr >= IDT.limit\n");
|
||||
return;
|
||||
}
|
||||
addr+=idt.base;
|
||||
@@ -1598,12 +1613,12 @@ void pmodeint(int num, int soft)
|
||||
x86seg_log("Addr %08X seg %04X %04X %04X %04X\n",addr,segdat[0],segdat[1],segdat[2],segdat[3]);
|
||||
if (!(segdat[2]&0x1F00))
|
||||
{
|
||||
x86gpf(NULL,(num*8)+2);
|
||||
x86gpf("pmodeint(): Vector descriptor with bad type",(num*8)+2);
|
||||
return;
|
||||
}
|
||||
if (DPL<CPL && soft)
|
||||
{
|
||||
x86gpf(NULL,(num*8)+2);
|
||||
x86gpf("pmodeint(): Vector DPL < CPL",(num*8)+2);
|
||||
return;
|
||||
}
|
||||
type=segdat[2]&0x1F00;
|
||||
@@ -1624,7 +1639,7 @@ void pmodeint(int num, int soft)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): Interrupt or trap gate selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -1633,7 +1648,7 @@ void pmodeint(int num, int soft)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): Interrupt or trap gate selector > GDT limit", seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -1647,7 +1662,7 @@ void pmodeint(int num, int soft)
|
||||
|
||||
if (DPL2 > CPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): Interrupt or trap gate DPL > CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
switch (segdat2[2]&0x1F00)
|
||||
@@ -1662,7 +1677,7 @@ void pmodeint(int num, int soft)
|
||||
}
|
||||
if ((cpu_state.eflags&VM_FLAG) && DPL2)
|
||||
{
|
||||
x86gpf(NULL,segdat[1]&0xFFFC);
|
||||
x86gpf("pmodeint(): Interrupt or trap gate non-zero DPL in V86 mode",segdat[1]&0xFFFC);
|
||||
return;
|
||||
}
|
||||
/*Load new stack*/
|
||||
@@ -1774,7 +1789,7 @@ void pmodeint(int num, int soft)
|
||||
}
|
||||
else if (DPL2!=CPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): DPL != CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
/*FALLTHROUGH*/
|
||||
@@ -1786,7 +1801,7 @@ void pmodeint(int num, int soft)
|
||||
}
|
||||
if ((cpu_state.eflags & VM_FLAG) && DPL2<CPL)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): DPL < CPL in V86 mode",seg&~3);
|
||||
return;
|
||||
}
|
||||
if (type>0x800)
|
||||
@@ -1804,7 +1819,7 @@ void pmodeint(int num, int soft)
|
||||
new_cpl = CS & 3;
|
||||
break;
|
||||
default:
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): Unknown type",seg&~3);
|
||||
return;
|
||||
}
|
||||
do_seg_load(&cpu_state.seg_cs, segdat2);
|
||||
@@ -1838,7 +1853,7 @@ void pmodeint(int num, int soft)
|
||||
{
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): Task gate selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -1847,7 +1862,7 @@ void pmodeint(int num, int soft)
|
||||
{
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeint(): Task gate selector > GDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -1926,7 +1941,7 @@ void pmodeiret(int is32)
|
||||
if (seg&4)
|
||||
{
|
||||
x86seg_log("TS LDT %04X %04X IRET\n",seg,gdt.limit);
|
||||
x86ts(NULL,seg&~3);
|
||||
x86ts("pmodeiret(): Selector points to LDT",seg&~3);
|
||||
return;
|
||||
}
|
||||
else
|
||||
@@ -2016,7 +2031,7 @@ void pmodeiret(int is32)
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeiret(): Selector > LDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -2026,7 +2041,7 @@ void pmodeiret(int is32)
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeiret(): Selector > GDT limit",seg&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -2034,7 +2049,7 @@ void pmodeiret(int is32)
|
||||
if ((seg&3) < CPL)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeiret(): RPL < CPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
cpl_override=1;
|
||||
@@ -2049,7 +2064,7 @@ void pmodeiret(int is32)
|
||||
if ((seg&3) != DPL)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeiret(): Non-conforming RPL != DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
@@ -2057,7 +2072,7 @@ void pmodeiret(int is32)
|
||||
if ((seg&3) < DPL)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,seg&~3);
|
||||
x86gpf("pmodeiret(): Conforming RPL < DPL",seg&~3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
@@ -2107,7 +2122,7 @@ void pmodeiret(int is32)
|
||||
if (!(newss&~3))
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmodeiret(): New SS selector is zero",newss&~3);
|
||||
return;
|
||||
}
|
||||
addr=newss&~7;
|
||||
@@ -2116,7 +2131,7 @@ void pmodeiret(int is32)
|
||||
if (addr>=ldt.limit)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmodeiret(): New SS selector > LDT limit",newss&~3);
|
||||
return;
|
||||
}
|
||||
addr+=ldt.base;
|
||||
@@ -2126,7 +2141,7 @@ void pmodeiret(int is32)
|
||||
if (addr>=gdt.limit)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmodeiret(): New SS selector > GDT limit",newss&~3);
|
||||
return;
|
||||
}
|
||||
addr+=gdt.base;
|
||||
@@ -2139,19 +2154,19 @@ void pmodeiret(int is32)
|
||||
if ((newss & 3) != (seg & 3))
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmodeiret(): New SS RPL > CS RPL",newss&~3);
|
||||
return;
|
||||
}
|
||||
if ((segdat2[2]&0x1A00)!=0x1200)
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmodeiret(): New SS bad type",newss&~3);
|
||||
return;
|
||||
}
|
||||
if (DPL2 != (seg & 3))
|
||||
{
|
||||
ESP = oldsp;
|
||||
x86gpf(NULL,newss&~3);
|
||||
x86gpf("pmodeiret(): New SS DPL != CS RPL",newss&~3);
|
||||
return;
|
||||
}
|
||||
if (!(segdat2[2]&0x8000))
|
||||
|
||||
Reference in New Issue
Block a user