NDR: For barrier micro-ops, lazily mark written registers instead of iterating

Also remove some dead code in there.

4-5% more improvement observed.
This commit is contained in:
Cacodemon345
2025-05-06 16:25:09 +06:00
parent 864e01b0e2
commit aafd2f22f5
4 changed files with 62 additions and 34 deletions

View File

@@ -34,6 +34,8 @@ typedef struct host_reg_set_t {
static host_reg_set_t host_reg_set;
static host_reg_set_t host_fp_reg_set;
uint64_t dirty_ir_regs[2] = { 0, 0 };
enum {
REG_BYTE,
REG_WORD,
@@ -184,6 +186,24 @@ struct
[IREG_temp1d] = { REG_DOUBLE, (void *) 48, REG_FP, REG_VOLATILE },
};
static const uint8_t native_requested_sizes[9][8] =
{
[REG_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1,
[REG_FPU_ST_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1,
[REG_WORD][IREG_SIZE_W >> IREG_SIZE_SHIFT] = 1,
[REG_DWORD][IREG_SIZE_L >> IREG_SIZE_SHIFT] = 1,
[REG_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
[REG_FPU_ST_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
[REG_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
[REG_FPU_ST_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
[REG_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
[REG_FPU_ST_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
[REG_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
[REG_FPU_ST_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
[REG_POINTER][(sizeof(void *) == 4) ? (IREG_SIZE_L >> IREG_SIZE_SHIFT) : (IREG_SIZE_Q >> IREG_SIZE_SHIFT)] = 1
};
void
codegen_reg_mark_as_required(void)
{
@@ -195,6 +215,7 @@ codegen_reg_mark_as_required(void)
if (last_version > 0)
reg_version[reg][last_version].flags |= REG_FLAGS_REQUIRED;
}
dirty_ir_regs[0] = dirty_ir_regs[1] = 0;
}
int
@@ -203,29 +224,7 @@ reg_is_native_size(ir_reg_t ir_reg)
int native_size = ireg_data[IREG_GET_REG(ir_reg.reg)].native_size;
int requested_size = IREG_GET_SIZE(ir_reg.reg);
switch (native_size) {
case REG_BYTE:
case REG_FPU_ST_BYTE:
return (requested_size == IREG_SIZE_B);
case REG_WORD:
return (requested_size == IREG_SIZE_W);
case REG_DWORD:
return (requested_size == IREG_SIZE_L);
case REG_QWORD:
case REG_FPU_ST_QWORD:
case REG_DOUBLE:
case REG_FPU_ST_DOUBLE:
return ((requested_size == IREG_SIZE_D) || (requested_size == IREG_SIZE_Q));
case REG_POINTER:
if (sizeof(void *) == 4)
return (requested_size == IREG_SIZE_L);
return (requested_size == IREG_SIZE_Q);
default:
fatal("get_reg_is_native_size: unknown native size %i\n", native_size);
}
return 0;
return native_requested_sizes[native_size][requested_size >> IREG_SIZE_SHIFT];
}
void
@@ -258,6 +257,8 @@ codegen_reg_reset(void)
host_fp_reg_set.locked = 0;
host_fp_reg_set.nr_regs = CODEGEN_HOST_FP_REGS;
dirty_ir_regs[0] = dirty_ir_regs[1] = 0;
for (c = 0; c < IREG_COUNT; c++) {
reg_last_version[c] = 0;
reg_version[c][0].refcount = 0;