NDR: For barrier micro-ops, lazily mark written registers instead of iterating

Also remove some dead code in the same area.

An additional 4-5% improvement was observed.
This commit is contained in:
Cacodemon345
2025-05-06 16:25:09 +06:00
parent 864e01b0e2
commit aafd2f22f5
4 changed files with 62 additions and 34 deletions

View File

@@ -377,6 +377,34 @@ uop_alloc(ir_data_t *ir, uint32_t uop_type)
uop->jump_dest_uop = -1;
uop->jump_list_next = -1;
if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER))
dirty_ir_regs[0] = dirty_ir_regs[1] = ~0ULL;
return uop;
}
static inline uop_t *
uop_alloc_unroll(ir_data_t *ir, uint32_t uop_type)
{
uop_t *uop;
if (ir->wr_pos >= UOP_NR_MAX)
fatal("Exceeded uOP max\n");
uop = &ir->uops[ir->wr_pos++];
uop->is_a16 = 0;
uop->dest_reg_a = invalid_ir_reg;
uop->src_reg_a = invalid_ir_reg;
uop->src_reg_b = invalid_ir_reg;
uop->src_reg_c = invalid_ir_reg;
uop->pc = cpu_state.oldpc;
uop->jump_dest_uop = -1;
uop->jump_list_next = -1;
if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER))
codegen_reg_mark_as_required();