|
|
@@ -267,6 +267,97 @@ vec4_visitor::reg_allocate() |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* When we decide to spill a register, instead of blindly spilling every use, |
|
|
|
* save unspills when the spill register is used (read) in consecutive |
|
|
|
* instructions. This can potentially save a bunch of unspills that would |
|
|
|
* have very little impact in register allocation anyway. |
|
|
|
* |
|
|
|
* Notice that we need to account for this behavior when spilling a register |
|
|
|
* and when evaluating spilling costs. This function is designed so it can |
|
|
|
* be called from both places and avoid repeating the logic. |
|
|
|
* |
|
|
|
* - When we call this function from spill_reg(), we pass in scratch_reg the |
|
|
|
* actual unspill/spill register that we want to reuse in the current |
|
|
|
* instruction. |
|
|
|
* |
|
|
|
* - When we call this from evaluate_spill_costs(), we pass the register for |
|
|
|
* which we are evaluating spilling costs. |
|
|
|
* |
|
|
|
* In either case, we check if the previous instructions read scratch_reg until |
|
|
|
* we find one that writes to it with a compatible mask or does not read/write |
|
|
|
* scratch_reg at all. |
|
|
|
*/ |
|
|
|
static bool |
|
|
|
can_use_scratch_for_source(const vec4_instruction *inst, unsigned i, |
|
|
|
unsigned scratch_reg) |
|
|
|
{ |
|
|
|
assert(inst->src[i].file == GRF); |
|
|
|
bool prev_inst_read_scratch_reg = false; |
|
|
|
|
|
|
|
/* See if any previous source in the same instructions reads scratch_reg */ |
|
|
|
for (unsigned n = 0; n < i; n++) { |
|
|
|
if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg) |
|
|
|
prev_inst_read_scratch_reg = true; |
|
|
|
} |
|
|
|
|
|
|
|
/* Now check if previous instructions read/write scratch_reg */ |
|
|
|
for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev; |
|
|
|
!prev_inst->is_head_sentinel(); |
|
|
|
prev_inst = (vec4_instruction *) prev_inst->prev) { |
|
|
|
|
|
|
|
/* If the previous instruction writes to scratch_reg then we can reuse |
|
|
|
* it if the write is not conditional and the channels we write are |
|
|
|
* compatible with our read mask |
|
|
|
*/ |
|
|
|
if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) { |
|
|
|
return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) && |
|
|
|
(brw_mask_for_swizzle(inst->src[i].swizzle) & |
|
|
|
~prev_inst->dst.writemask) == 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* Skip scratch read/writes so that instructions generated by spilling |
|
|
|
* other registers (that won't read/write scratch_reg) do not stop us from |
|
|
|
* reusing scratch_reg for this instruction. |
|
|
|
*/ |
|
|
|
if (prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE || |
|
|
|
prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ) |
|
|
|
continue; |
|
|
|
|
|
|
|
/* If the previous instruction does not write to scratch_reg, then check |
|
|
|
* if it reads it |
|
|
|
*/ |
|
|
|
int n; |
|
|
|
for (n = 0; n < 3; n++) { |
|
|
|
if (prev_inst->src[n].file == GRF && |
|
|
|
prev_inst->src[n].reg == scratch_reg) { |
|
|
|
prev_inst_read_scratch_reg = true; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
if (n == 3) { |
|
|
|
/* The previous instruction does not read scratch_reg. At this point, |
|
|
|
* if no previous instruction has read scratch_reg it means that we |
|
|
|
* will need to unspill it here and we can't reuse it (so we return |
|
|
|
* false). Otherwise, if we found at least one consecutive instruction |
|
|
|
* that read scratch_reg, then we know that we got here from |
|
|
|
* evaluate_spill_costs (since for the spill_reg path any block of |
|
|
|
* consecutive instructions using scratch_reg must start with a write |
|
|
|
* to that register, so we would've exited the loop in the check for |
|
|
|
* the write that we have at the start of this loop), and in that case |
|
|
|
* it means that we found the point at which the scratch_reg would be |
|
|
|
* unspilled. Since we always unspill a full vec4, it means that we |
|
|
|
* have all the channels available and we can just return true to |
|
|
|
* signal that we can reuse the register in the current instruction |
|
|
|
* too. |
|
|
|
*/ |
|
|
|
return prev_inst_read_scratch_reg; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return prev_inst_read_scratch_reg; |
|
|
|
} |
|
|
|
|
|
|
|
void |
|
|
|
vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) |
|
|
|
{ |
|
|
@@ -284,9 +375,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) |
|
|
|
foreach_block_and_inst(block, vec4_instruction, inst, cfg) { |
|
|
|
for (unsigned int i = 0; i < 3; i++) { |
|
|
|
if (inst->src[i].file == GRF) { |
|
|
|
spill_costs[inst->src[i].reg] += loop_scale; |
|
|
|
if (inst->src[i].reladdr) |
|
|
|
no_spill[inst->src[i].reg] = true; |
|
|
|
/* We will only unspill src[i] it it wasn't unspilled for the |
|
|
|
* previous instruction, in which case we'll just reuse the scratch |
|
|
|
* reg for this instruction. |
|
|
|
*/ |
|
|
|
if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) { |
|
|
|
spill_costs[inst->src[i].reg] += loop_scale; |
|
|
|
if (inst->src[i].reladdr) |
|
|
|
no_spill[inst->src[i].reg] = true; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@@ -345,19 +442,32 @@ vec4_visitor::spill_reg(int spill_reg_nr) |
|
|
|
unsigned int spill_offset = last_scratch++; |
|
|
|
|
|
|
|
/* Generate spill/unspill instructions for the objects being spilled. */ |
|
|
|
int scratch_reg = -1; |
|
|
|
foreach_block_and_inst(block, vec4_instruction, inst, cfg) { |
|
|
|
for (unsigned int i = 0; i < 3; i++) { |
|
|
|
if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { |
|
|
|
src_reg spill_reg = inst->src[i]; |
|
|
|
inst->src[i].reg = alloc.allocate(1); |
|
|
|
dst_reg temp = dst_reg(inst->src[i]); |
|
|
|
|
|
|
|
emit_scratch_read(block, inst, temp, spill_reg, spill_offset); |
|
|
|
if (scratch_reg == -1 || |
|
|
|
!can_use_scratch_for_source(inst, i, scratch_reg)) { |
|
|
|
/* We need to unspill anyway so make sure we read the full vec4 |
|
|
|
* in any case. This way, the cached register can be reused |
|
|
|
* for consecutive instructions that read different channels of |
|
|
|
* the same vec4. |
|
|
|
*/ |
|
|
|
scratch_reg = alloc.allocate(1); |
|
|
|
src_reg temp = inst->src[i]; |
|
|
|
temp.reg = scratch_reg; |
|
|
|
temp.swizzle = BRW_SWIZZLE_XYZW; |
|
|
|
emit_scratch_read(block, inst, |
|
|
|
dst_reg(temp), inst->src[i], spill_offset); |
|
|
|
} |
|
|
|
assert(scratch_reg != -1); |
|
|
|
inst->src[i].reg = scratch_reg; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) { |
|
|
|
emit_scratch_write(block, inst, spill_offset); |
|
|
|
scratch_reg = inst->dst.reg; |
|
|
|
} |
|
|
|
} |
|
|
|
|