Parcourir la source

aco/gfx10: Fix mitigation of VMEMtoScalarWriteHazard.

This commit refines the VMEMtoScalarWriteHazard mitigation, based
upon a closer look at what LLVM does. Also changes the code to
match the structure of the other hazard mitigations.

* The hazard is not only triggered by VMEM, FLAT and GLOBAL
  but also SCRATCH and DS instructions.
* The SMEM/SALU instructions only cause a hazard when they
  write a register that the VMEM/etc. instructions are reading.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
tags/19.3-branchpoint
Timur Kristóf il y a 6 ans
Parent
révision
b01847bd94
1 fichiers modifiés avec 20 ajouts et 10 suppressions
  1. 20
    10
      src/amd/compiler/aco_insert_NOPs.cpp

+ 20
- 10
src/amd/compiler/aco_insert_NOPs.cpp Voir le fichier

@@ -40,7 +40,6 @@ struct NOP_ctx {
int VALU_wrsgpr = -10;

/* GFX10 */
int last_VMEM_since_scalar_write = -1;
bool has_VOPC = false;
bool has_nonVALU_exec_read = false;
bool has_VMEM = false;
@@ -48,6 +47,7 @@ struct NOP_ctx {
bool has_DS = false;
bool has_branch_after_DS = false;
std::bitset<128> sgprs_read_by_SMEM;
std::bitset<128> sgprs_read_by_VMEM;

NOP_ctx(Program* program) : chip_class(program->chip_class) {
vcc_physical = program->config->num_sgprs - 2;
@@ -342,21 +342,31 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>&
if (instr->format == Format::SMEM)
sNOPs = std::max(sNOPs, handle_SMEM_clause(instr, new_idx, new_instructions));

/* handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between */
if (instr->isSALU() || instr->format == Format::SMEM) {
if (!instr->definitions.empty() && ctx.last_VMEM_since_scalar_write != -1) {
ctx.last_VMEM_since_scalar_write = -1;
vNOPs = 1;
/* VMEMtoScalarWriteHazard
* Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between.
*/
if (instr->isVMEM() || instr->format == Format::FLAT || instr->format == Format::GLOBAL ||
instr->format == Format::SCRATCH || instr->format == Format::DS) {
/* Remember all SGPRs that are read by the VMEM instruction */
mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
} else if (instr->isSALU() || instr->format == Format::SMEM) {
/* Check if the SALU/SMEM instruction writes an SGPR that was previously read by a VMEM/FLAT/GLOBAL/SCRATCH/DS instruction */
if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) {
ctx.sgprs_read_by_VMEM.reset();

/* Insert v_nop to mitigate the problem */
aco_ptr<VOP1_instruction> nop{create_instruction<VOP1_instruction>(aco_opcode::v_nop, Format::VOP1, 0, 0)};
new_instructions.emplace_back(std::move(nop));
}
} else if (instr->isVMEM() || instr->isFlatOrGlobal()) {
ctx.last_VMEM_since_scalar_write = new_idx;
} else if (instr->opcode == aco_opcode::s_waitcnt) {
/* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
uint16_t imm = static_cast<SOPP_instruction*>(instr.get())->imm;
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
if (vmcnt == 0)
ctx.last_VMEM_since_scalar_write = -1;
ctx.sgprs_read_by_VMEM.reset();
} else if (instr->isVALU()) {
ctx.last_VMEM_since_scalar_write = -1;
/* Hazard is mitigated by any VALU instruction */
ctx.sgprs_read_by_VMEM.reset();
}

/* VcmpxPermlaneHazard

Chargement…
Annuler
Enregistrer