Quellcode durchsuchen

aco: handle loop exit and IF merge phis with break/discard

ACO considers discards jumps and creates edges in the CFG for them but NIR
does neither of these.

This can be fixed instead by keeping track of whether a side of an IF had
a break/discard, but this doesn't solve the issue with discards affecting
loop exit phis. So this reworks phi handling a bit.

Fixes these tests:
dEQP-VK.graphicsfuzz.disc-and-add-in-func-in-loop
dEQP-VK.graphicsfuzz.loop-call-discard
dEQP-VK.graphicsfuzz.complex-nested-loops-and-call

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
master
Rhys Perry vor 6 Jahren
Ursprung
Commit
0e8da9f607

+ 75
- 54
src/amd/compiler/aco_instruction_selection.cpp Datei anzeigen

@@ -6912,20 +6912,48 @@ Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa)
void visit_phi(isel_context *ctx, nir_phi_instr *instr)
{
aco_ptr<Pseudo_instruction> phi;
unsigned num_src = exec_list_length(&instr->srcs);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == s2);

aco_opcode opcode = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index] ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
bool logical = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index];
logical |= ctx->block->kind & block_kind_merge;
aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi;

/* we want a sorted list of sources, since the predecessor list is also sorted */
std::map<unsigned, nir_ssa_def*> phi_src;
bool all_undef = true;
nir_foreach_phi_src(src, instr) {
nir_foreach_phi_src(src, instr)
phi_src[src->pred->index] = src->src.ssa;
if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
all_undef = false;

std::vector<unsigned>& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds;
unsigned num_operands = 0;
Operand operands[std::max(exec_list_length(&instr->srcs), (unsigned)preds.size())];
unsigned num_defined = 0;
unsigned cur_pred_idx = 0;
for (std::pair<unsigned, nir_ssa_def *> src : phi_src) {
if (cur_pred_idx < preds.size()) {
/* handle missing preds (IF merges with discard/break) and extra preds (loop exit with discard) */
unsigned block = ctx->cf_info.nir_to_aco[src.first];
unsigned skipped = 0;
while (cur_pred_idx + skipped < preds.size() && preds[cur_pred_idx + skipped] != block)
skipped++;
if (cur_pred_idx + skipped < preds.size()) {
for (unsigned i = 0; i < skipped; i++)
operands[num_operands++] = Operand(dst.regClass());
cur_pred_idx += skipped;
} else {
continue;
}
}
cur_pred_idx++;
Operand op = get_phi_operand(ctx, src.second);
operands[num_operands++] = op;
num_defined += !op.isUndefined();
}
if (all_undef) {
/* handle block_kind_continue_or_break at loop exit blocks */
while (cur_pred_idx++ < preds.size())
operands[num_operands++] = Operand(dst.regClass());

if (num_defined == 0) {
Builder bld(ctx->program, ctx->block);
if (dst.regClass() == s1) {
bld.sop1(aco_opcode::s_mov_b32, Definition(dst), Operand(0u));
@@ -6941,17 +6969,41 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
return;
}

/* we can use a linear phi in some cases if one src is undef */
if (dst.is_linear() && ctx->block->kind & block_kind_merge && num_defined == 1) {
phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, num_operands, 1));

Block *linear_else = &ctx->program->blocks[ctx->block->linear_preds[1]];
Block *invert = &ctx->program->blocks[linear_else->linear_preds[0]];
assert(invert->kind & block_kind_invert);

unsigned then_block = invert->linear_preds[0];

Block* insert_block = NULL;
for (unsigned i = 0; i < num_operands; i++) {
Operand op = operands[i];
if (op.isUndefined())
continue;
insert_block = ctx->block->logical_preds[i] == then_block ? invert : ctx->block;
phi->operands[0] = op;
break;
}
assert(insert_block); /* should be handled by the "num_defined == 0" case above */
phi->operands[1] = Operand(dst.regClass());
phi->definitions[0] = Definition(dst);
insert_block->instructions.emplace(insert_block->instructions.begin(), std::move(phi));
return;
}

/* try to scalarize vector phis */
if (instr->dest.ssa.bit_size != 1 && dst.size() > 1) {
// TODO: scalarize linear phis on divergent ifs
bool can_scalarize = (opcode == aco_opcode::p_phi || !(ctx->block->kind & block_kind_merge));
std::array<Temp, 4> new_vec;
for (std::pair<const unsigned, nir_ssa_def*>& pair : phi_src) {
Operand src = get_phi_operand(ctx, pair.second);
if (src.isTemp() && ctx->allocated_vec.find(src.tempId()) == ctx->allocated_vec.end()) {
for (unsigned i = 0; can_scalarize && (i < num_operands); i++) {
Operand src = operands[i];
if (src.isTemp() && ctx->allocated_vec.find(src.tempId()) == ctx->allocated_vec.end())
can_scalarize = false;
break;
}
}
if (can_scalarize) {
unsigned num_components = instr->dest.ssa.num_components;
@@ -6960,12 +7012,10 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)

aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
for (unsigned k = 0; k < num_components; k++) {
phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_src, 1));
std::map<unsigned, nir_ssa_def*>::iterator it = phi_src.begin();
for (unsigned i = 0; i < num_src; i++) {
Operand src = get_phi_operand(ctx, it->second);
phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_operands, 1));
for (unsigned i = 0; i < num_operands; i++) {
Operand src = operands[i];
phi->operands[i] = src.isTemp() ? Operand(ctx->allocated_vec[src.tempId()][k]) : Operand(rc);
++it;
}
Temp phi_dst = {ctx->program->allocateId(), rc};
phi->definitions[0] = Definition(phi_dst);
@@ -6980,43 +7030,9 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
}
}

unsigned extra_src = 0;
if (opcode == aco_opcode::p_linear_phi && (ctx->block->kind & block_kind_loop_exit) &&
ctx->program->blocks[ctx->block->index-2].kind & block_kind_continue_or_break) {
extra_src++;
}

phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_src + extra_src, 1));

/* if we have a linear phi on a divergent if, we know that one src is undef */
if (opcode == aco_opcode::p_linear_phi && ctx->block->kind & block_kind_merge) {
assert(extra_src == 0);
Block* block;
/* we place the phi either in the invert-block or in the current block */
if (phi_src.begin()->second->parent_instr->type != nir_instr_type_ssa_undef) {
assert((++phi_src.begin())->second->parent_instr->type == nir_instr_type_ssa_undef);
Block& linear_else = ctx->program->blocks[ctx->block->linear_preds[1]];
block = &ctx->program->blocks[linear_else.linear_preds[0]];
assert(block->kind & block_kind_invert);
phi->operands[0] = get_phi_operand(ctx, phi_src.begin()->second);
} else {
assert((++phi_src.begin())->second->parent_instr->type != nir_instr_type_ssa_undef);
block = ctx->block;
phi->operands[0] = get_phi_operand(ctx, (++phi_src.begin())->second);
}
phi->operands[1] = Operand(dst.regClass());
phi->definitions[0] = Definition(dst);
block->instructions.emplace(block->instructions.begin(), std::move(phi));
return;
}

std::map<unsigned, nir_ssa_def*>::iterator it = phi_src.begin();
for (unsigned i = 0; i < num_src; i++) {
phi->operands[i] = get_phi_operand(ctx, it->second);
++it;
}
for (unsigned i = 0; i < extra_src; i++)
phi->operands[num_src + i] = Operand(dst.regClass());
phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_operands, 1));
for (unsigned i = 0; i < num_operands; i++)
phi->operands[i] = operands[i];
phi->definitions[0] = Definition(dst);
ctx->block->instructions.emplace(ctx->block->instructions.begin(), std::move(phi));
}
@@ -7062,6 +7078,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
return;
}
ctx->cf_info.parent_loop.has_divergent_branch = true;
ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index;
break;
case nir_jump_continue:
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
@@ -7073,6 +7090,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
we must ensure that they are handled correctly */
ctx->cf_info.parent_loop.has_divergent_continue = true;
ctx->cf_info.parent_loop.has_divergent_branch = true;
ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index;
} else {
/* uniform continue - directly jump to the loop header */
ctx->block->kind |= block_kind_uniform;
@@ -7144,6 +7162,9 @@ void visit_block(isel_context *ctx, nir_block *block)
//abort();
}
}

if (!ctx->cf_info.parent_loop.has_divergent_branch)
ctx->cf_info.nir_to_aco[block->index] = ctx->block->index;
}



+ 5
- 0
src/amd/compiler/aco_instruction_selection_setup.cpp Datei anzeigen

@@ -69,6 +69,7 @@ struct isel_context {
bool is_divergent = false;
} parent_if;
bool exec_potentially_empty = false;
std::unique_ptr<unsigned[]> nir_to_aco; /* NIR block index to ACO block index */
} cf_info;

Temp arg_temps[AC_MAX_ARGS];
@@ -133,6 +134,8 @@ void init_context(isel_context *ctx, nir_shader *shader)

unsigned spi_ps_inputs = 0;

std::unique_ptr<unsigned[]> nir_to_aco{new unsigned[impl->num_blocks]()};

bool done = false;
while (!done) {
done = true;
@@ -538,6 +541,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
allocated[i] = Temp(ctx->program->allocateId(), allocated[i].regClass());

ctx->allocated.reset(allocated.release());
ctx->cf_info.nir_to_aco.reset(nir_to_aco.release());
}

Pseudo_instruction *add_startpgm(struct isel_context *ctx)
@@ -899,6 +903,7 @@ setup_isel_context(Program* program,

nir_function_impl *func = nir_shader_get_entrypoint(nir);
nir_index_ssa_defs(func);
nir_metadata_require(func, nir_metadata_block_index);

if (args->options->dump_preoptir) {
fprintf(stderr, "NIR shader before instruction selection:\n");

Laden…
Abbrechen
Speichern