Просмотр исходного кода

v3d: handle writes to gl_Layer from geometry shaders

When geometry shaders write a value to gl_Layer that doesn't correspond to
an existing layer in the target framebuffer the rendering behavior is
undefined according to the spec, however, there are CTS tests that trigger
this scenario on purpose, probably to ensure that nothing terrible happens.

For V3D, this situation is problematic because the binner uses the layer
index to select the offset to write into the tile state data, and we only
allocate tile state for MAX2(num_layers, 1), so we want to make sure we
don't produce values that would lead to out of bounds writes. The simulator
has an assert to catch this, although we haven't observed issues in actual
hardware it is probably best to play safe.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
master
Iago Toral Quiroga 5 лет назад
Родитель
Сommit
6c7a2b69f8

+ 5
- 0
src/broadcom/compiler/nir_to_vir.c Просмотреть файл

@@ -2346,6 +2346,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
ntq_store_dest(c, &instr->dest, 0, vir_IID(c));
break;

case nir_intrinsic_load_fb_layers_v3d:
ntq_store_dest(c, &instr->dest, 0,
vir_uniform(c, QUNIFORM_FB_LAYERS, 0));
break;

default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);

+ 8
- 0
src/broadcom/compiler/v3d_compiler.h Просмотреть файл

@@ -279,6 +279,14 @@ enum quniform_contents {
* L2T cache will effectively be the shared memory area.
*/
QUNIFORM_SHARED_OFFSET,

/**
* Returns the number of layers in the framebuffer.
*
* This is used to cap gl_Layer in geometry shaders to avoid
* out-of-bounds accesses into the tile state during binning.
*/
QUNIFORM_FB_LAYERS,
};

static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)

+ 40
- 0
src/broadcom/compiler/v3d_nir_lower_io.c Просмотреть файл

@@ -193,6 +193,46 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
}

if (var->data.location == VARYING_SLOT_LAYER) {
assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

/* From the GLES 3.2 spec:
*
* "When fragments are written to a layered framebuffer, the
* fragment’s layer number selects an image from the array
* of images at each attachment (...). If the fragment’s
* layer number is negative, or greater than or equal to
* the minimum number of layers of any attachment, the
* effects of the fragment on the framebuffer contents are
* undefined."
*
* This suggests we can just ignore that situation, however,
* for V3D an out-of-bounds layer index means that the binner
* might do out-of-bounds writes access to the tile state. The
* simulator has an assert to catch this, so we play safe here
* and we make sure that doesn't happen by setting gl_Layer
* to 0 in that case (we always allocate tile state for at
* least one layer).
*/
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_fb_layers_v3d);
load->num_components = 1;
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
nir_ssa_def *fb_layers = &load->dest.ssa;

nir_ssa_def *cond = nir_ige(b, src, fb_layers);
nir_ssa_def *layer_id =
nir_bcsel(b, cond,
nir_imm_int(b, 0),
nir_ishl(b, src, nir_imm_int(b, 16)));
header = nir_ior(b, header, layer_id);
nir_store_var(b, state->gs.header_var, header, 0x1);
}

/* Scalarize outputs if it hasn't happened already, since we want to
* schedule each VPM write individually. We can skip any outut
* components not read by the FS.

+ 4
- 0
src/compiler/nir/nir_intrinsics.py Просмотреть файл

@@ -868,3 +868,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], [])
# src[] = { value, render_target }
# BASE = sample index
store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], [])

# V3D-specific intrinsic to load the number of layers attached to
# the target framebuffer
intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])

+ 8
- 0
src/gallium/drivers/v3d/v3d_uniforms.c Просмотреть файл

@@ -373,6 +373,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
v3d->compute_shared_memory, 0);
break;

case QUNIFORM_FB_LAYERS:
cl_aligned_u32(&uniforms, job->num_layers);
break;

default:
assert(quniform_contents_is_texture_p0(uinfo->contents[i]));

@@ -465,6 +469,10 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
/* Compute always recalculates uniforms. */
break;

case QUNIFORM_FB_LAYERS:
dirty |= VC5_DIRTY_FRAMEBUFFER;
break;

default:
assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |

Загрузка…
Отмена
Сохранить