This is more efficient, as it means we no longer have to upload the same set
of ABO surfaces to every stage in the program.

This also fixes a bug introduced in commit c0cd5b, where var->data.binding was
being used as a replacement for the atomic buffer index. The two do not have
to be the same value; they just happened to end up equal when the binding is 0.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: Alejandro Piñeiro <apinheiro@igalia.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175
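For context, the per-stage lookup this patch enables can be illustrated with a
minimal, self-contained C sketch. The struct definitions below are simplified
stand-ins rather than the real Mesa headers, and upload_abo_surfaces_sketch()
is a hypothetical helper invented for illustration; only the AtomicBuffers and
NumAtomicBuffers fields and the Binding member mirror what the patch adds:

   #include <stdio.h>

   /* Simplified stand-ins for the Mesa types touched by this patch. */
   struct gl_active_atomic_buffer {
      unsigned Binding;   /* GL atomic counter buffer binding point */
   };

   struct gl_shader {
      /* Per-stage list added by this patch: pointers into the program-wide
       * AtomicBuffers array, holding only the buffers this stage references. */
      struct gl_active_atomic_buffer **AtomicBuffers;
      unsigned NumAtomicBuffers;
   };

   /* A driver now walks only the stage's own list when setting up surfaces. */
   static void
   upload_abo_surfaces_sketch(const struct gl_shader *shader)
   {
      if (!shader || shader->NumAtomicBuffers == 0)
         return;

      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
         /* The real code looks up ctx->AtomicBufferBindings[Binding] and
          * emits a RAW buffer surface; here we just print the mapping. */
         printf("surface %u -> ABO binding %u\n",
                i, shader->AtomicBuffers[i]->Binding);
      }
   }

   int main(void)
   {
      struct gl_active_atomic_buffer ab0 = { .Binding = 0 };
      struct gl_active_atomic_buffer ab2 = { .Binding = 2 };
      struct gl_active_atomic_buffer *list[] = { &ab0, &ab2 };
      struct gl_shader sh = { .AtomicBuffers = list, .NumAtomicBuffers = 2 };

      upload_abo_surfaces_sketch(&sh);
      return 0;
   }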
@@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
                                     struct gl_shader_program *prog)
{
   unsigned num_buffers;
   unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
   active_atomic_buffer *abs =
      find_active_atomic_counters(ctx, prog, &num_buffers);
@@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
      }
      /* Assign stage-specific fields. */
      for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j)
         mab.StageReferences[j] =
            (ab.stage_references[j] ? GL_TRUE : GL_FALSE);
      for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
         if (ab.stage_references[j]) {
            mab.StageReferences[j] = GL_TRUE;
            num_atomic_buffers[j]++;
         } else {
            mab.StageReferences[j] = GL_FALSE;
         }
      }
      i++;
   }
   /* Store a list of pointers to atomic buffers per stage and store the
    * index to the intra-stage buffer list in uniform storage.
    */
   for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
      if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
         prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
         prog->_LinkedShaders[j]->AtomicBuffers =
            rzalloc_array(prog, gl_active_atomic_buffer *,
                          num_atomic_buffers[j]);
         unsigned intra_stage_idx = 0;
         for (unsigned i = 0; i < num_buffers; i++) {
            struct gl_active_atomic_buffer *atomic_buffer =
               &prog->AtomicBuffers[i];
            if (atomic_buffer->StageReferences[j]) {
               prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
                  atomic_buffer;
               for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
                  prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
                     intra_stage_idx;
                  prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
                     true;
               }
               intra_stage_idx++;
            }
         }
      }
   }
   delete [] abs;
   assert(i == num_buffers);
}
@@ -392,8 +392,6 @@ nir_visitor::visit(ir_variable *ir)
   var->data.index = ir->data.index;
   var->data.binding = ir->data.binding;
   /* XXX Get rid of buffer_index */
   var->data.atomic.buffer_index = ir->data.binding;
   var->data.atomic.offset = ir->data.atomic.offset;
   var->data.image.read_only = ir->data.image_read_only;
   var->data.image.write_only = ir->data.image_write_only;
@@ -308,7 +308,6 @@ typedef struct {
    * Location an atomic counter is stored at.
    */
   struct {
      unsigned buffer_index;
      unsigned offset;
   } atomic;
@@ -1978,7 +1977,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
void nir_lower_two_sided_color(nir_shader *shader);
void nir_lower_atomics(nir_shader *shader);
void nir_lower_atomics(nir_shader *shader,
                       const struct gl_shader_program *shader_program);
void nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
@@ -25,17 +25,24 @@
 *
 */
#include "ir_uniform.h"
#include "nir.h"
#include "main/config.h"
#include <assert.h>
typedef struct {
   const struct gl_shader_program *shader_program;
   nir_shader *shader;
} lower_atomic_state;
/*
 * replace atomic counter intrinsics that use a variable with intrinsics
 * that directly store the buffer index and byte offset
 */
static void
lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
lower_instr(nir_intrinsic_instr *instr,
            lower_atomic_state *state)
{
   nir_intrinsic_op op;
   switch (instr->intrinsic) {
@@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
      return; /* atomics passed as function arguments can't be lowered */
   void *mem_ctx = ralloc_parent(instr);
   unsigned uniform_loc = instr->variables[0]->var->data.location;
   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
   new_instr->const_index[0] =
      (int) instr->variables[0]->var->data.atomic.buffer_index;
      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
@@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state)
{
   nir_foreach_instr_safe(block, instr) {
      if (instr->type == nir_instr_type_intrinsic)
         lower_instr(nir_instr_as_intrinsic(instr), state);
         lower_instr(nir_instr_as_intrinsic(instr),
                     (lower_atomic_state *) state);
   }
   return true;
}
void
nir_lower_atomics(nir_shader *shader)
nir_lower_atomics(nir_shader *shader,
                  const struct gl_shader_program *shader_program)
{
   lower_atomic_state state = {
      .shader = shader,
      .shader_program = shader_program,
   };
   nir_foreach_overload(shader, overload) {
      if (overload->impl) {
         nir_foreach_block(overload->impl, lower_block, overload->impl);
         nir_foreach_block(overload->impl, lower_block, (void *) &state);
         nir_metadata_preserve(overload->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
      }
@@ -1463,7 +1463,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
                             struct brw_stage_prog_data *prog_data,
                             bool dword_pitch);
void brw_upload_abo_surfaces(struct brw_context *brw,
                             struct gl_shader_program *prog,
                             struct gl_shader *shader,
                             struct brw_stage_state *stage_state,
                             struct brw_stage_prog_data *prog_data);
void brw_upload_image_surfaces(struct brw_context *brw,
@@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
   if (prog) {
      /* BRW_NEW_GS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
                              &brw->gs.prog_data->base.base);
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
                              &brw->gs.base, &brw->gs.prog_data->base.base);
   }
}
@@ -249,8 +249,10 @@ brw_create_nir(struct brw_context *brw,
   nir_lower_system_values(nir);
   nir_validate_shader(nir);
   nir_lower_atomics(nir);
   nir_validate_shader(nir);
   if (shader_prog) {
      nir_lower_atomics(nir, shader_prog);
      nir_validate_shader(nir);
   }
   nir_optimize(nir, is_scalar);
@@ -1191,9 +1191,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }
   if (shader_prog && shader_prog->NumAtomicBuffers) {
   if (shader && shader->NumAtomicBuffers) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += shader_prog->NumAtomicBuffers;
      next_binding_table_offset += shader->NumAtomicBuffers;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }
@@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
   if (prog) {
      /* BRW_NEW_VS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
                              &brw->vs.prog_data->base.base);
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
                              &brw->vs.base, &brw->vs.prog_data->base.base);
   }
}
@@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
@@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];
   for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
      brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                          binding->Offset, BRW_SURFACEFORMAT_RAW,
                                          bo->size - binding->Offset, 1, true);
   }
   if (shader && shader->NumAtomicBuffers) {
      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo = intel_bufferobj_buffer(
            brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
         brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                             binding->Offset, BRW_SURFACEFORMAT_RAW,
                                             bo->size - binding->Offset, 1, true);
      }
   if (prog->NumAtomicBuffers)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}
static void
@@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                              &brw->wm.base, &brw->wm.prog_data->base);
   }
}
@@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
                              &brw->cs.prog_data->base);
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                              &brw->cs.base, &brw->cs.prog_data->base);
   }
}
@@ -2389,6 +2389,9 @@ struct gl_shader
    */
   GLuint NumImages;
   struct gl_active_atomic_buffer **AtomicBuffers;
   unsigned NumAtomicBuffers;
   /**
    * Whether early fragment tests are enabled as defined by
    * ARB_shader_image_load_store.
@@ -4496,7 +4499,7 @@ static inline bool
_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
{
   return ctx->Shader._CurrentFragmentProgram != NULL &&
      ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
      ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0;
}
#ifdef __cplusplus