Browse Source

i965: Move payload reg setup to compile, not lookup time.

Payload reg setup on gen6 depends more on the dispatch width as well
as the uses_depth, computes_depth, and other flags.  That's something
we want to decide at compile time, not at cache lookup.  As a bonus,
the fragment shader program cache lookup should be cheaper now that
there's less to compute for the hash key.
tags/android-x86-2.2
Eric Anholt 15 years ago
parent
commit
16f8c82389

+ 9
- 9
src/mesa/drivers/dri/i965/brw_fs.cpp View File

@@ -1955,7 +1955,7 @@ fs_visitor::emit_interpolation_setup_gen6()
emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));

this->current_annotation = "compute 1/pos.w";
this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->pixel_w = fs_reg(this, glsl_type::float_type);
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);

@@ -1983,17 +1983,17 @@ fs_visitor::emit_fb_writes()
nr += 2;
}

if (c->key.aa_dest_stencil_reg) {
if (c->aa_dest_stencil_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
}

/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
nr += 4;

if (c->key.source_depth_to_render_target) {
if (c->key.computes_depth) {
if (c->source_depth_to_render_target) {
if (c->computes_depth) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
@@ -2002,13 +2002,13 @@ fs_visitor::emit_fb_writes()
} else {
/* Pass through the payload depth. */
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
}
}

if (c->key.dest_depth_reg) {
if (c->dest_depth_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
}

fs_reg color = reg_undef;
@@ -2458,7 +2458,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
void
fs_visitor::assign_curb_setup()
{
c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

/* Map the offsets in the UNIFORM file to fixed HW regs. */

+ 61
- 53
src/mesa/drivers/dri/i965/brw_wm.c View File

@@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_emit(c);
}

/**
 * Decide the thread payload register layout for this fragment program
 * compile and record it in the compile struct.
 *
 * On gen6+ the layout is derived here from the fragment program itself
 * (whether it reads FRAG_ATTRIB_WPOS, whether it writes FRAG_RESULT_DEPTH,
 * and whether it is a GLSL program).  On earlier generations the work is
 * handed to brw_wm_lookup_iz().
 *
 * \param brw  driver context; only brw->intel is consulted here
 * \param c    compile state; c->fp is read, and the payload fields
 *             (nr_payload_regs, source_depth_reg, source_w_reg,
 *             source_depth_to_render_target, computes_depth) are written
 *             on the gen6+ path
 */
static void
brw_wm_payload_setup(struct brw_context *brw,
		     struct brw_wm_compile *c)
{
   struct intel_context *intel = &brw->intel;
   bool reads_wpos;
   bool not_8_wide;
   unsigned next_reg;

   /* Pre-gen6 hardware: the IZ lookup fills in the payload fields. */
   if (intel->gen < 6) {
      brw_wm_lookup_iz(intel, c);
      return;
   }

   reads_wpos = (c->fp->program.Base.InputsRead &
		 (1 << FRAG_ATTRIB_WPOS)) != 0;

   /* Non-GLSL programs stand in for "dispatch_width != 8" here; each
    * per-pixel payload item then takes a second register.
    */
   not_8_wide = !c->fp->isGLSL;

   /* R0-1: masks, pixel X/Y coordinates.
    * R2: only for 32-pixel dispatch (not counted).
    * R3-4: perspective pixel location barycentric.
    */
   next_reg = 2 + 2;

   /* R5-6: perspective pixel location bary for dispatch width != 8 */
   if (not_8_wide)
      next_reg += 2;

   /* Not counted:
    * R7-10: perspective centroid barycentric
    * R11-14: perspective sample barycentric
    * R15-18: linear pixel location barycentric
    * R19-22: linear centroid barycentric
    * R23-26: linear sample barycentric
    */

   if (reads_wpos) {
      /* R27: interpolated depth if uses source depth
       * R28: interpolated depth if not 8-wide.
       */
      c->source_depth_reg = next_reg;
      next_reg += not_8_wide ? 2 : 1;

      /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
       * R30: interpolated W if not 8-wide.
       *
       * NOTE(review): source_w_reg is declared as a 3-bit field in
       * struct brw_wm_compile; in the non-GLSL case the value stored
       * here is 8, which does not fit.  Confirm no non-GLSL consumer
       * reads source_w_reg.
       */
      c->source_w_reg = next_reg;
      next_reg += not_8_wide ? 2 : 1;
   }

   /* Not counted:
    * R31: MSAA position offsets.
    * R32-: bary for 32-pixel.
    * R58-59: interp W for 32-pixel.
    */
   c->nr_payload_regs = next_reg;

   /* A program that writes gl_FragDepth must send depth to the RT. */
   if (c->fp->program.Base.OutputsWritten &
       BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      c->source_depth_to_render_target = GL_TRUE;
      c->computes_depth = GL_TRUE;
   }
}

/**
* All Mesa program -> GPU code generation goes through this function.
@@ -167,6 +223,8 @@ static void do_wm_prog( struct brw_context *brw,

brw_init_compile(brw, &c->func);

brw_wm_payload_setup(brw, c);

/* temporary sanity check assertion */
ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));

@@ -220,12 +278,10 @@ static void do_wm_prog( struct brw_context *brw,
static void brw_wm_populate_key( struct brw_context *brw,
struct brw_wm_prog_key *key )
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -285,57 +341,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
}
}

if (intel->gen >= 6) {
/* R0-1: masks, pixel X/Y coordinates. */
key->nr_payload_regs = 2;
/* R2: only for 32-pixel dispatch.*/
/* R3-4: perspective pixel location barycentric */
key->nr_payload_regs += 2;
/* R5-6: perspective pixel location bary for dispatch width != 8 */
if (!fp->isGLSL) { /* dispatch_width != 8 */
key->nr_payload_regs += 2;
}
/* R7-10: perspective centroid barycentric */
/* R11-14: perspective sample barycentric */
/* R15-18: linear pixel location barycentric */
/* R19-22: linear centroid barycentric */
/* R23-26: linear sample barycentric */

/* R27: interpolated depth if uses source depth */
if (uses_depth) {
key->source_depth_reg = key->nr_payload_regs;
key->nr_payload_regs++;
if (!fp->isGLSL) { /* dispatch_width != 8 */
/* R28: interpolated depth if not 8-wide. */
key->nr_payload_regs++;
}
}
/* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
*/
if (uses_depth) {
key->source_w_reg = key->nr_payload_regs;
key->nr_payload_regs++;
if (!fp->isGLSL) { /* dispatch_width != 8 */
/* R30: interpolated W if not 8-wide. */
key->nr_payload_regs++;
}
}
/* R31: MSAA position offsets. */
/* R32-: bary for 32-pixel. */
/* R58-59: interp W for 32-pixel. */

if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
key->source_depth_to_render_target = GL_TRUE;
key->computes_depth = GL_TRUE;
}

} else {
brw_wm_lookup_iz(intel,
line_aa,
lookup,
uses_depth,
key);
}
key->iz_lookup = lookup;
key->line_aa = line_aa;
key->stats_wm = brw->intel.stats_wm;

/* BRW_NEW_WM_INPUT_DIMENSIONS */
key->proj_attrib_mask = brw->wm.input_size_masks[4-1];

+ 14
- 13
src/mesa/drivers/dri/i965/brw_wm.h View File

@@ -59,16 +59,9 @@
#define AA_ALWAYS 2

struct brw_wm_prog_key {
GLuint source_depth_reg:3;
GLuint source_w_reg:3;
GLuint aa_dest_stencil_reg:3;
GLuint dest_depth_reg:3;
GLuint nr_payload_regs:4;
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint stats_wm:1;
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint runtime_check_aads_emit:1;
GLuint nr_color_regions:5;
GLuint render_to_fbo:1;

@@ -81,6 +74,8 @@ struct brw_wm_prog_key {

GLushort drawable_height;
GLbitfield64 vp_outputs_written;
GLuint iz_lookup;
GLuint line_aa;
GLuint program_string_id:32;
};

@@ -204,6 +199,15 @@ struct brw_wm_compile {
PASS2_DONE
} state;

GLuint source_depth_reg:3;
GLuint source_w_reg:3;
GLuint aa_dest_stencil_reg:3;
GLuint dest_depth_reg:3;
GLuint nr_payload_regs:4;
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint runtime_check_aads_emit:1;

/* Initial pass - translate fp instructions to fp instructions,
* simplifying and adding instructions for interpolation and
* framebuffer writes.
@@ -306,11 +310,8 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );

void brw_wm_lookup_iz( struct intel_context *intel,
GLuint line_aa,
GLuint lookup,
GLboolean ps_uses_depth,
struct brw_wm_prog_key *key );
void brw_wm_lookup_iz(struct intel_context *intel,
struct brw_wm_compile *c);

GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);

+ 10
- 10
src/mesa/drivers/dri/i965/brw_wm_emit.c View File

@@ -1387,8 +1387,8 @@ static void emit_aa( struct brw_wm_compile *c,
GLuint reg )
{
struct brw_compile *p = &c->func;
GLuint comp = c->key.aa_dest_stencil_reg / 2;
GLuint off = c->key.aa_dest_stencil_reg % 2;
GLuint comp = c->aa_dest_stencil_reg / 2;
GLuint off = c->aa_dest_stencil_reg % 2;
struct brw_reg aa = offset(arg1[comp], off);

brw_push_insn_state(p);
@@ -1420,7 +1420,7 @@ void emit_fb_write(struct brw_wm_compile *c,

/* Reserve a space for AA - may not be needed:
*/
if (c->key.aa_dest_stencil_reg)
if (c->aa_dest_stencil_reg)
nr += 1;

/* I don't really understand how this achieves the color interleave
@@ -1493,9 +1493,9 @@ void emit_fb_write(struct brw_wm_compile *c,

brw_pop_insn_state(p);

if (c->key.source_depth_to_render_target)
if (c->source_depth_to_render_target)
{
if (c->key.computes_depth)
if (c->computes_depth)
brw_MOV(p, brw_message_reg(nr), arg2[2]);
else
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
@@ -1503,10 +1503,10 @@ void emit_fb_write(struct brw_wm_compile *c,
nr += 2;
}

if (c->key.dest_depth_reg)
if (c->dest_depth_reg)
{
GLuint comp = c->key.dest_depth_reg / 2;
GLuint off = c->key.dest_depth_reg % 2;
GLuint comp = c->dest_depth_reg / 2;
GLuint off = c->dest_depth_reg % 2;

if (off != 0) {
brw_push_insn_state(p);
@@ -1528,8 +1528,8 @@ void emit_fb_write(struct brw_wm_compile *c,
nr -= 2;
}

if (!c->key.runtime_check_aads_emit) {
if (c->key.aa_dest_stencil_reg)
if (!c->runtime_check_aads_emit) {
if (c->aa_dest_stencil_reg)
emit_aa(c, arg1, 2);

fire_fb_write(c, base_reg, nr, target, eot);

+ 4
- 4
src/mesa/drivers/dri/i965/brw_wm_glsl.c View File

@@ -290,15 +290,15 @@ static void prealloc_reg(struct brw_wm_compile *c)
c->first_free_grf = 0;

for (i = 0; i < 4; i++) {
if (i < (c->key.nr_payload_regs + 1) / 2)
if (i < (c->nr_payload_regs + 1) / 2)
reg = brw_vec8_grf(i * 2, 0);
else
reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0,
brw_vec8_grf(c->key.source_w_reg, 0));
reg_index += c->key.nr_payload_regs;
brw_vec8_grf(c->source_w_reg, 0));
reg_index += c->nr_payload_regs;

/* constants */
{
@@ -384,7 +384,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
}

c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);

+ 16
- 16
src/mesa/drivers/dri/i965/brw_wm_iz.c View File

@@ -120,14 +120,14 @@ const struct {
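* \param intel  intel context
* \param c      WM compile state; the bitmask of IZ_* flags is read from
*               c->key.iz_lookup and the line AA mode (AA_NEVER, AA_ALWAYS
*               or AA_SOMETIMES) from c->key.line_aa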
*/
void brw_wm_lookup_iz( struct intel_context *intel,
GLuint line_aa,
GLuint lookup,
GLboolean ps_uses_depth,
struct brw_wm_prog_key *key )
void brw_wm_lookup_iz(struct intel_context *intel,
struct brw_wm_compile *c)
{
GLuint reg = 2;
GLboolean kill_stats_promoted_workaround = GL_FALSE;
int lookup = c->key.iz_lookup;
bool uses_depth = (c->fp->program.Base.InputsRead &
(1 << FRAG_ATTRIB_WPOS)) != 0;

assert (lookup < IZ_BIT_MAX);

@@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel,
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
*/
if (intel->stats_wm &&
if (c->key.stats_wm &&
(lookup & IZ_PS_KILL_ALPHATEST_BIT) &&
wm_iz_table[lookup].mode == P) {
kill_stats_promoted_workaround = GL_TRUE;
}

if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
key->computes_depth = 1;
c->computes_depth = 1;

if (wm_iz_table[lookup].sd_present || ps_uses_depth ||
if (wm_iz_table[lookup].sd_present || uses_depth ||
kill_stats_promoted_workaround) {
key->source_depth_reg = reg;
c->source_depth_reg = reg;
reg += 2;
}

if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
key->source_depth_to_render_target = 1;
c->source_depth_to_render_target = 1;

if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
key->aa_dest_stencil_reg = reg;
key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
line_aa == AA_SOMETIMES);
if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) {
c->aa_dest_stencil_reg = reg;
c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
c->key.line_aa == AA_SOMETIMES);
reg++;
}

if (wm_iz_table[lookup].dd_present) {
key->dest_depth_reg = reg;
c->dest_depth_reg = reg;
reg+=2;
}

key->nr_payload_regs = reg;
c->nr_payload_regs = reg;
}


+ 1
- 1
src/mesa/drivers/dri/i965/brw_wm_pass0.c View File

@@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
GLuint i;

for (i = 0; i < 4; i++) {
GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i;
GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i;
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
&c->payload.depth[j] );
}

+ 1
- 2
src/mesa/drivers/dri/i965/brw_wm_pass1.c View File

@@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
if (inst->opcode == WM_FB_WRITE) {
track_arg(c, inst, 0, WRITEMASK_XYZW);
track_arg(c, inst, 1, WRITEMASK_XYZW);
if (c->key.source_depth_to_render_target &&
c->key.computes_depth)
if (c->source_depth_to_render_target && c->computes_depth)
track_arg(c, inst, 2, WRITEMASK_Z);
else
track_arg(c, inst, 2, 0);

+ 2
- 2
src/mesa/drivers/dri/i965/brw_wm_pass2.c View File

@@ -76,7 +76,7 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->grf_limit; j++)
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;

for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++)
for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++)
prealloc_reg(c, &c->payload.depth[j], i++);

for (j = 0; j < c->nr_creg; j++)
@@ -101,7 +101,7 @@ static void init_registers( struct brw_wm_compile *c )

assert(nr_interp_regs >= 1);

c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2);
c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2);
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg * 2;


Loading…
Cancel
Save