Payload register setup on gen6 depends on the dispatch width in addition to uses_depth, computes_depth, and other flags. That's something we want to decide at compile time, not at cache lookup. As a bonus, the fragment shader program cache lookup should be cheaper now that there's less to compute for the hash key.
tags/android-x86-2.2
@@ -1955,7 +1955,7 @@ fs_visitor::emit_interpolation_setup_gen6() | |||
emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); | |||
this->current_annotation = "compute 1/pos.w"; | |||
this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); | |||
this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); | |||
this->pixel_w = fs_reg(this, glsl_type::float_type); | |||
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); | |||
@@ -1983,17 +1983,17 @@ fs_visitor::emit_fb_writes() | |||
nr += 2; | |||
} | |||
if (c->key.aa_dest_stencil_reg) { | |||
if (c->aa_dest_stencil_reg) { | |||
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), | |||
fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); | |||
fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); | |||
} | |||
/* Reserve space for color. It'll be filled in per MRT below. */ | |||
int color_mrf = nr; | |||
nr += 4; | |||
if (c->key.source_depth_to_render_target) { | |||
if (c->key.computes_depth) { | |||
if (c->source_depth_to_render_target) { | |||
if (c->computes_depth) { | |||
/* Hand over gl_FragDepth. */ | |||
assert(this->frag_depth); | |||
fs_reg depth = *(variable_storage(this->frag_depth)); | |||
@@ -2002,13 +2002,13 @@ fs_visitor::emit_fb_writes() | |||
} else { | |||
/* Pass through the payload depth. */ | |||
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), | |||
fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); | |||
fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); | |||
} | |||
} | |||
if (c->key.dest_depth_reg) { | |||
if (c->dest_depth_reg) { | |||
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), | |||
fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); | |||
fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); | |||
} | |||
fs_reg color = reg_undef; | |||
@@ -2458,7 +2458,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) | |||
void | |||
fs_visitor::assign_curb_setup() | |||
{ | |||
c->prog_data.first_curbe_grf = c->key.nr_payload_regs; | |||
c->prog_data.first_curbe_grf = c->nr_payload_regs; | |||
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; | |||
/* Map the offsets in the UNIFORM file to fixed HW regs. */ |
@@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) | |||
brw_wm_emit(c); | |||
} | |||
/**
 * Decide the fragment shader thread payload register layout for one compile.
 *
 * On gen6 and later the layout is computed here and stored in the compile
 * state: c->nr_payload_regs, c->source_depth_reg, c->source_w_reg,
 * c->source_depth_to_render_target and c->computes_depth.  On earlier
 * generations the work is delegated to brw_wm_lookup_iz().
 *
 * \param brw  driver context; only brw->intel (for the gen check and the
 *             brw_wm_lookup_iz() call) is used here
 * \param c    compile state; c->fp is read, the payload fields are written
 *
 * NOTE(review): fields that a given path does not assign are left untouched,
 * so this presumably relies on the caller having zeroed *c -- confirm.
 */
static void | |||
brw_wm_payload_setup(struct brw_context *brw, | |||
struct brw_wm_compile *c) | |||
{ | |||
struct intel_context *intel = &brw->intel; | |||
/* True when the fragment program reads FRAG_ATTRIB_WPOS. */
bool uses_depth = (c->fp->program.Base.InputsRead & | |||
(1 << FRAG_ATTRIB_WPOS)) != 0; | |||
if (intel->gen >= 6) { | |||
/* R0-1: masks, pixel X/Y coordinates. */ | |||
c->nr_payload_regs = 2; | |||
/* R2: only for 32-pixel dispatch.*/ | |||
/* R3-4: perspective pixel location barycentric */ | |||
c->nr_payload_regs += 2; | |||
/* R5-6: perspective pixel location bary for dispatch width != 8 */ | |||
/* !isGLSL is used throughout this function as the stand-in for
 * "dispatch width is not 8".
 */
if (!c->fp->isGLSL) { /* dispatch_width != 8 */ | |||
c->nr_payload_regs += 2; | |||
} | |||
/* R7-10: perspective centroid barycentric */ | |||
/* R11-14: perspective sample barycentric */ | |||
/* R15-18: linear pixel location barycentric */ | |||
/* R19-22: linear centroid barycentric */ | |||
/* R23-26: linear sample barycentric */ | |||
/* R27: interpolated depth if uses source depth */ | |||
/* The register index recorded is the running count, not the fixed
 * R-number named in the comment above.
 */
if (uses_depth) { | |||
c->source_depth_reg = c->nr_payload_regs; | |||
c->nr_payload_regs++; | |||
if (!c->fp->isGLSL) { /* dispatch_width != 8 */ | |||
/* R28: interpolated depth if not 8-wide. */ | |||
c->nr_payload_regs++; | |||
} | |||
} | |||
/* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. | |||
*/ | |||
/* Source W is keyed off the same uses_depth flag as source depth. */
if (uses_depth) { | |||
c->source_w_reg = c->nr_payload_regs; | |||
c->nr_payload_regs++; | |||
if (!c->fp->isGLSL) { /* dispatch_width != 8 */ | |||
/* R30: interpolated W if not 8-wide. */ | |||
c->nr_payload_regs++; | |||
} | |||
} | |||
/* R31: MSAA position offsets. */ | |||
/* R32-: bary for 32-pixel. */ | |||
/* R58-59: interp W for 32-pixel. */ | |||
/* A program that writes FRAG_RESULT_DEPTH both computes depth and
 * sends it to the render target.
 */
if (c->fp->program.Base.OutputsWritten & | |||
BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { | |||
c->source_depth_to_render_target = GL_TRUE; | |||
c->computes_depth = GL_TRUE; | |||
} | |||
} else { | |||
/* Pre-gen6: the layout is derived by brw_wm_lookup_iz(). */
brw_wm_lookup_iz(intel, c); | |||
} | |||
} | |||
/** | |||
* All Mesa program -> GPU code generation goes through this function. | |||
@@ -167,6 +223,8 @@ static void do_wm_prog( struct brw_context *brw, | |||
brw_init_compile(brw, &c->func); | |||
brw_wm_payload_setup(brw, c); | |||
/* temporary sanity check assertion */ | |||
ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); | |||
@@ -220,12 +278,10 @@ static void do_wm_prog( struct brw_context *brw, | |||
static void brw_wm_populate_key( struct brw_context *brw, | |||
struct brw_wm_prog_key *key ) | |||
{ | |||
struct intel_context *intel = &brw->intel; | |||
struct gl_context *ctx = &brw->intel.ctx; | |||
/* BRW_NEW_FRAGMENT_PROGRAM */ | |||
const struct brw_fragment_program *fp = | |||
(struct brw_fragment_program *)brw->fragment_program; | |||
GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; | |||
GLuint lookup = 0; | |||
GLuint line_aa; | |||
GLuint i; | |||
@@ -285,57 +341,9 @@ static void brw_wm_populate_key( struct brw_context *brw, | |||
} | |||
} | |||
if (intel->gen >= 6) { | |||
/* R0-1: masks, pixel X/Y coordinates. */ | |||
key->nr_payload_regs = 2; | |||
/* R2: only for 32-pixel dispatch.*/ | |||
/* R3-4: perspective pixel location barycentric */ | |||
key->nr_payload_regs += 2; | |||
/* R5-6: perspective pixel location bary for dispatch width != 8 */ | |||
if (!fp->isGLSL) { /* dispatch_width != 8 */ | |||
key->nr_payload_regs += 2; | |||
} | |||
/* R7-10: perspective centroid barycentric */ | |||
/* R11-14: perspective sample barycentric */ | |||
/* R15-18: linear pixel location barycentric */ | |||
/* R19-22: linear centroid barycentric */ | |||
/* R23-26: linear sample barycentric */ | |||
/* R27: interpolated depth if uses source depth */ | |||
if (uses_depth) { | |||
key->source_depth_reg = key->nr_payload_regs; | |||
key->nr_payload_regs++; | |||
if (!fp->isGLSL) { /* dispatch_width != 8 */ | |||
/* R28: interpolated depth if not 8-wide. */ | |||
key->nr_payload_regs++; | |||
} | |||
} | |||
/* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. | |||
*/ | |||
if (uses_depth) { | |||
key->source_w_reg = key->nr_payload_regs; | |||
key->nr_payload_regs++; | |||
if (!fp->isGLSL) { /* dispatch_width != 8 */ | |||
/* R30: interpolated W if not 8-wide. */ | |||
key->nr_payload_regs++; | |||
} | |||
} | |||
/* R31: MSAA position offsets. */ | |||
/* R32-: bary for 32-pixel. */ | |||
/* R58-59: interp W for 32-pixel. */ | |||
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { | |||
key->source_depth_to_render_target = GL_TRUE; | |||
key->computes_depth = GL_TRUE; | |||
} | |||
} else { | |||
brw_wm_lookup_iz(intel, | |||
line_aa, | |||
lookup, | |||
uses_depth, | |||
key); | |||
} | |||
key->iz_lookup = lookup; | |||
key->line_aa = line_aa; | |||
key->stats_wm = brw->intel.stats_wm; | |||
/* BRW_NEW_WM_INPUT_DIMENSIONS */ | |||
key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; |
@@ -59,16 +59,9 @@ | |||
#define AA_ALWAYS 2 | |||
struct brw_wm_prog_key { | |||
GLuint source_depth_reg:3; | |||
GLuint source_w_reg:3; | |||
GLuint aa_dest_stencil_reg:3; | |||
GLuint dest_depth_reg:3; | |||
GLuint nr_payload_regs:4; | |||
GLuint computes_depth:1; /* could be derived from program string */ | |||
GLuint source_depth_to_render_target:1; | |||
GLuint stats_wm:1; | |||
GLuint flat_shade:1; | |||
GLuint linear_color:1; /**< linear interpolation vs perspective interp */ | |||
GLuint runtime_check_aads_emit:1; | |||
GLuint nr_color_regions:5; | |||
GLuint render_to_fbo:1; | |||
@@ -81,6 +74,8 @@ struct brw_wm_prog_key { | |||
GLushort drawable_height; | |||
GLbitfield64 vp_outputs_written; | |||
GLuint iz_lookup; | |||
GLuint line_aa; | |||
GLuint program_string_id:32; | |||
}; | |||
@@ -204,6 +199,15 @@ struct brw_wm_compile { | |||
PASS2_DONE | |||
} state; | |||
GLuint source_depth_reg:3; | |||
GLuint source_w_reg:3; | |||
GLuint aa_dest_stencil_reg:3; | |||
GLuint dest_depth_reg:3; | |||
GLuint nr_payload_regs:4; | |||
GLuint computes_depth:1; /* could be derived from program string */ | |||
GLuint source_depth_to_render_target:1; | |||
GLuint runtime_check_aads_emit:1; | |||
/* Initial pass - translate fp instructions to fp instructions, | |||
* simplifying and adding instructions for interpolation and | |||
* framebuffer writes. | |||
@@ -306,11 +310,8 @@ void brw_wm_print_insn( struct brw_wm_compile *c, | |||
void brw_wm_print_program( struct brw_wm_compile *c, | |||
const char *stage ); | |||
void brw_wm_lookup_iz( struct intel_context *intel, | |||
GLuint line_aa, | |||
GLuint lookup, | |||
GLboolean ps_uses_depth, | |||
struct brw_wm_prog_key *key ); | |||
void brw_wm_lookup_iz(struct intel_context *intel, | |||
struct brw_wm_compile *c); | |||
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); | |||
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); |
@@ -1387,8 +1387,8 @@ static void emit_aa( struct brw_wm_compile *c, | |||
GLuint reg ) | |||
{ | |||
struct brw_compile *p = &c->func; | |||
GLuint comp = c->key.aa_dest_stencil_reg / 2; | |||
GLuint off = c->key.aa_dest_stencil_reg % 2; | |||
GLuint comp = c->aa_dest_stencil_reg / 2; | |||
GLuint off = c->aa_dest_stencil_reg % 2; | |||
struct brw_reg aa = offset(arg1[comp], off); | |||
brw_push_insn_state(p); | |||
@@ -1420,7 +1420,7 @@ void emit_fb_write(struct brw_wm_compile *c, | |||
/* Reserve a space for AA - may not be needed: | |||
*/ | |||
if (c->key.aa_dest_stencil_reg) | |||
if (c->aa_dest_stencil_reg) | |||
nr += 1; | |||
/* I don't really understand how this achieves the color interleave | |||
@@ -1493,9 +1493,9 @@ void emit_fb_write(struct brw_wm_compile *c, | |||
brw_pop_insn_state(p); | |||
if (c->key.source_depth_to_render_target) | |||
if (c->source_depth_to_render_target) | |||
{ | |||
if (c->key.computes_depth) | |||
if (c->computes_depth) | |||
brw_MOV(p, brw_message_reg(nr), arg2[2]); | |||
else | |||
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ | |||
@@ -1503,10 +1503,10 @@ void emit_fb_write(struct brw_wm_compile *c, | |||
nr += 2; | |||
} | |||
if (c->key.dest_depth_reg) | |||
if (c->dest_depth_reg) | |||
{ | |||
GLuint comp = c->key.dest_depth_reg / 2; | |||
GLuint off = c->key.dest_depth_reg % 2; | |||
GLuint comp = c->dest_depth_reg / 2; | |||
GLuint off = c->dest_depth_reg % 2; | |||
if (off != 0) { | |||
brw_push_insn_state(p); | |||
@@ -1528,8 +1528,8 @@ void emit_fb_write(struct brw_wm_compile *c, | |||
nr -= 2; | |||
} | |||
if (!c->key.runtime_check_aads_emit) { | |||
if (c->key.aa_dest_stencil_reg) | |||
if (!c->runtime_check_aads_emit) { | |||
if (c->aa_dest_stencil_reg) | |||
emit_aa(c, arg1, 2); | |||
fire_fb_write(c, base_reg, nr, target, eot); |
@@ -290,15 +290,15 @@ static void prealloc_reg(struct brw_wm_compile *c) | |||
c->first_free_grf = 0; | |||
for (i = 0; i < 4; i++) { | |||
if (i < (c->key.nr_payload_regs + 1) / 2) | |||
if (i < (c->nr_payload_regs + 1) / 2) | |||
reg = brw_vec8_grf(i * 2, 0); | |||
else | |||
reg = brw_vec8_grf(0, 0); | |||
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); | |||
} | |||
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0, | |||
brw_vec8_grf(c->key.source_w_reg, 0)); | |||
reg_index += c->key.nr_payload_regs; | |||
brw_vec8_grf(c->source_w_reg, 0)); | |||
reg_index += c->nr_payload_regs; | |||
/* constants */ | |||
{ | |||
@@ -384,7 +384,7 @@ static void prealloc_reg(struct brw_wm_compile *c) | |||
} | |||
} | |||
c->prog_data.first_curbe_grf = c->key.nr_payload_regs; | |||
c->prog_data.first_curbe_grf = c->nr_payload_regs; | |||
c->prog_data.urb_read_length = urb_read_length; | |||
c->prog_data.curb_read_length = c->nr_creg; | |||
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); |
@@ -120,14 +120,14 @@ const struct { | |||
 * \param c compile state; c->key.line_aa is AA_NEVER, AA_ALWAYS or AA_SOMETIMES, | |||
 * and c->key.iz_lookup is a bitmask of IZ_* flags | |||
*/ | |||
void brw_wm_lookup_iz( struct intel_context *intel, | |||
GLuint line_aa, | |||
GLuint lookup, | |||
GLboolean ps_uses_depth, | |||
struct brw_wm_prog_key *key ) | |||
void brw_wm_lookup_iz(struct intel_context *intel, | |||
struct brw_wm_compile *c) | |||
{ | |||
GLuint reg = 2; | |||
GLboolean kill_stats_promoted_workaround = GL_FALSE; | |||
int lookup = c->key.iz_lookup; | |||
bool uses_depth = (c->fp->program.Base.InputsRead & | |||
(1 << FRAG_ATTRIB_WPOS)) != 0; | |||
assert (lookup < IZ_BIT_MAX); | |||
@@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel, | |||
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth | |||
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. | |||
*/ | |||
if (intel->stats_wm && | |||
if (c->key.stats_wm && | |||
(lookup & IZ_PS_KILL_ALPHATEST_BIT) && | |||
wm_iz_table[lookup].mode == P) { | |||
kill_stats_promoted_workaround = GL_TRUE; | |||
} | |||
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) | |||
key->computes_depth = 1; | |||
c->computes_depth = 1; | |||
if (wm_iz_table[lookup].sd_present || ps_uses_depth || | |||
if (wm_iz_table[lookup].sd_present || uses_depth || | |||
kill_stats_promoted_workaround) { | |||
key->source_depth_reg = reg; | |||
c->source_depth_reg = reg; | |||
reg += 2; | |||
} | |||
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) | |||
key->source_depth_to_render_target = 1; | |||
c->source_depth_to_render_target = 1; | |||
if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { | |||
key->aa_dest_stencil_reg = reg; | |||
key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && | |||
line_aa == AA_SOMETIMES); | |||
if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) { | |||
c->aa_dest_stencil_reg = reg; | |||
c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && | |||
c->key.line_aa == AA_SOMETIMES); | |||
reg++; | |||
} | |||
if (wm_iz_table[lookup].dd_present) { | |||
key->dest_depth_reg = reg; | |||
c->dest_depth_reg = reg; | |||
reg+=2; | |||
} | |||
key->nr_payload_regs = reg; | |||
c->nr_payload_regs = reg; | |||
} | |||
@@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) | |||
GLuint i; | |||
for (i = 0; i < 4; i++) { | |||
GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i; | |||
GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i; | |||
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, | |||
&c->payload.depth[j] ); | |||
} |
@@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) | |||
if (inst->opcode == WM_FB_WRITE) { | |||
track_arg(c, inst, 0, WRITEMASK_XYZW); | |||
track_arg(c, inst, 1, WRITEMASK_XYZW); | |||
if (c->key.source_depth_to_render_target && | |||
c->key.computes_depth) | |||
if (c->source_depth_to_render_target && c->computes_depth) | |||
track_arg(c, inst, 2, WRITEMASK_Z); | |||
else | |||
track_arg(c, inst, 2, 0); |
@@ -76,7 +76,7 @@ static void init_registers( struct brw_wm_compile *c ) | |||
for (j = 0; j < c->grf_limit; j++) | |||
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; | |||
for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++) | |||
for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++) | |||
prealloc_reg(c, &c->payload.depth[j], i++); | |||
for (j = 0; j < c->nr_creg; j++) | |||
@@ -101,7 +101,7 @@ static void init_registers( struct brw_wm_compile *c ) | |||
assert(nr_interp_regs >= 1); | |||
c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2); | |||
c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2); | |||
c->prog_data.urb_read_length = nr_interp_regs * 2; | |||
c->prog_data.curb_read_length = c->nr_creg * 2; | |||