For an app that's blowing out the state cache, like sauerbraten, the memset of the giant arrays ended up taking 11% of the CPU even when only a "few" of the entries got used. With this change, the arrays are allocated once and each entry is zeroed only when it is handed out, so the WM program compile drops back down to 1% of CPU time. Bug #24981 (bisected to the BRW_WM_MAX_INSN increase).
tags/mesa_7_7_rc1
@@ -152,8 +152,22 @@ static void do_wm_prog( struct brw_context *brw, | |||
*/ | |||
return; | |||
} | |||
c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); | |||
c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * | |||
sizeof(*c->prog_instructions)); | |||
c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); | |||
c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); | |||
c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); | |||
} else { | |||
void *instruction = c->instruction; | |||
void *prog_instructions = c->prog_instructions; | |||
void *vreg = c->vreg; | |||
void *refs = c->refs; | |||
memset(c, 0, sizeof(*brw->wm.compile_data)); | |||
c->instruction = instruction; | |||
c->prog_instructions = prog_instructions; | |||
c->vreg = vreg; | |||
c->refs = refs; | |||
} | |||
memcpy(&c->key, key, sizeof(*key)); | |||
@@ -202,7 +202,7 @@ struct brw_wm_compile { | |||
* simplifying and adding instructions for interpolation and | |||
* framebuffer writes. | |||
*/ | |||
struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; | |||
struct prog_instruction *prog_instructions; | |||
GLuint nr_fp_insns; | |||
GLuint fp_temp; | |||
GLuint fp_interp_emitted; | |||
@@ -213,7 +213,7 @@ struct brw_wm_compile { | |||
struct prog_src_register pixel_w; | |||
struct brw_wm_value vreg[BRW_WM_MAX_VREG]; | |||
struct brw_wm_value *vreg; | |||
GLuint nr_vreg; | |||
struct brw_wm_value creg[BRW_WM_MAX_PARAM]; | |||
@@ -230,10 +230,10 @@ struct brw_wm_compile { | |||
struct brw_wm_ref undef_ref; | |||
struct brw_wm_value undef_value; | |||
struct brw_wm_ref refs[BRW_WM_MAX_REF]; | |||
struct brw_wm_ref *refs; | |||
GLuint nr_refs; | |||
struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; | |||
struct brw_wm_instruction *instruction; | |||
GLuint nr_insns; | |||
struct brw_wm_constref constref[BRW_WM_MAX_CONST]; |
@@ -182,6 +182,8 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem | |||
static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) | |||
{ | |||
assert(c->nr_fp_insns < BRW_WM_MAX_INSN); | |||
memset(&c->prog_instructions[c->nr_fp_insns], 0, | |||
sizeof(*c->prog_instructions)); | |||
return &c->prog_instructions[c->nr_fp_insns++]; | |||
} | |||
@@ -42,12 +42,14 @@ | |||
static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) | |||
{ | |||
assert(c->nr_refs < BRW_WM_MAX_REF); | |||
memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs)); | |||
return &c->refs[c->nr_refs++]; | |||
} | |||
static struct brw_wm_value *get_value( struct brw_wm_compile *c) | |||
{ | |||
assert(c->nr_refs < BRW_WM_MAX_VREG); | |||
memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg)); | |||
return &c->vreg[c->nr_vreg++]; | |||
} | |||
@@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) | |||
static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) | |||
{ | |||
assert(c->nr_insns < BRW_WM_MAX_INSN); | |||
memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction)); | |||
return &c->instruction[c->nr_insns++]; | |||
} | |||