Now that the stage state is passed into our setup of sampler states, it's easy to store in it an identifier for which shader stage the stage_state belongs to, and then look up which packet to emit in a small table.

No performance difference on cairo on glamor (n=492).

v2: Don't forget to do the workaround flush on IVB.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
tags/mesa-10.2-rc1
@@ -628,6 +628,9 @@ brwCreateContext(gl_api api, | |||
brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil; | |||
brw->has_swizzling = screen->hw_has_swizzling; | |||
brw->vs.base.stage = MESA_SHADER_VERTEX; | |||
brw->gs.base.stage = MESA_SHADER_GEOMETRY; | |||
brw->wm.base.stage = MESA_SHADER_FRAGMENT; | |||
if (brw->gen >= 8) { | |||
gen8_init_vtable_surface_functions(brw); | |||
gen7_init_vtable_sampler_functions(brw); |
@@ -925,6 +925,7 @@ struct brw_transform_feedback_object { | |||
*/ | |||
struct brw_stage_state | |||
{ | |||
gl_shader_stage stage; | |||
struct brw_stage_prog_data *prog_data; | |||
/** |
@@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw) | |||
/* CACHE_NEW_GS_PROG */ | |||
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; | |||
/* CACHE_NEW_SAMPLER */ | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); | |||
OUT_BATCH(stage_state->sampler_offset); | |||
ADVANCE_BATCH(); | |||
gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); | |||
/** | |||
@@ -198,7 +192,7 @@ const struct brw_tracked_state gen7_gs_state = { | |||
BRW_NEW_GS_BINDING_TABLE | | |||
BRW_NEW_BATCH | | |||
BRW_NEW_PUSH_CONSTANT_ALLOCATION), | |||
.cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER | |||
.cache = CACHE_NEW_GS_PROG | |||
}, | |||
.emit = upload_gs_state, | |||
}; |
@@ -187,6 +187,11 @@ gen7_upload_sampler_state_table(struct brw_context *brw, | |||
struct gl_context *ctx = &brw->ctx; | |||
struct gen7_sampler_state *samplers; | |||
uint32_t sampler_count = stage_state->sampler_count; | |||
static const uint16_t packet_headers[] = { | |||
[MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS, | |||
[MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS, | |||
[MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS, | |||
}; | |||
GLbitfield SamplersUsed = prog->SamplersUsed; | |||
@@ -207,7 +212,15 @@ gen7_upload_sampler_state_table(struct brw_context *brw, | |||
} | |||
} | |||
brw->state.dirty.cache |= CACHE_NEW_SAMPLER; | |||
if (brw->gen == 7 && !brw->is_haswell && | |||
stage_state->stage == MESA_SHADER_VERTEX) { | |||
gen7_emit_vs_workaround_flush(brw); | |||
} | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(packet_headers[stage_state->stage] << 16 | (2 - 2)); | |||
OUT_BATCH(stage_state->sampler_offset); | |||
ADVANCE_BATCH(); | |||
} | |||
void |
@@ -75,12 +75,6 @@ upload_vs_state(struct brw_context *brw) | |||
if (!brw->is_haswell) | |||
gen7_emit_vs_workaround_flush(brw); | |||
/* CACHE_NEW_SAMPLER */ | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); | |||
OUT_BATCH(stage_state->sampler_offset); | |||
ADVANCE_BATCH(); | |||
gen7_upload_constant_state(brw, stage_state, true /* active */, | |||
_3DSTATE_CONSTANT_VS); | |||
@@ -126,7 +120,7 @@ const struct brw_tracked_state gen7_vs_state = { | |||
BRW_NEW_VS_BINDING_TABLE | | |||
BRW_NEW_BATCH | | |||
BRW_NEW_PUSH_CONSTANT_ALLOCATION), | |||
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER | |||
.cache = CACHE_NEW_VS_PROG | |||
}, | |||
.emit = upload_vs_state, | |||
}; |
@@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw) | |||
const int max_threads_shift = brw->is_haswell ? | |||
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; | |||
/* CACHE_NEW_SAMPLER */ | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); | |||
OUT_BATCH(brw->wm.base.sampler_offset); | |||
ADVANCE_BATCH(); | |||
/* CACHE_NEW_WM_PROG */ | |||
gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); | |||
@@ -281,8 +275,7 @@ const struct brw_tracked_state gen7_ps_state = { | |||
BRW_NEW_PS_BINDING_TABLE | | |||
BRW_NEW_BATCH | | |||
BRW_NEW_PUSH_CONSTANT_ALLOCATION), | |||
.cache = (CACHE_NEW_SAMPLER | | |||
CACHE_NEW_WM_PROG) | |||
.cache = (CACHE_NEW_WM_PROG) | |||
}, | |||
.emit = upload_ps_state, | |||
}; |
@@ -36,12 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw) | |||
/* CACHE_NEW_GS_PROG */ | |||
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; | |||
/* CACHE_NEW_SAMPLER */ | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); | |||
OUT_BATCH(stage_state->sampler_offset); | |||
ADVANCE_BATCH(); | |||
gen8_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); | |||
if (active) { | |||
@@ -135,7 +129,7 @@ const struct brw_tracked_state gen8_gs_state = { | |||
BRW_NEW_GS_BINDING_TABLE | | |||
BRW_NEW_BATCH | | |||
BRW_NEW_PUSH_CONSTANT_ALLOCATION), | |||
.cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER | |||
.cache = CACHE_NEW_GS_PROG | |||
}, | |||
.emit = gen8_upload_gs_state, | |||
}; |
@@ -136,12 +136,6 @@ upload_ps_state(struct brw_context *brw) | |||
struct gl_context *ctx = &brw->ctx; | |||
uint32_t dw3 = 0, dw6 = 0, dw7 = 0; | |||
/* CACHE_NEW_SAMPLER */ | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); | |||
OUT_BATCH(brw->wm.base.sampler_offset); | |||
ADVANCE_BATCH(); | |||
/* CACHE_NEW_WM_PROG */ | |||
gen8_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); | |||
@@ -254,7 +248,7 @@ const struct brw_tracked_state gen8_ps_state = { | |||
BRW_NEW_PS_BINDING_TABLE | | |||
BRW_NEW_BATCH | | |||
BRW_NEW_PUSH_CONSTANT_ALLOCATION, | |||
.cache = CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG | |||
.cache = CACHE_NEW_WM_PROG | |||
}, | |||
.emit = upload_ps_state, | |||
}; |
@@ -62,12 +62,6 @@ upload_vs_state(struct brw_context *brw) | |||
/* CACHE_NEW_VS_PROG */ | |||
const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base; | |||
/* CACHE_NEW_SAMPLER */ | |||
BEGIN_BATCH(2); | |||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); | |||
OUT_BATCH(stage_state->sampler_offset); | |||
ADVANCE_BATCH(); | |||
gen8_upload_constant_state(brw, stage_state, true /* active */, | |||
_3DSTATE_CONSTANT_VS); | |||
@@ -119,7 +113,7 @@ const struct brw_tracked_state gen8_vs_state = { | |||
BRW_NEW_VS_BINDING_TABLE | | |||
BRW_NEW_BATCH | | |||
BRW_NEW_PUSH_CONSTANT_ALLOCATION, | |||
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER | |||
.cache = CACHE_NEW_VS_PROG | |||
}, | |||
.emit = upload_vs_state, | |||
}; |