Browse Source

i965/gen7+: Move sampler state packets to the stage sampler state table update.

Now that the stage state is passed into our setup of sampler states,
it's easy to store an identifier in it recording which stage the stage_state
belongs to, and then look up which packet to emit in a small table.

No performance difference on cairo on glamor (n=492).

v2: Don't forget to do the workaround flush on IVB.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
tags/mesa-10.2-rc1
Eric Anholt 11 years ago
parent
commit
f9a2679db5

+ 3
- 0
src/mesa/drivers/dri/i965/brw_context.c View File

@@ -628,6 +628,9 @@ brwCreateContext(gl_api api,
brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
brw->has_swizzling = screen->hw_has_swizzling;

brw->vs.base.stage = MESA_SHADER_VERTEX;
brw->gs.base.stage = MESA_SHADER_GEOMETRY;
brw->wm.base.stage = MESA_SHADER_FRAGMENT;
if (brw->gen >= 8) {
gen8_init_vtable_surface_functions(brw);
gen7_init_vtable_sampler_functions(brw);

+ 1
- 0
src/mesa/drivers/dri/i965/brw_context.h View File

@@ -925,6 +925,7 @@ struct brw_transform_feedback_object {
*/
struct brw_stage_state
{
gl_shader_stage stage;
struct brw_stage_prog_data *prog_data;

/**

+ 1
- 7
src/mesa/drivers/dri/i965/gen7_gs_state.c View File

@@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw)
/* CACHE_NEW_GS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;

/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2));
OUT_BATCH(stage_state->sampler_offset);
ADVANCE_BATCH();

gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);

/**
@@ -198,7 +192,7 @@ const struct brw_tracked_state gen7_gs_state = {
BRW_NEW_GS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER
.cache = CACHE_NEW_GS_PROG
},
.emit = upload_gs_state,
};

+ 14
- 1
src/mesa/drivers/dri/i965/gen7_sampler_state.c View File

@@ -187,6 +187,11 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
struct gen7_sampler_state *samplers;
uint32_t sampler_count = stage_state->sampler_count;
static const uint16_t packet_headers[] = {
[MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS,
[MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS,
[MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS,
};

GLbitfield SamplersUsed = prog->SamplersUsed;

@@ -207,7 +212,15 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
}
}

brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
if (brw->gen == 7 && !brw->is_haswell &&
stage_state->stage == MESA_SHADER_VERTEX) {
gen7_emit_vs_workaround_flush(brw);
}

BEGIN_BATCH(2);
OUT_BATCH(packet_headers[stage_state->stage] << 16 | (2 - 2));
OUT_BATCH(stage_state->sampler_offset);
ADVANCE_BATCH();
}

void

+ 1
- 7
src/mesa/drivers/dri/i965/gen7_vs_state.c View File

@@ -75,12 +75,6 @@ upload_vs_state(struct brw_context *brw)
if (!brw->is_haswell)
gen7_emit_vs_workaround_flush(brw);

/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
OUT_BATCH(stage_state->sampler_offset);
ADVANCE_BATCH();

gen7_upload_constant_state(brw, stage_state, true /* active */,
_3DSTATE_CONSTANT_VS);

@@ -126,7 +120,7 @@ const struct brw_tracked_state gen7_vs_state = {
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
};

+ 1
- 8
src/mesa/drivers/dri/i965/gen7_wm_state.c View File

@@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw)
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;

/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(brw->wm.base.sampler_offset);
ADVANCE_BATCH();

/* CACHE_NEW_WM_PROG */
gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);

@@ -281,8 +275,7 @@ const struct brw_tracked_state gen7_ps_state = {
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = (CACHE_NEW_SAMPLER |
CACHE_NEW_WM_PROG)
.cache = (CACHE_NEW_WM_PROG)
},
.emit = upload_ps_state,
};

+ 1
- 7
src/mesa/drivers/dri/i965/gen8_gs_state.c View File

@@ -36,12 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw)
/* CACHE_NEW_GS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;

/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2));
OUT_BATCH(stage_state->sampler_offset);
ADVANCE_BATCH();

gen8_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);

if (active) {
@@ -135,7 +129,7 @@ const struct brw_tracked_state gen8_gs_state = {
BRW_NEW_GS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER
.cache = CACHE_NEW_GS_PROG
},
.emit = gen8_upload_gs_state,
};

+ 1
- 7
src/mesa/drivers/dri/i965/gen8_ps_state.c View File

@@ -136,12 +136,6 @@ upload_ps_state(struct brw_context *brw)
struct gl_context *ctx = &brw->ctx;
uint32_t dw3 = 0, dw6 = 0, dw7 = 0;

/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(brw->wm.base.sampler_offset);
ADVANCE_BATCH();

/* CACHE_NEW_WM_PROG */
gen8_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);

@@ -254,7 +248,7 @@ const struct brw_tracked_state gen8_ps_state = {
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION,
.cache = CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG
.cache = CACHE_NEW_WM_PROG
},
.emit = upload_ps_state,
};

+ 1
- 7
src/mesa/drivers/dri/i965/gen8_vs_state.c View File

@@ -62,12 +62,6 @@ upload_vs_state(struct brw_context *brw)
/* CACHE_NEW_VS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;

/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
OUT_BATCH(stage_state->sampler_offset);
ADVANCE_BATCH();

gen8_upload_constant_state(brw, stage_state, true /* active */,
_3DSTATE_CONSTANT_VS);

@@ -119,7 +113,7 @@ const struct brw_tracked_state gen8_vs_state = {
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION,
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
};

Loading…
Cancel
Save