This has more of a negative impact than the previous patch, as on Gen6 passing primitives through to the clipper means we actually have to make the GS thread write them to the URB. I don't see another good solution though, and rasterizer discard is not the most common of cases, so hopefully it won't be too terrible.

v2: Add a perf_debug; resolve rebase conflicts on the brw dirty flags; remove the rasterizer_discard field from brw_gs_prog_key.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net> [v1]
Reviewed-by: Paul Berry <stereotype441@gmail.com>

tags/mesa-9.2-rc1
@@ -214,12 +214,6 @@ static void populate_key( struct brw_context *brw, | |||
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; | |||
} | |||
} | |||
/* On Gen6, GS is also used for rasterizer discard. */ | |||
/* BRW_NEW_RASTERIZER_DISCARD */ | |||
if (ctx->RasterDiscard) { | |||
key->need_gs_prog = true; | |||
key->rasterizer_discard = true; | |||
} | |||
} else { | |||
/* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP | |||
* into simpler primitives. | |||
@@ -259,8 +253,7 @@ const struct brw_tracked_state brw_gs_prog = { | |||
.dirty = { | |||
.mesa = (_NEW_LIGHT), | |||
.brw = (BRW_NEW_PRIMITIVE | | |||
BRW_NEW_TRANSFORM_FEEDBACK | | |||
BRW_NEW_RASTERIZER_DISCARD), | |||
BRW_NEW_TRANSFORM_FEEDBACK), | |||
.cache = CACHE_NEW_VS_PROG | |||
}, | |||
.emit = brw_upload_gs_prog |
@@ -49,7 +49,6 @@ struct brw_gs_prog_key { | |||
GLuint pv_first:1; | |||
GLuint need_gs_prog:1; | |||
GLuint rasterizer_discard:1; | |||
/** | |||
* Number of varyings that are output to transform feedback. |
@@ -200,28 +200,6 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, | |||
} | |||
} | |||
/** | |||
* De-allocate the URB entry that was previously allocated to this thread | |||
* (without writing any vertex data to it), and terminate the thread. This is | |||
* used to implement RASTERIZER_DISCARD functionality. | |||
*
* Emits a single URB write through the brw_compile embedded in \p c
* (c->func), using c->reg.header as the only source register.
*
* \param c GS compile state; provides the brw_compile passed to
*          brw_urb_WRITE() and the header register used as src0.
*/ | |||
static void brw_gs_terminate(struct brw_gs_compile *c) | |||
{ | |||
struct brw_compile *p = &c->func; | |||
/* Header-only message: msg_length is 1, so nothing beyond src0 is sent.
 * The destination is the null register and response_length is 0, so no
 * reply is requested. "allocate" and "used" are both false, and "eot"
 * is true so this send also ends the thread.
 */
brw_urb_WRITE(p, | |||
retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest */ | |||
0, /* msg_reg_nr */ | |||
c->reg.header, /* src0 */ | |||
false, /* allocate */ | |||
false, /* used */ | |||
1, /* msg_length */ | |||
0, /* response_length */ | |||
true, /* eot */ | |||
true, /* writes_complete */ | |||
0, /* offset */ | |||
BRW_URB_SWIZZLE_NONE); | |||
} | |||
/** | |||
* Send an FF_SYNC message to ensure that all previously spawned GS threads | |||
* have finished sending primitives down the pipeline, and to allocate a URB | |||
@@ -484,14 +462,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, | |||
brw_gs_ff_sync(c, 1); | |||
/* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so | |||
* release the URB that was just allocated, and terminate the thread. | |||
*/ | |||
if (key->rasterizer_discard) { | |||
brw_gs_terminate(c); | |||
return; | |||
} | |||
brw_gs_overwrite_header_dw2_from_r0(c); | |||
switch (num_verts) { | |||
case 1: |
@@ -76,6 +76,13 @@ upload_clip_state(struct brw_context *brw) | |||
dw2 |= GEN6_CLIP_GB_TEST; | |||
} | |||
/* BRW_NEW_RASTERIZER_DISCARD */ | |||
if (ctx->RasterDiscard) { | |||
dw2 |= GEN6_CLIP_MODE_REJECT_ALL; | |||
perf_debug("Rasterizer discard is currently implemented via the clipper; " | |||
"having the GS not write primitives would likely be faster."); | |||
} | |||
BEGIN_BATCH(4); | |||
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); | |||
OUT_BATCH(dw1); | |||
@@ -93,7 +100,9 @@ upload_clip_state(struct brw_context *brw) | |||
const struct brw_tracked_state gen6_clip_state = { | |||
.dirty = { | |||
.mesa = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS, | |||
.brw = BRW_NEW_CONTEXT | BRW_NEW_META_IN_PROGRESS, | |||
.brw = BRW_NEW_CONTEXT | | |||
BRW_NEW_META_IN_PROGRESS | | |||
BRW_NEW_RASTERIZER_DISCARD, | |||
.cache = CACHE_NEW_WM_PROG | |||
}, | |||
.emit = upload_clip_state, |