This has more of a negative impact than the previous patch, as on Gen6, passing primitives through to the clipper means we actually have to make the GS thread write them to the URB. I don't see another good solution though, and rasterizer discard is not the most common of cases, so hopefully it won't be too terrible.

v2: Add a perf_debug; resolve rebase conflicts on the brw dirty flags; remove the rasterizer_discard field from brw_gs_prog_key.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net> [v1]
Reviewed-by: Paul Berry <stereotype441@gmail.com>

tags/mesa-9.2-rc1
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; | swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; | ||||
} | } | ||||
} | } | ||||
/* On Gen6, GS is also used for rasterizer discard. */ | |||||
/* BRW_NEW_RASTERIZER_DISCARD */ | |||||
if (ctx->RasterDiscard) { | |||||
key->need_gs_prog = true; | |||||
key->rasterizer_discard = true; | |||||
} | |||||
} else { | } else { | ||||
/* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP | /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP | ||||
* into simpler primitives. | * into simpler primitives. | ||||
.dirty = { | .dirty = { | ||||
.mesa = (_NEW_LIGHT), | .mesa = (_NEW_LIGHT), | ||||
.brw = (BRW_NEW_PRIMITIVE | | .brw = (BRW_NEW_PRIMITIVE | | ||||
BRW_NEW_TRANSFORM_FEEDBACK | | |||||
BRW_NEW_RASTERIZER_DISCARD), | |||||
BRW_NEW_TRANSFORM_FEEDBACK), | |||||
.cache = CACHE_NEW_VS_PROG | .cache = CACHE_NEW_VS_PROG | ||||
}, | }, | ||||
.emit = brw_upload_gs_prog | .emit = brw_upload_gs_prog |
GLuint pv_first:1; | GLuint pv_first:1; | ||||
GLuint need_gs_prog:1; | GLuint need_gs_prog:1; | ||||
GLuint rasterizer_discard:1; | |||||
/** | /** | ||||
* Number of varyings that are output to transform feedback. | * Number of varyings that are output to transform feedback. |
} | } | ||||
} | } | ||||
/** | |||||
* De-allocate the URB entry that was previously allocated to this thread | |||||
* (without writing any vertex data to it), and terminate the thread. This is | |||||
* used to implement RASTERIZER_DISCARD functionality. | |||||
*/ | |||||
static void brw_gs_terminate(struct brw_gs_compile *c) | |||||
{ | |||||
struct brw_compile *p = &c->func; | |||||
brw_urb_WRITE(p, | |||||
retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest */ | |||||
0, /* msg_reg_nr */ | |||||
c->reg.header, /* src0 */ | |||||
false, /* allocate */ | |||||
false, /* used */ | |||||
1, /* msg_length */ | |||||
0, /* response_length */ | |||||
true, /* eot */ | |||||
true, /* writes_complete */ | |||||
0, /* offset */ | |||||
BRW_URB_SWIZZLE_NONE); | |||||
} | |||||
/** | /** | ||||
* Send an FF_SYNC message to ensure that all previously spawned GS threads | * Send an FF_SYNC message to ensure that all previously spawned GS threads | ||||
* have finished sending primitives down the pipeline, and to allocate a URB | * have finished sending primitives down the pipeline, and to allocate a URB | ||||
brw_gs_ff_sync(c, 1); | brw_gs_ff_sync(c, 1); | ||||
/* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so | |||||
* release the URB that was just allocated, and terminate the thread. | |||||
*/ | |||||
if (key->rasterizer_discard) { | |||||
brw_gs_terminate(c); | |||||
return; | |||||
} | |||||
brw_gs_overwrite_header_dw2_from_r0(c); | brw_gs_overwrite_header_dw2_from_r0(c); | ||||
switch (num_verts) { | switch (num_verts) { | ||||
case 1: | case 1: |
dw2 |= GEN6_CLIP_GB_TEST; | dw2 |= GEN6_CLIP_GB_TEST; | ||||
} | } | ||||
/* BRW_NEW_RASTERIZER_DISCARD */ | |||||
if (ctx->RasterDiscard) { | |||||
dw2 |= GEN6_CLIP_MODE_REJECT_ALL; | |||||
perf_debug("Rasterizer discard is currently implemented via the clipper; " | |||||
"having the GS not write primitives would likely be faster."); | |||||
} | |||||
BEGIN_BATCH(4); | BEGIN_BATCH(4); | ||||
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); | OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); | ||||
OUT_BATCH(dw1); | OUT_BATCH(dw1); | ||||
const struct brw_tracked_state gen6_clip_state = { | const struct brw_tracked_state gen6_clip_state = { | ||||
.dirty = { | .dirty = { | ||||
.mesa = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS, | .mesa = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS, | ||||
.brw = BRW_NEW_CONTEXT | BRW_NEW_META_IN_PROGRESS, | |||||
.brw = BRW_NEW_CONTEXT | | |||||
BRW_NEW_META_IN_PROGRESS | | |||||
BRW_NEW_RASTERIZER_DISCARD, | |||||
.cache = CACHE_NEW_WM_PROG | .cache = CACHE_NEW_WM_PROG | ||||
}, | }, | ||||
.emit = upload_clip_state, | .emit = upload_clip_state, |