- we don't usually need to flush TC L2
- we should flush KCACHE (not really an issue now since we always flush
  KCACHE when updating descriptors, but it could be a problem if we used
  CE, which doesn't require flushing KCACHE)
- add an explicit VS_PARTIAL_FLUSH flag

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
@@ -856,6 +856,36 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	unsigned old_num_targets = sctx->b.streamout.num_targets;
 	unsigned i, bufidx;
 
+	/* We are going to unbind the buffers. Mark which caches need to be flushed. */
+	if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
+		/* Since streamout uses vector writes which go through TC L2
+		 * and most other clients can use TC L2 as well, we don't need
+		 * to flush it.
+		 *
+		 * The only case which requires flushing it is VGT DMA index
+		 * fetching, which is a rare case. Thus, flag the TC L2
+		 * dirtiness in the resource and handle it when index fetching
+		 * is used.
+		 */
+		for (i = 0; i < sctx->b.streamout.num_targets; i++)
+			if (sctx->b.streamout.targets[i])
+				r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+		/* Invalidate the scalar cache in case a streamout buffer is
+		 * going to be used as a constant buffer.
+		 *
+		 * Invalidate TC L1, because streamout bypasses it (done by
+		 * setting GLC=1 in the store instruction), but it can contain
+		 * outdated data of streamout buffers.
+		 *
+		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
+		 * used as an input immediately.
+		 */
+		sctx->b.flags |= SI_CONTEXT_INV_KCACHE |
+				 SI_CONTEXT_INV_TC_L1 |
+				 SI_CONTEXT_VS_PARTIAL_FLUSH;
+	}
+
 	/* Streamout buffers must be bound in 2 places:
 	 * 1) in VGT by setting the VGT_STRMOUT registers
 	 * 2) as shader resources
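
The consumer side of TC_L2_dirty is outside this hunk. A minimal sketch of
what the draw path would do before VGT DMA index fetching, assuming the
usual radeonsi draw-time validation and an index-buffer variable named ib
(the exact placement and names are assumptions, not part of this patch):

	/* VGT DMA index fetching bypasses TC L2, so if streamout left
	 * stale data in TC L2 for this buffer, invalidate it first. */
	if (ib.buffer && r600_resource(ib.buffer)->TC_L2_dirty) {
		sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
		r600_resource(ib.buffer)->TC_L2_dirty = false;
	}

Clearing the flag right after scheduling the invalidation keeps later draws
from re-flushing a buffer that is no longer dirty.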
@@ -65,13 +65,14 @@
 #define SI_CONTEXT_FLUSH_AND_INV_DB	(R600_CONTEXT_PRIVATE_FLAG << 6)
 #define SI_CONTEXT_FLUSH_AND_INV_CB	(R600_CONTEXT_PRIVATE_FLAG << 7)
 /* Engine synchronization. */
-#define SI_CONTEXT_PS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 8)
-#define SI_CONTEXT_CS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 9)
-#define SI_CONTEXT_VGT_FLUSH		(R600_CONTEXT_PRIVATE_FLAG << 10)
-#define SI_CONTEXT_VGT_STREAMOUT_SYNC	(R600_CONTEXT_PRIVATE_FLAG << 11)
+#define SI_CONTEXT_VS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 8)
+#define SI_CONTEXT_PS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 9)
+#define SI_CONTEXT_CS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 10)
+#define SI_CONTEXT_VGT_FLUSH		(R600_CONTEXT_PRIVATE_FLAG << 11)
+#define SI_CONTEXT_VGT_STREAMOUT_SYNC	(R600_CONTEXT_PRIVATE_FLAG << 12)
 /* Compute only. */
-#define SI_CONTEXT_FLUSH_WITH_INV_L2	(R600_CONTEXT_PRIVATE_FLAG << 12) /* TODO: merge with TC? */
-#define SI_CONTEXT_FLAG_COMPUTE		(R600_CONTEXT_PRIVATE_FLAG << 13)
+#define SI_CONTEXT_FLUSH_WITH_INV_L2	(R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */
+#define SI_CONTEXT_FLAG_COMPUTE		(R600_CONTEXT_PRIVATE_FLAG << 14)
 #define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER	(SI_CONTEXT_FLUSH_AND_INV_CB | \
 						 SI_CONTEXT_FLUSH_AND_INV_CB_META | \
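
Renumbering these private flags by hand is easy to get wrong: every flag
must land on a distinct bit, and the highest one must still fit in the
32-bit flags word shared with the common r600 code. A hedged compile-time
check along these lines could guard against that (the negative-array-size
typedef works at file scope in C89; adding it is an assumption, not part of
this patch):

	/* Fails to compile if the highest private flag overflows 32 bits. */
	typedef char si_context_flags_fit_in_32_bits[
		(SI_CONTEXT_FLAG_COMPUTE <= (1u << 31)) ? 1 : -1];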
@@ -388,9 +388,9 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom)
 		cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 	}
-	if (sctx->flags & (SI_CONTEXT_INV_TC_L1 | R600_CONTEXT_STREAMOUT_FLUSH))
+	if (sctx->flags & SI_CONTEXT_INV_TC_L1)
 		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-	if (sctx->flags & (SI_CONTEXT_INV_TC_L2 | R600_CONTEXT_STREAMOUT_FLUSH))
+	if (sctx->flags & SI_CONTEXT_INV_TC_L2)
 		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
 	if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
@@ -444,8 +444,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom)
 	if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-	} else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
-		/* Needed if streamout buffers are going to be used as a source. */
+	} else if (sctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 	}
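
For context on how the new flag is consumed end to end: a state change ORs
SI_CONTEXT_VS_PARTIAL_FLUSH into sctx->b.flags (as si_set_streamout_targets
now does), and si_emit_cache_flush later turns it into the EVENT_WRITE above
before the next draw. A minimal sketch of a hypothetical caller that needs
VS outputs visible immediately (the surrounding context is assumed):

	/* Request the sync; si_emit_cache_flush() will emit
	 * VS_PARTIAL_FLUSH (EVENT_INDEX 4) and then clear the flags. */
	sctx->b.flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;

Because the event is only emitted when PS_PARTIAL_FLUSH is not also
requested (the else-if above), a caller asking for both gets the stronger
PS_PARTIAL_FLUSH, which already implies the VS stage has drained.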