Hook up archrast counters for shader stats: instructions executed.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
tags/18.1-branchpoint
@@ -61,7 +61,7 @@ namespace ArchRast | |||
//@todo:: Change this to numPatches. Assumed: 1 patch per prim. If that holds, it's fine. | |||
}; | |||
struct GSStats | |||
struct GSInfo | |||
{ | |||
uint32_t inputPrimCount; | |||
uint32_t primGeneratedCount; | |||
@@ -369,7 +369,7 @@ namespace ArchRast | |||
DepthStencilStats mDSOmZ = {}; | |||
CStats mClipper = {}; | |||
TEStats mTS = {}; | |||
GSStats mGS = {}; | |||
GSInfo mGS = {}; | |||
RastStats rastStats = {}; | |||
CullStats mCullStats = {}; | |||
AlphaStats mAlphaStats = {}; |
@@ -115,6 +115,36 @@ event FrontendStatsEvent | |||
uint64_t SoNumPrimsWritten3; | |||
}; | |||
event VSStats | |||
{ | |||
uint32_t numInstExecuted; | |||
}; | |||
event HSStats | |||
{ | |||
uint32_t numInstExecuted; | |||
}; | |||
event DSStats | |||
{ | |||
uint32_t numInstExecuted; | |||
}; | |||
event GSStats | |||
{ | |||
uint32_t numInstExecuted; | |||
}; | |||
event PSStats | |||
{ | |||
uint32_t numInstExecuted; | |||
}; | |||
event CSStats | |||
{ | |||
uint32_t numInstExecuted; | |||
}; | |||
event BackendStatsEvent | |||
{ | |||
uint32_t drawId; |
@@ -81,6 +81,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup | |||
state.pfnCsFunc(GetPrivateState(pDC), &csContext); | |||
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); | |||
AR_EVENT(CSStats(csContext.stats.numInstExecuted)); | |||
RDTSC_END(BEDispatch, 1); | |||
} |
@@ -968,6 +968,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); | |||
RDTSC_END(BEPixelShader, 0); | |||
// update stats | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); | |||
AR_EVENT(PSStats(psContext.stats.numInstExecuted)); | |||
// update active lanes to remove any discarded or oMask'd pixels | |||
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si()))); | |||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; }; |
@@ -163,10 +163,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
// execute pixel shader | |||
RDTSC_BEGIN(BEPixelShader, pDC->drawId); | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); | |||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); | |||
RDTSC_END(BEPixelShader, 0); | |||
// update stats | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); | |||
AR_EVENT(PSStats(psContext.stats.numInstExecuted)); | |||
vCoverageMask = _simd_castsi_ps(psContext.activeMask); | |||
// late-Z |
@@ -146,10 +146,13 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
// execute pixel shader | |||
RDTSC_BEGIN(BEPixelShader, pDC->drawId); | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); | |||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); | |||
RDTSC_END(BEPixelShader, 0); | |||
// update stats | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); | |||
AR_EVENT(PSStats(psContext.stats.numInstExecuted)); | |||
vCoverageMask = _simd_castsi_ps(psContext.activeMask); | |||
// late-Z |
@@ -851,6 +851,7 @@ static void GeometryShaderStage( | |||
// execute the geometry shader | |||
state.pfnGsFunc(GetPrivateState(pDC), &gsContext); | |||
AR_EVENT(GSStats(gsContext.stats.numInstExecuted)); | |||
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) | |||
{ | |||
@@ -1253,6 +1254,7 @@ static void TessellationStages( | |||
RDTSC_END(FEHullShader, 0); | |||
UPDATE_STAT_FE(HsInvocations, numPrims); | |||
AR_EVENT(HSStats(hsContext.stats.numInstExecuted)); | |||
const uint32_t* pPrimId = (const uint32_t*)&primID; | |||
@@ -1316,6 +1318,8 @@ static void TessellationStages( | |||
state.pfnDsFunc(GetPrivateState(pDC), &dsContext); | |||
RDTSC_END(FEDomainShader, 0); | |||
AR_EVENT(DSStats(dsContext.stats.numInstExecuted)); | |||
dsInvocations += KNOB_SIMD_WIDTH; | |||
} | |||
UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints); | |||
@@ -1793,12 +1797,15 @@ void ProcessDraw( | |||
RDTSC_BEGIN(FEVertexShader, pDC->drawId); | |||
#if USE_SIMD16_VS | |||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); | |||
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); | |||
#else | |||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); | |||
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); | |||
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH | |||
{ | |||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi); | |||
AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted)); | |||
} | |||
#endif | |||
RDTSC_END(FEVertexShader, 0); | |||
@@ -2016,6 +2023,7 @@ void ProcessDraw( | |||
RDTSC_END(FEVertexShader, 0); | |||
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); | |||
AR_EVENT(VSStats(vsContext.stats.numInstExecuted)); | |||
} | |||
} | |||