Renamed rdtsc defines more appropriately. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com> tags/18.1-branchpoint
@@ -256,9 +256,9 @@ void QueueWork(SWR_CONTEXT *pContext) | |||
} | |||
else | |||
{ | |||
AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId); | |||
RDTSC_BEGIN(APIDrawWakeAllThreads, pDC->drawId); | |||
WakeAllThreads(pContext); | |||
AR_API_END(APIDrawWakeAllThreads, 1); | |||
RDTSC_END(APIDrawWakeAllThreads, 1); | |||
} | |||
// Set current draw context to NULL so that next state call forces a new draw context to be created and populated. | |||
@@ -278,7 +278,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext) | |||
DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) | |||
{ | |||
AR_API_BEGIN(APIGetDrawContext, 0); | |||
RDTSC_BEGIN(APIGetDrawContext, 0); | |||
// If current draw context is null then need to obtain a new draw context to use from ring. | |||
if (pContext->pCurDrawContext == nullptr) | |||
{ | |||
@@ -367,7 +367,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) | |||
SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC"); | |||
} | |||
AR_API_END(APIGetDrawContext, 0); | |||
RDTSC_END(APIGetDrawContext, 0); | |||
return pContext->pCurDrawContext; | |||
} | |||
@@ -477,7 +477,7 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
DRAW_CONTEXT* pDC = GetDrawContext(pContext); | |||
AR_API_BEGIN(APISync, 0); | |||
RDTSC_BEGIN(APISync, 0); | |||
pDC->FeWork.type = SYNC; | |||
pDC->FeWork.pfnWork = ProcessSync; | |||
@@ -493,7 +493,7 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint | |||
//enqueue | |||
QueueDraw(pContext); | |||
AR_API_END(APISync, 1); | |||
RDTSC_END(APISync, 1); | |||
} | |||
void SwrStallBE(HANDLE hContext) | |||
@@ -508,28 +508,28 @@ void SwrWaitForIdle(HANDLE hContext) | |||
{ | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
AR_API_BEGIN(APIWaitForIdle, 0); | |||
RDTSC_BEGIN(APIWaitForIdle, 0); | |||
while (!pContext->dcRing.IsEmpty()) | |||
{ | |||
_mm_pause(); | |||
} | |||
AR_API_END(APIWaitForIdle, 1); | |||
RDTSC_END(APIWaitForIdle, 1); | |||
} | |||
void SwrWaitForIdleFE(HANDLE hContext) | |||
{ | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
AR_API_BEGIN(APIWaitForIdle, 0); | |||
RDTSC_BEGIN(APIWaitForIdle, 0); | |||
while (pContext->drawsOutstandingFE > 0) | |||
{ | |||
_mm_pause(); | |||
} | |||
AR_API_END(APIWaitForIdle, 1); | |||
RDTSC_END(APIWaitForIdle, 1); | |||
} | |||
void SwrSetVertexBuffers( | |||
@@ -1167,7 +1167,7 @@ void DrawInstanced( | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
DRAW_CONTEXT* pDC = GetDrawContext(pContext); | |||
AR_API_BEGIN(APIDraw, pDC->drawId); | |||
RDTSC_BEGIN(APIDraw, pDC->drawId); | |||
AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance)); | |||
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology); | |||
@@ -1230,7 +1230,7 @@ void DrawInstanced( | |||
pDC = GetDrawContext(pContext); | |||
pDC->pState->state.rastState.cullMode = oldCullMode; | |||
AR_API_END(APIDraw, numVertices * numInstances); | |||
RDTSC_END(APIDraw, numVertices * numInstances); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -1295,7 +1295,7 @@ void DrawIndexedInstance( | |||
DRAW_CONTEXT* pDC = GetDrawContext(pContext); | |||
API_STATE* pState = &pDC->pState->state; | |||
AR_API_BEGIN(APIDrawIndexed, pDC->drawId); | |||
RDTSC_BEGIN(APIDrawIndexed, pDC->drawId); | |||
AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); | |||
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); | |||
@@ -1376,7 +1376,7 @@ void DrawIndexedInstance( | |||
pDC = GetDrawContext(pContext); | |||
pDC->pState->state.rastState.cullMode = oldCullMode; | |||
AR_API_END(APIDrawIndexed, numIndices * numInstances); | |||
RDTSC_END(APIDrawIndexed, numIndices * numInstances); | |||
} | |||
@@ -1508,7 +1508,7 @@ void SwrDispatch( | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
DRAW_CONTEXT* pDC = GetDrawContext(pContext); | |||
AR_API_BEGIN(APIDispatch, pDC->drawId); | |||
RDTSC_BEGIN(APIDispatch, pDC->drawId); | |||
AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ)); | |||
pDC->isCompute = true; // This is a compute context. | |||
@@ -1524,7 +1524,7 @@ void SwrDispatch( | |||
pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE); | |||
QueueDispatch(pContext); | |||
AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); | |||
RDTSC_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); | |||
} | |||
// Deswizzles, converts and stores current contents of the hot tiles to surface | |||
@@ -1543,7 +1543,7 @@ void SWR_API SwrStoreTiles( | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
DRAW_CONTEXT* pDC = GetDrawContext(pContext); | |||
AR_API_BEGIN(APIStoreTiles, pDC->drawId); | |||
RDTSC_BEGIN(APIStoreTiles, pDC->drawId); | |||
pDC->FeWork.type = STORETILES; | |||
pDC->FeWork.pfnWork = ProcessStoreTiles; | |||
@@ -1557,7 +1557,7 @@ void SWR_API SwrStoreTiles( | |||
AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId)); | |||
AR_API_END(APIStoreTiles, 1); | |||
RDTSC_END(APIStoreTiles, 1); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -1586,7 +1586,7 @@ void SWR_API SwrClearRenderTarget( | |||
SWR_CONTEXT *pContext = GetContext(hContext); | |||
DRAW_CONTEXT* pDC = GetDrawContext(pContext); | |||
AR_API_BEGIN(APIClearRenderTarget, pDC->drawId); | |||
RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId); | |||
pDC->FeWork.type = CLEAR; | |||
pDC->FeWork.pfnWork = ProcessClear; | |||
@@ -1604,7 +1604,7 @@ void SWR_API SwrClearRenderTarget( | |||
// enqueue draw | |||
QueueDraw(pContext); | |||
AR_API_END(APIClearRenderTarget, 1); | |||
RDTSC_END(APIClearRenderTarget, 1); | |||
} | |||
////////////////////////////////////////////////////////////////////////// |
@@ -48,7 +48,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(BEDispatch, pDC->drawId); | |||
RDTSC_BEGIN(BEDispatch, pDC->drawId); | |||
const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData(); | |||
SWR_ASSERT(pTaskData != nullptr); | |||
@@ -82,7 +82,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup | |||
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); | |||
AR_END(BEDispatch, 1); | |||
RDTSC_END(BEDispatch, 1); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -107,7 +107,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(BEStoreTiles, pDC->drawId); | |||
RDTSC_BEGIN(BEStoreTiles, pDC->drawId); | |||
SWR_FORMAT srcFormat; | |||
switch (attachment) | |||
@@ -159,7 +159,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile | |||
} | |||
} | |||
} | |||
AR_END(BEStoreTiles, 1); | |||
RDTSC_END(BEStoreTiles, 1); | |||
} | |||
void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) | |||
@@ -201,9 +201,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(BENullBackend, pDC->drawId); | |||
RDTSC_BEGIN(BENullBackend, pDC->drawId); | |||
///@todo: handle center multisample pattern | |||
AR_BEGIN(BESetup, pDC->drawId); | |||
RDTSC_BEGIN(BESetup, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
@@ -216,7 +216,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, | |||
SWR_PS_CONTEXT psContext; | |||
// skip SetupPixelShaderContext(&psContext, ...); // not needed here | |||
AR_END(BESetup, 0); | |||
RDTSC_END(BESetup, 0); | |||
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y))); | |||
@@ -257,7 +257,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, | |||
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz); | |||
} | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
// calculate per sample positions | |||
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample)); | |||
@@ -269,7 +269,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, | |||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); | |||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); | |||
AR_END(BEBarycentric, 0); | |||
RDTSC_END(BEBarycentric, 0); | |||
// interpolate user clip distance if available | |||
if (state.backendState.clipDistanceMask) | |||
@@ -280,13 +280,13 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, | |||
simdscalar vCoverageMask = _simd_vmask_ps(coverageMask); | |||
simdscalar stencilPassMask = vCoverageMask; | |||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId); | |||
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); | |||
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, | |||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); | |||
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); | |||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, | |||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); | |||
AR_END(BEEarlyDepthTest, 0); | |||
RDTSC_END(BEEarlyDepthTest, 0); | |||
uint32_t statMask = _simd_movemask_ps(depthPassMask); | |||
uint32_t statCount = _mm_popcnt_u32(statMask); | |||
@@ -307,7 +307,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, | |||
vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy); | |||
} | |||
AR_END(BENullBackend, 0); | |||
RDTSC_END(BENullBackend, 0); | |||
} | |||
PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {}; |
@@ -181,7 +181,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo | |||
SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason. | |||
AR_BEGIN(BEClear, pDC->drawId); | |||
RDTSC_BEGIN(BEClear, pDC->drawId); | |||
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) | |||
{ | |||
@@ -217,13 +217,13 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo | |||
pHotTile->state = HOTTILE_CLEAR; | |||
} | |||
AR_END(BEClear, 1); | |||
RDTSC_END(BEClear, 1); | |||
} | |||
else | |||
{ | |||
// Legacy clear | |||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; | |||
AR_BEGIN(BEClear, pDC->drawId); | |||
RDTSC_BEGIN(BEClear, pDC->drawId); | |||
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) | |||
{ | |||
@@ -265,7 +265,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo | |||
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); | |||
} | |||
AR_END(BEClear, 1); | |||
RDTSC_END(BEClear, 1); | |||
} | |||
} | |||
@@ -600,7 +600,7 @@ struct PixelRateZTestLoop | |||
vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz))); | |||
} | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
// calculate per sample positions | |||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample)); | |||
@@ -622,7 +622,7 @@ struct PixelRateZTestLoop | |||
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]); | |||
} | |||
AR_END(BEBarycentric, 0); | |||
RDTSC_END(BEBarycentric, 0); | |||
///@todo: perspective correct vs non-perspective correct clipping? | |||
// if clip distances are enabled, we need to interpolate for each sample | |||
@@ -635,13 +635,13 @@ struct PixelRateZTestLoop | |||
// ZTest for this sample | |||
///@todo Need to uncomment out this bucket. | |||
//AR_BEGIN(BEDepthBucket, pDC->drawId); | |||
//RDTSC_BEGIN(BEDepthBucket, pDC->drawId); | |||
depthPassMask[sample] = vCoverageMask[sample]; | |||
stencilPassMask[sample] = vCoverageMask[sample]; | |||
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, | |||
vZ[sample], pDepthSample, vCoverageMask[sample], | |||
pStencilSample, &stencilPassMask[sample]); | |||
//AR_END(BEDepthBucket, 0); | |||
//RDTSC_END(BEDepthBucket, 0); | |||
// early-exit if no pixels passed depth or earlyZ is forced on | |||
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample])) | |||
@@ -869,8 +869,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(BEPixelRateBackend, pDC->drawId); | |||
AR_BEGIN(BESetup, pDC->drawId); | |||
RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId); | |||
RDTSC_BEGIN(BESetup, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
@@ -884,7 +884,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
uint8_t *pDepthBuffer, *pStencilBuffer; | |||
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers); | |||
AR_END(BESetup, 0); | |||
RDTSC_END(BESetup, 0); | |||
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.backendState.clipDistanceMask); | |||
@@ -916,13 +916,13 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask); | |||
} | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
CalcPixelBarycentrics(coeffs, psContext); | |||
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask); | |||
AR_END(BEBarycentric, 0); | |||
RDTSC_END(BEBarycentric, 0); | |||
if(T::bForcedSampleCount) | |||
{ | |||
@@ -944,11 +944,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
if(state.psState.usesSourceDepth) | |||
{ | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
// interpolate and quantize z | |||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); | |||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); | |||
AR_END(BEBarycentric, 0); | |||
RDTSC_END(BEBarycentric, 0); | |||
} | |||
// pixels that are currently active | |||
@@ -956,10 +956,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
psContext.oMask = T::MultisampleT::FullSampleMask(); | |||
// execute pixel shader | |||
AR_BEGIN(BEPixelShader, pDC->drawId); | |||
RDTSC_BEGIN(BEPixelShader, pDC->drawId); | |||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); | |||
AR_END(BEPixelShader, 0); | |||
RDTSC_END(BEPixelShader, 0); | |||
// update active lanes to remove any discarded or oMask'd pixels | |||
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si()))); | |||
@@ -980,7 +980,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
// loop over all samples, broadcasting the results of the PS to all passing pixels | |||
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++) | |||
{ | |||
AR_BEGIN(BEOutputMerger, pDC->drawId); | |||
RDTSC_BEGIN(BEOutputMerger, pDC->drawId); | |||
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples | |||
uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample; | |||
simdscalar coverageMask, depthMask; | |||
@@ -995,7 +995,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
if(!_simd_movemask_ps(depthMask)) | |||
{ | |||
// stencil should already have been written in early/lateZ tests | |||
AR_END(BEOutputMerger, 0); | |||
RDTSC_END(BEOutputMerger, 0); | |||
continue; | |||
} | |||
} | |||
@@ -1015,10 +1015,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t | |||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum], | |||
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]); | |||
} | |||
AR_END(BEOutputMerger, 0); | |||
RDTSC_END(BEOutputMerger, 0); | |||
} | |||
Endtile: | |||
AR_BEGIN(BEEndTile, pDC->drawId); | |||
RDTSC_BEGIN(BEEndTile, pDC->drawId); | |||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) | |||
{ | |||
@@ -1054,7 +1054,7 @@ Endtile: | |||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8; | |||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; | |||
AR_END(BEEndTile, 0); | |||
RDTSC_END(BEEndTile, 0); | |||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); | |||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); | |||
@@ -1064,7 +1064,7 @@ Endtile: | |||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); | |||
} | |||
AR_END(BEPixelRateBackend, 0); | |||
RDTSC_END(BEPixelRateBackend, 0); | |||
} | |||
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0, |
@@ -42,8 +42,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(BESampleRateBackend, pDC->drawId); | |||
AR_BEGIN(BESetup, pDC->drawId); | |||
RDTSC_BEGIN(BESampleRateBackend, pDC->drawId); | |||
RDTSC_BEGIN(BESetup, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
@@ -57,7 +57,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
uint8_t *pDepthBuffer, *pStencilBuffer; | |||
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers); | |||
AR_END(BESetup, 0); | |||
RDTSC_END(BESetup, 0); | |||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y))); | |||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y))); | |||
@@ -83,13 +83,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask); | |||
} | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
CalcPixelBarycentrics(coeffs, psContext); | |||
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask); | |||
AR_END(BEBarycentric, 0); | |||
RDTSC_END(BEBarycentric, 0); | |||
for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++) | |||
{ | |||
@@ -113,7 +113,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz); | |||
} | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
// calculate per sample positions | |||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample)); | |||
@@ -125,7 +125,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); | |||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); | |||
AR_END(BEBarycentric, 0); | |||
RDTSC_END(BEBarycentric, 0); | |||
// interpolate user clip distance if available | |||
if (state.backendState.clipDistanceMask) | |||
@@ -140,11 +140,11 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
// Early-Z? | |||
if (T::bCanEarlyZ) | |||
{ | |||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId); | |||
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); | |||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, | |||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); | |||
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); | |||
AR_END(BEEarlyDepthTest, 0); | |||
RDTSC_END(BEEarlyDepthTest, 0); | |||
// early-exit if no samples passed depth or earlyZ is forced on. | |||
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask)) | |||
@@ -164,21 +164,21 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
psContext.activeMask = _simd_castps_si(vCoverageMask); | |||
// execute pixel shader | |||
AR_BEGIN(BEPixelShader, pDC->drawId); | |||
RDTSC_BEGIN(BEPixelShader, pDC->drawId); | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); | |||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); | |||
AR_END(BEPixelShader, 0); | |||
RDTSC_END(BEPixelShader, 0); | |||
vCoverageMask = _simd_castsi_ps(psContext.activeMask); | |||
// late-Z | |||
if (!T::bCanEarlyZ) | |||
{ | |||
AR_BEGIN(BELateDepthTest, pDC->drawId); | |||
RDTSC_BEGIN(BELateDepthTest, pDC->drawId); | |||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, | |||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); | |||
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); | |||
AR_END(BELateDepthTest, 0); | |||
RDTSC_END(BELateDepthTest, 0); | |||
if (!_simd_movemask_ps(depthPassMask)) | |||
{ | |||
@@ -196,7 +196,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
UPDATE_STAT_BE(DepthPassCount, statCount); | |||
// output merger | |||
AR_BEGIN(BEOutputMerger, pDC->drawId); | |||
RDTSC_BEGIN(BEOutputMerger, pDC->drawId); | |||
#if USE_8x2_TILE_BACKEND | |||
OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); | |||
#else | |||
@@ -209,7 +209,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, | |||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); | |||
} | |||
AR_END(BEOutputMerger, 0); | |||
RDTSC_END(BEOutputMerger, 0); | |||
} | |||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); | |||
} | |||
@@ -217,7 +217,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
Endtile: | |||
ATTR_UNUSED; | |||
AR_BEGIN(BEEndTile, pDC->drawId); | |||
RDTSC_BEGIN(BEEndTile, pDC->drawId); | |||
if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) | |||
{ | |||
@@ -247,7 +247,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8; | |||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; | |||
AR_END(BEEndTile, 0); | |||
RDTSC_END(BEEndTile, 0); | |||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); | |||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); | |||
@@ -257,7 +257,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ | |||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); | |||
} | |||
AR_END(BESampleRateBackend, 0); | |||
RDTSC_END(BESampleRateBackend, 0); | |||
} | |||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function |
@@ -42,8 +42,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(BESingleSampleBackend, pDC->drawId); | |||
AR_BEGIN(BESetup, pDC->drawId); | |||
RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId); | |||
RDTSC_BEGIN(BESetup, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
@@ -57,7 +57,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
uint8_t *pDepthBuffer, *pStencilBuffer; | |||
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers); | |||
AR_END(BESetup, 1); | |||
RDTSC_END(BESetup, 1); | |||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y))); | |||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y))); | |||
@@ -99,7 +99,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask); | |||
} | |||
AR_BEGIN(BEBarycentric, pDC->drawId); | |||
RDTSC_BEGIN(BEBarycentric, pDC->drawId); | |||
CalcPixelBarycentrics(coeffs, psContext); | |||
@@ -109,7 +109,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); | |||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); | |||
AR_END(BEBarycentric, 1); | |||
RDTSC_END(BEBarycentric, 1); | |||
// interpolate user clip distance if available | |||
if (state.backendState.clipDistanceMask) | |||
@@ -124,11 +124,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
// Early-Z? | |||
if (T::bCanEarlyZ) | |||
{ | |||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId); | |||
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); | |||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, | |||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask); | |||
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); | |||
AR_END(BEEarlyDepthTest, 0); | |||
RDTSC_END(BEEarlyDepthTest, 0); | |||
// early-exit if no pixels passed depth or earlyZ is forced on | |||
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask)) | |||
@@ -147,21 +147,21 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
psContext.activeMask = _simd_castps_si(vCoverageMask); | |||
// execute pixel shader | |||
AR_BEGIN(BEPixelShader, pDC->drawId); | |||
RDTSC_BEGIN(BEPixelShader, pDC->drawId); | |||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); | |||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); | |||
AR_END(BEPixelShader, 0); | |||
RDTSC_END(BEPixelShader, 0); | |||
vCoverageMask = _simd_castsi_ps(psContext.activeMask); | |||
// late-Z | |||
if (!T::bCanEarlyZ) | |||
{ | |||
AR_BEGIN(BELateDepthTest, pDC->drawId); | |||
RDTSC_BEGIN(BELateDepthTest, pDC->drawId); | |||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, | |||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask); | |||
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); | |||
AR_END(BELateDepthTest, 0); | |||
RDTSC_END(BELateDepthTest, 0); | |||
if (!_simd_movemask_ps(depthPassMask)) | |||
{ | |||
@@ -181,7 +181,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
UPDATE_STAT_BE(DepthPassCount, statCount); | |||
// output merger | |||
AR_BEGIN(BEOutputMerger, pDC->drawId); | |||
RDTSC_BEGIN(BEOutputMerger, pDC->drawId); | |||
#if USE_8x2_TILE_BACKEND | |||
OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); | |||
#else | |||
@@ -194,11 +194,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 | |||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, | |||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask); | |||
} | |||
AR_END(BEOutputMerger, 0); | |||
RDTSC_END(BEOutputMerger, 0); | |||
} | |||
Endtile: | |||
AR_BEGIN(BEEndTile, pDC->drawId); | |||
RDTSC_BEGIN(BEEndTile, pDC->drawId); | |||
work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); | |||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) | |||
@@ -229,7 +229,7 @@ Endtile: | |||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8; | |||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; | |||
AR_END(BEEndTile, 0); | |||
RDTSC_END(BEEndTile, 0); | |||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); | |||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); | |||
@@ -239,7 +239,7 @@ Endtile: | |||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); | |||
} | |||
AR_END(BESingleSampleBackend, 0); | |||
RDTSC_END(BESingleSampleBackend, 0); | |||
} | |||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function |
@@ -651,7 +651,7 @@ void SIMDCALL BinTrianglesImpl( | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx); | |||
AR_BEGIN(FEBinTriangles, pDC->drawId); | |||
RDTSC_BEGIN(FEBinTriangles, pDC->drawId); | |||
const API_STATE& state = GetApiState(pDC); | |||
const SWR_RASTSTATE& rastState = state.rastState; | |||
@@ -958,7 +958,7 @@ void SIMDCALL BinTrianglesImpl( | |||
if (!triMask) | |||
{ | |||
AR_END(FEBinTriangles, 1); | |||
RDTSC_END(FEBinTriangles, 1); | |||
return; | |||
} | |||
} | |||
@@ -998,7 +998,7 @@ endBinTriangles: | |||
BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); | |||
AR_END(FEBinTriangles, 1); | |||
RDTSC_END(FEBinTriangles, 1); | |||
return; | |||
} | |||
else if (rastState.fillMode == SWR_FILLMODE_POINT) | |||
@@ -1008,7 +1008,7 @@ endBinTriangles: | |||
BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx); | |||
BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx); | |||
AR_END(FEBinTriangles, 1); | |||
RDTSC_END(FEBinTriangles, 1); | |||
return; | |||
} | |||
@@ -1114,7 +1114,7 @@ endBinTriangles: | |||
triMask &= ~(1 << triIndex); | |||
} | |||
AR_END(FEBinTriangles, 1); | |||
RDTSC_END(FEBinTriangles, 1); | |||
} | |||
template <typename CT> | |||
@@ -1197,7 +1197,7 @@ void BinPostSetupPointsImpl( | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEBinPoints, pDC->drawId); | |||
RDTSC_BEGIN(FEBinPoints, pDC->drawId); | |||
typename SIMD_T::Vec4 &primVerts = prim[0]; | |||
@@ -1480,7 +1480,7 @@ void BinPostSetupPointsImpl( | |||
} | |||
} | |||
AR_END(FEBinPoints, 1); | |||
RDTSC_END(FEBinPoints, 1); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -1608,7 +1608,7 @@ void BinPostSetupLinesImpl( | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx); | |||
AR_BEGIN(FEBinLines, pDC->drawId); | |||
RDTSC_BEGIN(FEBinLines, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
const SWR_RASTSTATE &rastState = state.rastState; | |||
@@ -1789,7 +1789,7 @@ void BinPostSetupLinesImpl( | |||
endBinLines: | |||
AR_END(FEBinLines, 1); | |||
RDTSC_END(FEBinLines, 1); | |||
} | |||
////////////////////////////////////////////////////////////////////////// |
@@ -164,30 +164,30 @@ void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvecto | |||
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEClipTriangles, pDC->drawId); | |||
RDTSC_BEGIN(FEClipTriangles, pDC->drawId); | |||
Clipper<SIMD256, 3> clipper(workerId, pDC); | |||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); | |||
AR_END(FEClipTriangles, 1); | |||
RDTSC_END(FEClipTriangles, 1); | |||
} | |||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, | |||
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEClipLines, pDC->drawId); | |||
RDTSC_BEGIN(FEClipLines, pDC->drawId); | |||
Clipper<SIMD256, 2> clipper(workerId, pDC); | |||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); | |||
AR_END(FEClipLines, 1); | |||
RDTSC_END(FEClipLines, 1); | |||
} | |||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, | |||
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEClipPoints, pDC->drawId); | |||
RDTSC_BEGIN(FEClipPoints, pDC->drawId); | |||
Clipper<SIMD256, 1> clipper(workerId, pDC); | |||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); | |||
AR_END(FEClipPoints, 1); | |||
RDTSC_END(FEClipPoints, 1); | |||
} | |||
#if USE_SIMD16_FRONTEND | |||
@@ -195,7 +195,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor | |||
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEClipTriangles, pDC->drawId); | |||
RDTSC_BEGIN(FEClipTriangles, pDC->drawId); | |||
enum { VERTS_PER_PRIM = 3 }; | |||
@@ -204,14 +204,14 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor | |||
pa.useAlternateOffset = false; | |||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); | |||
AR_END(FEClipTriangles, 1); | |||
RDTSC_END(FEClipTriangles, 1); | |||
} | |||
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, | |||
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEClipLines, pDC->drawId); | |||
RDTSC_BEGIN(FEClipLines, pDC->drawId); | |||
enum { VERTS_PER_PRIM = 2 }; | |||
@@ -220,14 +220,14 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI | |||
pa.useAlternateOffset = false; | |||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); | |||
AR_END(FEClipLines, 1); | |||
RDTSC_END(FEClipLines, 1); | |||
} | |||
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, | |||
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEClipPoints, pDC->drawId); | |||
RDTSC_BEGIN(FEClipPoints, pDC->drawId); | |||
enum { VERTS_PER_PRIM = 1 }; | |||
@@ -236,7 +236,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker | |||
pa.useAlternateOffset = false; | |||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); | |||
AR_END(FEClipPoints, 1); | |||
RDTSC_END(FEClipPoints, 1); | |||
} | |||
#endif |
@@ -719,11 +719,11 @@ public: | |||
if (clipMask) | |||
{ | |||
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); | |||
RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId); | |||
// we have to clip tris, execute the clipper, which will also | |||
// call the binner | |||
ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx); | |||
AR_END(FEGuardbandClip, 1); | |||
RDTSC_END(FEGuardbandClip, 1); | |||
} | |||
else if (validMask) | |||
{ |
@@ -526,30 +526,25 @@ struct SWR_CONTEXT | |||
#define AR_WORKER_CTX pContext->pArContext[workerId] | |||
#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads] | |||
#ifdef KNOB_ENABLE_RDTSC | |||
#define RDTSC_BEGIN(type, drawid) RDTSC_START(type) | |||
#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0) | |||
#else | |||
#define RDTSC_BEGIN(type, drawid) | |||
#define RDTSC_END(type, count) | |||
#endif | |||
#ifdef KNOB_ENABLE_AR | |||
#define _AR_BEGIN(ctx, type, id) ArchRast::Dispatch(ctx, ArchRast::Start(ArchRast::type, id)) | |||
#define _AR_END(ctx, type, count) ArchRast::Dispatch(ctx, ArchRast::End(ArchRast::type, count)) | |||
#define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event) | |||
#define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id) | |||
#else | |||
#ifdef KNOB_ENABLE_RDTSC | |||
#define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type) | |||
#define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0) | |||
#else | |||
#define _AR_BEGIN(ctx, type, id) (void)ctx | |||
#define _AR_END(ctx, type, id) | |||
#endif | |||
#define _AR_EVENT(ctx, event) | |||
#define _AR_FLUSH(ctx, id) | |||
#endif | |||
// Use these macros for api thread. | |||
#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id) | |||
#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count) | |||
#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event) | |||
// Use these macros for worker threads. | |||
#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id) | |||
#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count) | |||
#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event) | |||
#define AR_FLUSH(id) _AR_FLUSH(AR_WORKER_CTX, id) |
@@ -150,7 +150,7 @@ void ProcessStoreTiles( | |||
uint32_t workerId, | |||
void *pUserData) | |||
{ | |||
AR_BEGIN(FEProcessStoreTiles, pDC->drawId); | |||
RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId); | |||
MacroTileMgr *pTileMgr = pDC->pTileMgr; | |||
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData; | |||
@@ -175,7 +175,7 @@ void ProcessStoreTiles( | |||
} | |||
} | |||
AR_END(FEProcessStoreTiles, 0); | |||
RDTSC_END(FEProcessStoreTiles, 0); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -191,7 +191,7 @@ void ProcessDiscardInvalidateTiles( | |||
uint32_t workerId, | |||
void *pUserData) | |||
{ | |||
AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId); | |||
RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId); | |||
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; | |||
MacroTileMgr *pTileMgr = pDC->pTileMgr; | |||
@@ -230,7 +230,7 @@ void ProcessDiscardInvalidateTiles( | |||
} | |||
} | |||
AR_END(FEProcessInvalidateTiles, 0); | |||
RDTSC_END(FEProcessInvalidateTiles, 0); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -507,7 +507,7 @@ static void StreamOut( | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEStreamout, pDC->drawId); | |||
RDTSC_BEGIN(FEStreamout, pDC->drawId); | |||
const API_STATE& state = GetApiState(pDC); | |||
const SWR_STREAMOUT_STATE &soState = state.soState; | |||
@@ -582,7 +582,7 @@ static void StreamOut( | |||
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); | |||
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); | |||
AR_END(FEStreamout, 1); | |||
RDTSC_END(FEStreamout, 1); | |||
} | |||
#if USE_SIMD16_FRONTEND | |||
@@ -801,7 +801,7 @@ static void GeometryShaderStage( | |||
{ | |||
SWR_CONTEXT *pContext = pDC->pContext; | |||
AR_BEGIN(FEGeometryShader, pDC->drawId); | |||
RDTSC_BEGIN(FEGeometryShader, pDC->drawId); | |||
const API_STATE& state = GetApiState(pDC); | |||
const SWR_GS_STATE* pState = &state.gsState; | |||
@@ -1073,7 +1073,7 @@ static void GeometryShaderStage( | |||
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount); | |||
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated); | |||
AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims)); | |||
AR_END(FEGeometryShader, 1); | |||
RDTSC_END(FEGeometryShader, 1); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -1253,9 +1253,9 @@ static void TessellationStages( | |||
hsContext.mask = GenerateMask(numPrims); | |||
// Run the HS | |||
AR_BEGIN(FEHullShader, pDC->drawId); | |||
RDTSC_BEGIN(FEHullShader, pDC->drawId); | |||
state.pfnHsFunc(GetPrivateState(pDC), &hsContext); | |||
AR_END(FEHullShader, 0); | |||
RDTSC_END(FEHullShader, 0); | |||
UPDATE_STAT_FE(HsInvocations, numPrims); | |||
@@ -1265,10 +1265,10 @@ static void TessellationStages( | |||
{ | |||
// Run Tessellator | |||
SWR_TS_TESSELLATED_DATA tsData = { 0 }; | |||
AR_BEGIN(FETessellation, pDC->drawId); | |||
RDTSC_BEGIN(FETessellation, pDC->drawId); | |||
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData); | |||
AR_EVENT(TessPrimCount(1)); | |||
AR_END(FETessellation, 0); | |||
RDTSC_END(FETessellation, 0); | |||
if (tsData.NumPrimitives == 0) | |||
{ | |||
@@ -1317,9 +1317,9 @@ static void TessellationStages( | |||
{ | |||
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); | |||
AR_BEGIN(FEDomainShader, pDC->drawId); | |||
RDTSC_BEGIN(FEDomainShader, pDC->drawId); | |||
state.pfnDsFunc(GetPrivateState(pDC), &dsContext); | |||
AR_END(FEDomainShader, 0); | |||
RDTSC_END(FEDomainShader, 0); | |||
dsInvocations += KNOB_SIMD_WIDTH; | |||
} | |||
@@ -1390,14 +1390,14 @@ static void TessellationStages( | |||
#else | |||
simdvector prim[3]; // Only deal with triangles, lines, or points | |||
#endif | |||
AR_BEGIN(FEPAAssemble, pDC->drawId); | |||
RDTSC_BEGIN(FEPAAssemble, pDC->drawId); | |||
bool assemble = | |||
#if USE_SIMD16_FRONTEND | |||
tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16); | |||
#else | |||
tessPa.Assemble(VERTEX_POSITION_SLOT, prim); | |||
#endif | |||
AR_END(FEPAAssemble, 1); | |||
RDTSC_END(FEPAAssemble, 1); | |||
SWR_ASSERT(assemble); | |||
SWR_ASSERT(pfnClipFunc); | |||
@@ -1520,7 +1520,7 @@ void ProcessDraw( | |||
} | |||
#endif | |||
AR_BEGIN(FEProcessDraw, pDC->drawId); | |||
RDTSC_BEGIN(FEProcessDraw, pDC->drawId); | |||
DRAW_WORK& work = *(DRAW_WORK*)pUserData; | |||
const API_STATE& state = GetApiState(pDC); | |||
@@ -1725,7 +1725,7 @@ void ProcessDraw( | |||
if (i < endVertex) | |||
{ | |||
// 1. Execute FS/VS for a single SIMD. | |||
AR_BEGIN(FEFetchShader, pDC->drawId); | |||
RDTSC_BEGIN(FEFetchShader, pDC->drawId); | |||
#if USE_SIMD16_SHADERS | |||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin); | |||
#else | |||
@@ -1736,7 +1736,7 @@ void ProcessDraw( | |||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi); | |||
} | |||
#endif | |||
AR_END(FEFetchShader, 0); | |||
RDTSC_END(FEFetchShader, 0); | |||
// forward fetch generated vertex IDs to the vertex shader | |||
#if USE_SIMD16_SHADERS | |||
@@ -1780,7 +1780,7 @@ void ProcessDraw( | |||
if (!KNOB_TOSS_FETCH) | |||
#endif | |||
{ | |||
AR_BEGIN(FEVertexShader, pDC->drawId); | |||
RDTSC_BEGIN(FEVertexShader, pDC->drawId); | |||
#if USE_SIMD16_VS | |||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); | |||
#else | |||
@@ -1791,7 +1791,7 @@ void ProcessDraw( | |||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi); | |||
} | |||
#endif | |||
AR_END(FEVertexShader, 0); | |||
RDTSC_END(FEVertexShader, 0); | |||
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); | |||
} | |||
@@ -1979,9 +1979,9 @@ void ProcessDraw( | |||
{ | |||
// 1. Execute FS/VS for a single SIMD. | |||
AR_BEGIN(FEFetchShader, pDC->drawId); | |||
RDTSC_BEGIN(FEFetchShader, pDC->drawId); | |||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout); | |||
AR_END(FEFetchShader, 0); | |||
RDTSC_END(FEFetchShader, 0); | |||
// forward fetch generated vertex IDs to the vertex shader | |||
vsContext.VertexID = fetchInfo.VertexID; | |||
@@ -2001,9 +2001,9 @@ void ProcessDraw( | |||
if (!KNOB_TOSS_FETCH) | |||
#endif | |||
{ | |||
AR_BEGIN(FEVertexShader, pDC->drawId); | |||
RDTSC_BEGIN(FEVertexShader, pDC->drawId); | |||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext); | |||
AR_END(FEVertexShader, 0); | |||
RDTSC_END(FEVertexShader, 0); | |||
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); | |||
} | |||
@@ -2014,9 +2014,9 @@ void ProcessDraw( | |||
{ | |||
simdvector prim[MAX_NUM_VERTS_PER_PRIM]; | |||
// PaAssemble returns false if there is not enough verts to assemble. | |||
AR_BEGIN(FEPAAssemble, pDC->drawId); | |||
RDTSC_BEGIN(FEPAAssemble, pDC->drawId); | |||
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim); | |||
AR_END(FEPAAssemble, 1); | |||
RDTSC_END(FEPAAssemble, 1); | |||
#if KNOB_ENABLE_TOSS_POINTS | |||
if (!KNOB_TOSS_FETCH) | |||
@@ -2104,7 +2104,7 @@ void ProcessDraw( | |||
#endif | |||
AR_END(FEProcessDraw, numPrims * work.numInstances); | |||
RDTSC_END(FEProcessDraw, numPrims * work.numInstances); | |||
} | |||
struct FEDrawChooser |
@@ -53,7 +53,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi | |||
#endif | |||
// bloat line to two tris and call the triangle rasterizer twice | |||
AR_BEGIN(BERasterizeLine, pDC->drawId); | |||
RDTSC_BEGIN(BERasterizeLine, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
const SWR_RASTSTATE &rastState = state.rastState; | |||
@@ -246,7 +246,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi | |||
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); | |||
} | |||
AR_END(BERasterizeLine, 1); | |||
RDTSC_END(BERasterizeLine, 1); | |||
} | |||
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) | |||
@@ -308,9 +308,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi | |||
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, | |||
renderBuffers, triDesc.triFlags.renderTargetArrayIndex); | |||
AR_BEGIN(BEPixelBackend, pDC->drawId); | |||
RDTSC_BEGIN(BEPixelBackend, pDC->drawId); | |||
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers); | |||
AR_END(BEPixelBackend, 0); | |||
RDTSC_END(BEPixelBackend, 0); | |||
} | |||
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) |
@@ -781,9 +781,9 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT> | |||
} | |||
// not trivial accept or reject, must rasterize full tile | |||
AR_BEGIN(BERasterizePartial, pDC->drawId); | |||
RDTSC_BEGIN(BERasterizePartial, pDC->drawId); | |||
innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges); | |||
AR_END(BERasterizePartial, 0); | |||
RDTSC_END(BERasterizePartial, 0); | |||
} | |||
}; | |||
@@ -847,8 +847,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
return; | |||
} | |||
#endif | |||
AR_BEGIN(BERasterizeTriangle, pDC->drawId); | |||
AR_BEGIN(BETriangleSetup, pDC->drawId); | |||
RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId); | |||
RDTSC_BEGIN(BETriangleSetup, pDC->drawId); | |||
const API_STATE &state = GetApiState(pDC); | |||
const SWR_RASTSTATE &rastState = state.rastState; | |||
@@ -1014,7 +1014,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0); | |||
AR_END(BETriangleSetup, 0); | |||
RDTSC_END(BETriangleSetup, 0); | |||
// update triangle desc | |||
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); | |||
@@ -1027,11 +1027,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
if (numTilesX == 0 || numTilesY == 0) | |||
{ | |||
RDTSC_EVENT(BEEmptyTriangle, 1, 0); | |||
AR_END(BERasterizeTriangle, 1); | |||
RDTSC_END(BERasterizeTriangle, 1); | |||
return; | |||
} | |||
AR_BEGIN(BEStepSetup, pDC->drawId); | |||
RDTSC_BEGIN(BEStepSetup, pDC->drawId); | |||
// Step to pixel center of top-left pixel of the triangle bbox | |||
// Align intersect bbox (top/left) to raster tile's (top/left). | |||
@@ -1140,7 +1140,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
} | |||
} | |||
AR_END(BEStepSetup, 0); | |||
RDTSC_END(BEStepSetup, 0); | |||
uint32_t tY = minTileY; | |||
uint32_t tX = minTileX; | |||
@@ -1233,9 +1233,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
} | |||
// not trivial accept or reject, must rasterize full tile | |||
AR_BEGIN(BERasterizePartial, pDC->drawId); | |||
RDTSC_BEGIN(BERasterizePartial, pDC->drawId); | |||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges); | |||
AR_END(BERasterizePartial, 0); | |||
RDTSC_END(BERasterizePartial, 0); | |||
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; | |||
@@ -1271,9 +1271,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage); | |||
} | |||
AR_BEGIN(BEPixelBackend, pDC->drawId); | |||
RDTSC_BEGIN(BEPixelBackend, pDC->drawId); | |||
backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers); | |||
AR_END(BEPixelBackend, 0); | |||
RDTSC_END(BEPixelBackend, 0); | |||
} | |||
// step to the next tile in X | |||
@@ -1292,7 +1292,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow); | |||
} | |||
AR_END(BERasterizeTriangle, 1); | |||
RDTSC_END(BERasterizeTriangle, 1); | |||
} | |||
// Get pointers to hot tile memory for color RT, depth, stencil |
@@ -541,7 +541,7 @@ bool WorkOnFifoBE( | |||
{ | |||
BE_WORK *pWork; | |||
AR_BEGIN(WorkerFoundWork, pDC->drawId); | |||
RDTSC_BEGIN(WorkerFoundWork, pDC->drawId); | |||
uint32_t numWorkItems = tile->getNumQueued(); | |||
SWR_ASSERT(numWorkItems); | |||
@@ -562,7 +562,7 @@ bool WorkOnFifoBE( | |||
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); | |||
tile->dequeue(); | |||
} | |||
AR_END(WorkerFoundWork, numWorkItems); | |||
RDTSC_END(WorkerFoundWork, numWorkItems); | |||
_ReadWriteBarrier(); | |||
@@ -849,9 +849,9 @@ DWORD workerThreadMain(LPVOID pData) | |||
if (IsBEThread) | |||
{ | |||
AR_BEGIN(WorkerWorkOnFifoBE, 0); | |||
RDTSC_BEGIN(WorkerWorkOnFifoBE, 0); | |||
bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask); | |||
AR_END(WorkerWorkOnFifoBE, 0); | |||
RDTSC_END(WorkerWorkOnFifoBE, 0); | |||
WorkOnCompute(pContext, workerId, curDrawBE); | |||
} |
@@ -396,19 +396,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui | |||
if (pHotTile->state == HOTTILE_INVALID) | |||
{ | |||
AR_BEGIN(BELoadTiles, pDC->drawId); | |||
RDTSC_BEGIN(BELoadTiles, pDC->drawId); | |||
// invalid hottile before draw requires a load from surface before we can draw to it | |||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); | |||
pHotTile->state = HOTTILE_DIRTY; | |||
AR_END(BELoadTiles, 0); | |||
RDTSC_END(BELoadTiles, 0); | |||
} | |||
else if (pHotTile->state == HOTTILE_CLEAR) | |||
{ | |||
AR_BEGIN(BELoadTiles, pDC->drawId); | |||
RDTSC_BEGIN(BELoadTiles, pDC->drawId); | |||
// Clear the tile. | |||
ClearColorHotTile(pHotTile); | |||
pHotTile->state = HOTTILE_DIRTY; | |||
AR_END(BELoadTiles, 0); | |||
RDTSC_END(BELoadTiles, 0); | |||
} | |||
colorHottileEnableMask &= ~(1 << rtSlot); | |||
} | |||
@@ -419,19 +419,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui | |||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); | |||
if (pHotTile->state == HOTTILE_INVALID) | |||
{ | |||
AR_BEGIN(BELoadTiles, pDC->drawId); | |||
RDTSC_BEGIN(BELoadTiles, pDC->drawId); | |||
// invalid hottile before draw requires a load from surface before we can draw to it | |||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); | |||
pHotTile->state = HOTTILE_DIRTY; | |||
AR_END(BELoadTiles, 0); | |||
RDTSC_END(BELoadTiles, 0); | |||
} | |||
else if (pHotTile->state == HOTTILE_CLEAR) | |||
{ | |||
AR_BEGIN(BELoadTiles, pDC->drawId); | |||
RDTSC_BEGIN(BELoadTiles, pDC->drawId); | |||
// Clear the tile. | |||
ClearDepthHotTile(pHotTile); | |||
pHotTile->state = HOTTILE_DIRTY; | |||
AR_END(BELoadTiles, 0); | |||
RDTSC_END(BELoadTiles, 0); | |||
} | |||
} | |||
@@ -441,19 +441,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui | |||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); | |||
if (pHotTile->state == HOTTILE_INVALID) | |||
{ | |||
AR_BEGIN(BELoadTiles, pDC->drawId); | |||
RDTSC_BEGIN(BELoadTiles, pDC->drawId); | |||
// invalid hottile before draw requires a load from surface before we can draw to it | |||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); | |||
pHotTile->state = HOTTILE_DIRTY; | |||
AR_END(BELoadTiles, 0); | |||
RDTSC_END(BELoadTiles, 0); | |||
} | |||
else if (pHotTile->state == HOTTILE_CLEAR) | |||
{ | |||
AR_BEGIN(BELoadTiles, pDC->drawId); | |||
RDTSC_BEGIN(BELoadTiles, pDC->drawId); | |||
// Clear the tile. | |||
ClearStencilHotTile(pHotTile); | |||
pHotTile->state = HOTTILE_DIRTY; | |||
AR_END(BELoadTiles, 0); | |||
RDTSC_END(BELoadTiles, 0); | |||
} | |||
} | |||
} |