Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>tags/13.0-branchpoint
@@ -109,8 +109,6 @@ template <> | |||
struct ConservativeRastFETraits<StandardRastT> | |||
{ | |||
typedef std::false_type IsConservativeT; | |||
typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT; | |||
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -119,13 +117,7 @@ template <> | |||
struct ConservativeRastFETraits<ConservativeRastT> | |||
{ | |||
typedef std::true_type IsConservativeT; | |||
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; | |||
/// Conservative bounding box needs to expand the area around each vertex by 1/512, which | |||
/// is the potential snapping error when going from FP-> 16.8 fixed | |||
typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT; | |||
typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT; | |||
typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// |
@@ -1446,7 +1446,7 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( | |||
/// @param pLinkageMap - maps VS attribute slot to PS slot | |||
/// @param triIndex - Triangle to process attributes for | |||
/// @param pBuffer - Output result | |||
template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT> | |||
template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate> | |||
INLINE void ProcessAttributes( | |||
DRAW_CONTEXT *pDC, | |||
PA_STATE&pa, | |||
@@ -1456,7 +1456,8 @@ INLINE void ProcessAttributes( | |||
{ | |||
static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT"); | |||
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; | |||
LONG constantInterpMask = backendState.constantInterpolationMask; | |||
// Conservative Rasterization requires degenerate tris to have constant attribute interpolation | |||
LONG constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask; | |||
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex; | |||
const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology; | |||
@@ -1483,7 +1484,7 @@ INLINE void ProcessAttributes( | |||
__m128 attrib[3]; // triangle attribs (always 4 wide) | |||
float* pAttribStart = pBuffer; | |||
if (HasConstantInterpT::value) | |||
if (HasConstantInterpT::value || IsDegenerate::value) | |||
{ | |||
if (_bittest(&constantInterpMask, i)) | |||
{ | |||
@@ -1605,9 +1606,9 @@ struct ProcessAttributesChooser | |||
} | |||
}; | |||
PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp) | |||
PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false) | |||
{ | |||
return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp); | |||
return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -1668,38 +1669,19 @@ INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn) | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief Helper function to set the X,Y coords of a triangle to the | |||
/// requested Fixed Point precision from FP32. If the RequestedT | |||
/// FixedPointTraits precision is the same as the CurrentT, no extra | |||
/// conversions will be done. If they are different, convert from FP32 | |||
/// to the Requested precision and set vXi, vYi | |||
/// @tparam RequestedT: requested FixedPointTraits type | |||
/// @tparam CurrentT: FixedPointTraits type of the last | |||
template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>> | |||
struct FPToFixedPoint | |||
/// requested Fixed Point precision from FP32. | |||
/// @param tri: simdvector[3] of FP triangle verts | |||
/// @param vXi: fixed point X coords of tri verts | |||
/// @param vYi: fixed point Y coords of tri verts | |||
INLINE static void FPToFixedPoint(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]) | |||
{ | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @param tri: simdvector[3] of FP triangle verts | |||
/// @param vXi: fixed point X coords of tri verts | |||
/// @param vYi: fixed point Y coords of tri verts | |||
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]) | |||
{ | |||
vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x); | |||
vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y); | |||
vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x); | |||
vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y); | |||
vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x); | |||
vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y); | |||
}; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief In the case where the RequestedT and CurrentT fixed point | |||
/// precisions are the same, do nothing. | |||
template<typename RequestedT> | |||
struct FPToFixedPoint<RequestedT, RequestedT> | |||
{ | |||
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){}; | |||
}; | |||
vXi[0] = fpToFixedPointVertical(tri[0].x); | |||
vYi[0] = fpToFixedPointVertical(tri[0].y); | |||
vXi[1] = fpToFixedPointVertical(tri[1].x); | |||
vYi[1] = fpToFixedPointVertical(tri[1].y); | |||
vXi[2] = fpToFixedPointVertical(tri[2].x); | |||
vYi[2] = fpToFixedPointVertical(tri[2].y); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief Calculate bounding box for current triangle | |||
@@ -1710,20 +1692,8 @@ struct FPToFixedPoint<RequestedT, RequestedT> | |||
/// *Note*: expects vX, vY to be in the correct precision for the type | |||
/// of rasterization. This avoids unnecessary FP->fixed conversions. | |||
template <typename CT> | |||
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){} | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical | |||
template <> | |||
INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox) | |||
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox) | |||
{ | |||
// FE conservative rast traits | |||
typedef FEStandardRastT CT; | |||
static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision"); | |||
// Update vXi, vYi fixed point precision for BBox calculation if necessary | |||
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY); | |||
simdscalari vMinX = vX[0]; | |||
vMinX = _simd_min_epi32(vMinX, vX[1]); | |||
vMinX = _simd_min_epi32(vMinX, vX[2]); | |||
@@ -1755,10 +1725,6 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c | |||
// FE conservative rast traits | |||
typedef FEConservativeRastT CT; | |||
static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision"); | |||
// Update vXi, vYi fixed point precision for BBox calculation if necessary | |||
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY); | |||
simdscalari vMinX = vX[0]; | |||
vMinX = _simd_min_epi32(vMinX, vX[1]); | |||
vMinX = _simd_min_epi32(vMinX, vX[2]); | |||
@@ -1776,10 +1742,11 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c | |||
vMaxY = _simd_max_epi32(vMaxY, vY[2]); | |||
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization | |||
bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); | |||
bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); | |||
bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); | |||
bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); | |||
/// expand bbox by 1/256; coverage will be correctly handled in the rasterizer. | |||
bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); | |||
bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); | |||
bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); | |||
bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -1808,10 +1775,6 @@ void BinTriangles( | |||
const SWR_GS_STATE& gsState = state.gsState; | |||
MacroTileMgr *pTileMgr = pDC->pTileMgr; | |||
// Select attribute processor | |||
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3, | |||
state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); | |||
simdscalar vRecipW0 = _simd_set1_ps(1.0f); | |||
simdscalar vRecipW1 = _simd_set1_ps(1.0f); | |||
@@ -1852,8 +1815,8 @@ void BinTriangles( | |||
tri[2].y = _simd_add_ps(tri[2].y, offset); | |||
simdscalari vXi[3], vYi[3]; | |||
// Set vXi, vYi to fixed point precision required for degenerate triangle check | |||
FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi); | |||
// Set vXi, vYi to required fixed point precision | |||
FPToFixedPoint(tri, vXi, vYi); | |||
// triangle setup | |||
simdscalari vAi[3], vBi[3]; | |||
@@ -1863,8 +1826,6 @@ void BinTriangles( | |||
simdscalari vDet[2]; | |||
calcDeterminantIntVertical(vAi, vBi, vDet); | |||
/// todo: handle degen tri's for Conservative Rast. | |||
// cull zero area | |||
int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si()))); | |||
int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si()))); | |||
@@ -1872,11 +1833,15 @@ void BinTriangles( | |||
int cullZeroAreaMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH / 2)); | |||
uint32_t origTriMask = triMask; | |||
triMask &= ~cullZeroAreaMask; | |||
// don't cull degenerate triangles if we're conservatively rasterizing | |||
if(!CT::IsConservativeT::value) | |||
{ | |||
triMask &= ~cullZeroAreaMask; | |||
} | |||
// determine front winding tris | |||
// CW +det | |||
// CCW -det | |||
// CCW det <= 0; 0 area triangles are marked as backfacing, which is required behavior for conservative rast | |||
maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[0], _simd_setzero_si()))); | |||
maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[1], _simd_setzero_si()))); | |||
int cwTriMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH /2) ); | |||
@@ -1898,6 +1863,7 @@ void BinTriangles( | |||
case SWR_CULLMODE_BOTH: cullTris = 0xffffffff; break; | |||
case SWR_CULLMODE_NONE: cullTris = 0x0; break; | |||
case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break; | |||
// 0 area triangles are marked as backfacing, which is required behavior for conservative rast | |||
case SWR_CULLMODE_BACK: cullTris = ~frontWindingTris; break; | |||
default: SWR_ASSERT(false, "Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break; | |||
} | |||
@@ -1916,9 +1882,53 @@ void BinTriangles( | |||
DWORD triIndex = 0; | |||
// for center sample pattern, all samples are at pixel center; calculate coverage | |||
// once at center and broadcast the results in the backend | |||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; | |||
PFN_WORK_FUNC pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0), | |||
pDC->pState->state.psState.inputCoverage, (rastState.scissorEnable > 0)); | |||
const SWR_MULTISAMPLE_COUNT sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; | |||
uint32_t edgeEnable; | |||
PFN_WORK_FUNC pfnWork; | |||
if(CT::IsConservativeT::value) | |||
{ | |||
// determine which edges of the degenerate tri, if any, are valid to rasterize. | |||
// used to call the appropriate templated rasterizer function | |||
if(cullZeroAreaMask > 0) | |||
{ | |||
// e0 = v1-v0 | |||
simdscalari x0x1Mask = _simd_cmpeq_epi32(vXi[0], vXi[1]); | |||
simdscalari y0y1Mask = _simd_cmpeq_epi32(vYi[0], vYi[1]); | |||
uint32_t e0Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x0x1Mask, y0y1Mask))); | |||
// e1 = v2-v1 | |||
simdscalari x1x2Mask = _simd_cmpeq_epi32(vXi[1], vXi[2]); | |||
simdscalari y1y2Mask = _simd_cmpeq_epi32(vYi[1], vYi[2]); | |||
uint32_t e1Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x1x2Mask, y1y2Mask))); | |||
// e2 = v0-v2 | |||
// if v0 == v1 & v1 == v2, v0 == v2 | |||
uint32_t e2Mask = e0Mask & e1Mask; | |||
SWR_ASSERT(KNOB_SIMD_WIDTH == 8, "Need to update degenerate mask code for avx512"); | |||
// edge order: e0 = v0v1, e1 = v1v2, e2 = v0v2 | |||
// 32 bit binary: 0000 0000 0010 0100 1001 0010 0100 1001 | |||
e0Mask = pdep_u32(e0Mask, 0x00249249); | |||
// 32 bit binary: 0000 0000 0100 1001 0010 0100 1001 0010 | |||
e1Mask = pdep_u32(e1Mask, 0x00492492); | |||
// 32 bit binary: 0000 0000 1001 0010 0100 1001 0010 0100 | |||
e2Mask = pdep_u32(e2Mask, 0x00924924); | |||
edgeEnable = (0x00FFFFFF & (~(e0Mask | e1Mask | e2Mask))); | |||
} | |||
else | |||
{ | |||
edgeEnable = 0x00FFFFFF; | |||
} | |||
} | |||
else | |||
{ | |||
// degenerate triangles won't be sent to rasterizer; just enable all edges | |||
pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0), | |||
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, | |||
(rastState.scissorEnable > 0)); | |||
} | |||
if (!triMask) | |||
{ | |||
goto endBinTriangles; | |||
@@ -1969,6 +1979,16 @@ void BinTriangles( | |||
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right)); | |||
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom)); | |||
if(CT::IsConservativeT::value) | |||
{ | |||
// in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has | |||
// some area. Bump the right/bottom edges out | |||
simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom); | |||
bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom); | |||
simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right); | |||
bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight); | |||
} | |||
// Cull tris completely outside scissor | |||
{ | |||
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right); | |||
@@ -2026,7 +2046,28 @@ void BinTriangles( | |||
BE_WORK work; | |||
work.type = DRAW; | |||
work.pfnWork = pfnWork; | |||
bool isDegenerate; | |||
if(CT::IsConservativeT::value) | |||
{ | |||
// only rasterize valid edges if we have a degenerate primitive | |||
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID; | |||
work.pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0), | |||
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, | |||
(rastState.scissorEnable > 0)); | |||
// Degenerate triangles are required to be constant interpolated | |||
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false; | |||
} | |||
else | |||
{ | |||
isDegenerate = false; | |||
work.pfnWork = pfnWork; | |||
} | |||
// Select attribute processor | |||
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3, | |||
state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate); | |||
TRIANGLE_WORK_DESC &desc = work.desc.tri; | |||
@@ -88,7 +88,7 @@ struct EDGE | |||
/// @param vA, vB - A & B coefs for each edge of the triangle (Ax + Bx + C) | |||
/// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad. | |||
/// Used to step between quads when sweeping over the raster tile. | |||
template<uint32_t NumEdges> | |||
template<uint32_t NumEdges, typename EdgeMaskT> | |||
INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges) | |||
{ | |||
uint64_t coverageMask = 0; | |||
@@ -120,25 +120,25 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg | |||
// evaluate which pixels in the quad are covered | |||
#define EVAL \ | |||
UnrollerL<0, NumEdges, 1>::step(eval_lambda); | |||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda); | |||
// update coverage mask | |||
#define UPDATE_MASK(bit) \ | |||
mask = edgeMask[0]; \ | |||
UnrollerL<1, NumEdges, 1>::step(update_lambda); \ | |||
UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \ | |||
coverageMask |= (mask << bit); | |||
// step in the +x direction to the next quad | |||
#define INCX \ | |||
UnrollerL<0, NumEdges, 1>::step(incx_lambda); | |||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda); | |||
// step in the +y direction to the next quad | |||
#define INCY \ | |||
UnrollerL<0, NumEdges, 1>::step(incy_lambda); | |||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda); | |||
// step in the -x direction to the next quad | |||
#define DECX \ | |||
UnrollerL<0, NumEdges, 1>::step(decx_lambda); | |||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda); | |||
// sweep 2x2 quad back and forth through the raster tile, | |||
// computing coverage masks for the entire tile | |||
@@ -274,6 +274,17 @@ INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256 | |||
vEdge = _mm256_blendv_pd(vEdgeOut, vEdgeAdjust, gMaskToVecpd[msk | msk2]); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief calculates difference in precision between the result of manh | |||
/// calculation and the edge precision, based on compile time trait values | |||
template<typename RT> | |||
constexpr int64_t ManhToEdgePrecisionAdjust() | |||
{ | |||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value, | |||
"Inadequate precision of result of manh calculation "); | |||
return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @struct adjustEdgeConservative | |||
/// @brief Primary template definition used for partially specializing | |||
@@ -306,15 +317,15 @@ struct adjustEdgeConservative<RT, std::true_type> | |||
/// instead of having to test individual pixel corners for conservative coverage | |||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge) | |||
{ | |||
/// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away | |||
/// from the pixel center (in the direction of the edge normal A/B) | |||
// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away | |||
// from the pixel center (in the direction of the edge normal A/B) | |||
/// edge = Ax + Bx + C - (manh/e) | |||
/// manh = manhattan distance = abs(A) + abs(B) | |||
/// e = absolute rounding error from snapping from float to fixed point precision | |||
// edge = Ax + Bx + C - (manh/e) | |||
// manh = manhattan distance = abs(A) + abs(B) | |||
// e = absolute rounding error from snapping from float to fixed point precision | |||
/// 'fixed point' multiply (in double to be avx1 friendly) | |||
/// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example | |||
// 'fixed point' multiply (in double to be avx1 friendly) | |||
// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example | |||
__m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi)); | |||
__m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)), | |||
_mm256_mul_pd(vBai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value))); | |||
@@ -322,15 +333,13 @@ struct adjustEdgeConservative<RT, std::true_type> | |||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value, | |||
"Inadequate precision of result of manh calculation "); | |||
/// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision | |||
/// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right | |||
manh = _mm256_mul_pd(manh, _mm256_set1_pd(((RT::PrecisionT::BitsT::value + | |||
RT::ConservativePrecisionT::BitsT::value) - | |||
RT::EdgePrecisionT::BitsT::value) * 0.5)); | |||
/// move the edge away from the pixel center by the required conservative precision + 1/2 pixel | |||
/// this allows the rasterizer to do a single conservative coverage test to see if the primitive | |||
/// intersects the pixel at all | |||
// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision | |||
// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right | |||
manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5)); | |||
// move the edge away from the pixel center by the required conservative precision + 1/2 pixel | |||
// this allows the rasterizer to do a single conservative coverage test to see if the primitive | |||
// intersects the pixel at all | |||
vEdge = _mm256_sub_pd(vEdge, manh); | |||
}; | |||
}; | |||
@@ -346,6 +355,19 @@ struct adjustEdgeConservative<RT, std::false_type> | |||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge){}; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief calculates the distance a degenerate BBox needs to be adjusted | |||
/// for conservative rast based on compile time trait values | |||
template<typename RT> | |||
constexpr int64_t ConservativeScissorOffset() | |||
{ | |||
static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision"); | |||
// if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges | |||
typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT; | |||
// 1/2 pixel edge offset + conservative offset - degenerateTriangle | |||
return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value)); | |||
} | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief Performs calculations to adjust each a scalar edge out | |||
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y | |||
@@ -354,13 +376,7 @@ template <typename RT> | |||
INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge) | |||
{ | |||
int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b)); | |||
int64_t manh = ((aabs * RT::ConservativeEdgeOffsetT::value) + (babs * RT::ConservativeEdgeOffsetT::value)) >> | |||
((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value); | |||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value, | |||
"Inadequate precision of result of manh calculation "); | |||
int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>(); | |||
vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh)); | |||
}; | |||
@@ -371,7 +387,7 @@ INLINE void adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vE | |||
{ | |||
static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value, | |||
"Edge equation expected to be in x.16 fixed point"); | |||
/// need to offset the edge before applying the top-left rule | |||
// need to offset the edge before applying the top-left rule | |||
adjustEdgeConservative<RT, typename RT::IsConservativeT>(vAi, vBi, vEdge); | |||
adjustTopLeftRuleIntFix16(vAi, vBi, vEdge); | |||
@@ -563,14 +579,13 @@ struct ComputeScissorEdges | |||
template <typename RT> | |||
struct ComputeScissorEdges<std::true_type, std::true_type, RT> | |||
{ | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors, | |||
/// evaluate edge equations and offset them away from pixel center. | |||
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y, | |||
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]) | |||
{ | |||
/// if conservative rasterizing, triangle bbox intersected with scissor bbox is used | |||
// if conservative rasterizing, triangle bbox intersected with scissor bbox is used | |||
BBOX scissor; | |||
scissor.left = std::max(triBBox.left, scissorBBox.left); | |||
scissor.right = std::min(triBBox.right, scissorBBox.right); | |||
@@ -593,7 +608,7 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT> | |||
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom))); | |||
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top))); | |||
/// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing | |||
// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing | |||
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]); | |||
adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]); | |||
adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]); | |||
@@ -632,6 +647,81 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT> | |||
} | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief Primary function template for TrivialRejectTest. Should | |||
/// never be called, but TemplateUnroller instantiates a few unused values, | |||
/// so it calls a runtime assert instead of a static_assert. | |||
template <typename ValidEdgeMaskT> | |||
INLINE bool TrivialRejectTest(const int, const int, const int) | |||
{ | |||
SWR_ASSERT(0, "Primary templated function should never be called"); | |||
return false; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief E0E1ValidT specialization of TrivialRejectTest. Tests edge 0 | |||
/// and edge 1 for trivial coverage reject | |||
template <> | |||
INLINE bool TrivialRejectTest<E0E1ValidT>(const int mask0, const int mask1, const int) | |||
{ | |||
return (!(mask0 && mask1)) ? true : false; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief E0E2ValidT specialization of TrivialRejectTest. Tests edge 0 | |||
/// and edge 2 for trivial coverage reject | |||
template <> | |||
INLINE bool TrivialRejectTest<E0E2ValidT>(const int mask0, const int, const int mask2) | |||
{ | |||
return (!(mask0 && mask2)) ? true : false; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief E1E2ValidT specialization of TrivialRejectTest. Tests edge 1 | |||
/// and edge 2 for trivial coverage reject | |||
template <> | |||
INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int mask2) | |||
{ | |||
return (!(mask1 && mask2)) ? true : false; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief AllEdgesValidT specialization of TrivialRejectTest. Tests all | |||
/// primitive edges for trivial coverage reject | |||
template <> | |||
INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2) | |||
{ | |||
return (!(mask0 && mask1 && mask2)) ? true : false;; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief NoEdgesValidT specialization of TrivialRejectTest. Degenerate | |||
/// point, so return false and rasterize against conservative BBox | |||
template <> | |||
INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int) | |||
{ | |||
return false; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief Primary function template for TrivialAcceptTest. Always returns | |||
/// false, since it will only be called for degenerate tris, and as such | |||
/// will never cover the entire raster tile | |||
template <typename ValidEdgeMaskT> | |||
INLINE bool TrivialAcceptTest(const int, const int, const int) | |||
{ | |||
return false; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief AllEdgesValidT specialization for TrivialAcceptTest. Test all | |||
/// edge masks for a fully covered raster tile | |||
template <> | |||
INLINE bool TrivialAcceptTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2) | |||
{ | |||
return ((mask0 & mask1 & mask2) == 0xf); | |||
}; | |||
template <typename RT> | |||
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc) | |||
{ | |||
@@ -681,8 +771,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
// determinant | |||
float det = calcDeterminantInt(vAi, vBi); | |||
/// Verts in Pixel Coordinate Space at this point | |||
/// Det > 0 = CW winding order | |||
// Verts in Pixel Coordinate Space at this point | |||
// Det > 0 = CW winding order | |||
// Convert CW triangles to CCW | |||
if (det > 0.0) | |||
{ | |||
@@ -693,28 +783,39 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
det = -det; | |||
} | |||
/// @todo: handle degenerates for ConservativeRast | |||
__m128 vC; | |||
// Finish triangle setup - C edge coef | |||
triangleSetupC(vX, vY, vA, vB, vC); | |||
// compute barycentric i and j | |||
// i = (A1x + B1y + C1)/det | |||
// j = (A2x + B2y + C2)/det | |||
__m128 vDet = _mm_set1_ps(det); | |||
__m128 vRecipDet = _mm_div_ps(_mm_set1_ps(1.0f), vDet);//_mm_rcp_ps(vDet); | |||
_mm_store_ss(&triDesc.recipDet, vRecipDet); | |||
// only extract coefs for 2 of the barycentrics; the 3rd can be | |||
// determined from the barycentric equation: | |||
// i + j + k = 1 <=> k = 1 - j - i | |||
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1); | |||
_MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1); | |||
_MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1); | |||
_MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2); | |||
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2); | |||
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2); | |||
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID) | |||
{ | |||
// If we have degenerate edge(s) to rasterize, set I and J coefs | |||
// to 0 for constant interpolation of attributes | |||
triDesc.I[0] = 0.0f; | |||
triDesc.I[1] = 0.0f; | |||
triDesc.I[2] = 0.0f; | |||
triDesc.J[0] = 0.0f; | |||
triDesc.J[1] = 0.0f; | |||
triDesc.J[2] = 0.0f; | |||
// Degenerate triangles have no area | |||
triDesc.recipDet = 0.0f; | |||
} | |||
else | |||
{ | |||
// only extract coefs for 2 of the barycentrics; the 3rd can be | |||
// determined from the barycentric equation: | |||
// i + j + k = 1 <=> k = 1 - j - i | |||
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1); | |||
_MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1); | |||
_MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1); | |||
_MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2); | |||
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2); | |||
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2); | |||
// compute recipDet, used to calculate barycentric i and j in the backend | |||
triDesc.recipDet = 1.0f/det; | |||
} | |||
OSALIGNSIMD(float) oneOverW[4]; | |||
_mm_store_ps(oneOverW, vRecipW); | |||
@@ -764,6 +865,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
OSALIGNSIMD(BBOX) bbox; | |||
calcBoundingBoxInt(vXi, vYi, bbox); | |||
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID) | |||
{ | |||
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid | |||
bbox.left--; bbox.right++; bbox.top--; bbox.bottom++; | |||
SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0, | |||
"Conservative rast degenerate handling requires a valid scissor rect"); | |||
} | |||
// Intersect with scissor/viewport | |||
OSALIGNSIMD(BBOX) intersect; | |||
intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left); | |||
@@ -941,13 +1050,13 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
for (uint32_t sampleNum = 0; sampleNum < NumRasterSamplesT::value; sampleNum++) | |||
{ | |||
// trivial reject, at least one edge has all 4 corners of raster tile outside | |||
bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false; | |||
bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2); | |||
if (!trivialReject) | |||
{ | |||
// trivial accept mask | |||
triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL; | |||
if ((mask0 & mask1 & mask2) == 0xf) | |||
if (TrivialAcceptTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2)) | |||
{ | |||
triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum]; | |||
// trivial accept, all 4 corners of all 3 edges are negative | |||
@@ -991,7 +1100,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, | |||
// not trivial accept or reject, must rasterize full tile | |||
RDTSC_START(BERasterizePartial); | |||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value>(pDC, startQuadEdges, rastEdges); | |||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges); | |||
RDTSC_STOP(BERasterizePartial, 0, 0); | |||
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; | |||
@@ -1101,7 +1210,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, | |||
// once at center and broadcast the results in the backend | |||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; | |||
// conservative rast not supported for points/lines | |||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0)); | |||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0)); | |||
// overwrite texcoords for point sprites | |||
if (isPointSpriteTexCoordEnabled) | |||
@@ -1429,7 +1538,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi | |||
PFN_WORK_FUNC pfnTriRast; | |||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; | |||
// conservative rast not supported for points/lines | |||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0)); | |||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0)); | |||
// make sure this macrotile intersects the triangle | |||
__m128i vXai = fpToFixedPoint(vXa); | |||
@@ -1541,6 +1650,7 @@ PFN_WORK_FUNC GetRasterizerFunc( | |||
uint32_t numSamples, | |||
bool IsConservative, | |||
uint32_t InputCoverage, | |||
uint32_t EdgeEnable, | |||
bool RasterizeScissorEdges | |||
) | |||
{ | |||
@@ -1548,5 +1658,6 @@ PFN_WORK_FUNC GetRasterizerFunc( | |||
IntArg<SWR_MULTISAMPLE_1X,SWR_MULTISAMPLE_TYPE_COUNT-1>{numSamples}, | |||
IsConservative, | |||
IntArg<SWR_INPUT_COVERAGE_NONE, SWR_INPUT_COVERAGE_COUNT-1>{InputCoverage}, | |||
IntArg<0, VALID_TRI_EDGE_COUNT-1>{EdgeEnable}, | |||
RasterizeScissorEdges); | |||
} |
@@ -48,8 +48,28 @@ PFN_WORK_FUNC GetRasterizerFunc( | |||
uint32_t numSamples, | |||
bool IsConservative, | |||
uint32_t InputCoverage, | |||
uint32_t EdgeEnable, | |||
bool RasterizeScissorEdges); | |||
enum ValidTriEdges | |||
{ | |||
NO_VALID_EDGES = 0, | |||
E0_E1_VALID = 0x3, | |||
E0_E2_VALID = 0x5, | |||
E1_E2_VALID = 0x6, | |||
ALL_EDGES_VALID = 0x7, | |||
VALID_TRI_EDGE_COUNT, | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief ValidTriEdges convenience typedefs used for templated function | |||
/// specialization supported Fixed Point precisions | |||
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT; | |||
typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT; | |||
typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT; | |||
typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT; | |||
typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @struct RasterScissorEdgesT | |||
/// @brief Primary RasterScissorEdgesT templated struct that holds compile | |||
@@ -59,22 +79,26 @@ PFN_WORK_FUNC GetRasterizerFunc( | |||
/// 3 triangle edges + 4 scissor edges for coverage. | |||
/// @tparam RasterScissorEdgesT: number of multisamples | |||
/// @tparam ConservativeT: is this a conservative rasterization | |||
template <typename RasterScissorEdgesT, typename ConservativeT> | |||
/// @tparam EdgeMaskT: Which edges are valid(not degenerate) | |||
template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT> | |||
struct RasterEdgeTraits | |||
{ | |||
typedef std::true_type RasterizeScissorEdgesT; | |||
typedef std::integral_constant<uint32_t, 7> NumEdgesT; | |||
typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
/// @brief specialization of RasterEdgeTraits. If neither scissor rect | |||
/// nor conservative rast is enabled, only test 3 triangle edges | |||
/// for coverage | |||
template <> | |||
struct RasterEdgeTraits<std::false_type, std::false_type> | |||
template <typename EdgeMaskT> | |||
struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT> | |||
{ | |||
typedef std::false_type RasterizeScissorEdgesT; | |||
typedef std::integral_constant<uint32_t, 3> NumEdgesT; | |||
// no need for degenerate edge masking in non-conservative case; rasterize all triangle edges | |||
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT; | |||
}; | |||
////////////////////////////////////////////////////////////////////////// | |||
@@ -86,19 +110,19 @@ struct RasterEdgeTraits<std::false_type, std::false_type> | |||
/// @tparam InputCoverageT: what type of input coverage is the PS expecting? | |||
/// (only used with conservative rasterization) | |||
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor? | |||
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename RasterScissorEdgesT> | |||
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT> | |||
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>, | |||
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT> | |||
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>> | |||
{ | |||
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value)> MT; | |||
/// Fixed point precision the rasterizer is using | |||
typedef FixedPointTraits<Fixed_16_8> PrecisionT; | |||
/// Fixed point precision of the edge tests used during rasterization | |||
typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT; | |||
// If conservative rast is enabled, only need a single sample coverage test, with the result copied to all samples | |||
typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT; | |||
typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT; | |||
static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value, | |||
"Rasterizer edge fixed point precision < required conservative rast precision"); |
@@ -831,6 +831,26 @@ struct UnrollerL<End, End, Step> { | |||
} | |||
}; | |||
// helper function to unroll loops, with mask to skip specific iterations | |||
template<int Begin, int End, int Step = 1, int Mask = 0x7f> | |||
struct UnrollerLMask { | |||
template<typename Lambda> | |||
INLINE static void step(Lambda& func) { | |||
if(Mask & (1 << Begin)) | |||
{ | |||
func(Begin); | |||
} | |||
UnrollerL<Begin + Step, End, Step>::step(func); | |||
} | |||
}; | |||
template<int End, int Step, int Mask> | |||
struct UnrollerLMask<End, End, Step, Mask> { | |||
template<typename Lambda> | |||
static void step(Lambda& func) { | |||
} | |||
}; | |||
// general CRC compute | |||
INLINE | |||
uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size) |