Explorar el Código

swr: [rasterizer core] conservative rast degenerate handling

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
tags/13.0-branchpoint
Tim Rowley hace 9 años
padre
commit
9f7d99fcfe

+ 0
- 8
src/gallium/drivers/swr/rasterizer/core/conservativeRast.h Ver fichero

@@ -109,8 +109,6 @@ template <>
struct ConservativeRastFETraits<StandardRastT>
{
typedef std::false_type IsConservativeT;
typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT;
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
};

//////////////////////////////////////////////////////////////////////////
@@ -119,13 +117,7 @@ template <>
struct ConservativeRastFETraits<ConservativeRastT>
{
typedef std::true_type IsConservativeT;
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;

/// Conservative bounding box needs to expand the area around each vertex by 1/512, which
/// is the potential snapping error when going from FP-> 16.8 fixed
typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT;
typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT;
};

//////////////////////////////////////////////////////////////////////////

+ 112
- 71
src/gallium/drivers/swr/rasterizer/core/frontend.cpp Ver fichero

@@ -1446,7 +1446,7 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
/// @param pLinkageMap - maps VS attribute slot to PS slot
/// @param triIndex - Triangle to process attributes for
/// @param pBuffer - Output result
template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate>
INLINE void ProcessAttributes(
DRAW_CONTEXT *pDC,
PA_STATE&pa,
@@ -1456,7 +1456,8 @@ INLINE void ProcessAttributes(
{
static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
LONG constantInterpMask = backendState.constantInterpolationMask;
// Conservative Rasterization requires degenerate tris to have constant attribute interpolation
LONG constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;

@@ -1483,7 +1484,7 @@ INLINE void ProcessAttributes(
__m128 attrib[3]; // triangle attribs (always 4 wide)
float* pAttribStart = pBuffer;

if (HasConstantInterpT::value)
if (HasConstantInterpT::value || IsDegenerate::value)
{
if (_bittest(&constantInterpMask, i))
{
@@ -1605,9 +1606,9 @@ struct ProcessAttributesChooser
}
};

PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false)
{
return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp);
return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
}

//////////////////////////////////////////////////////////////////////////
@@ -1668,38 +1669,19 @@ INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn)

//////////////////////////////////////////////////////////////////////////
/// @brief Helper function to set the X,Y coords of a triangle to the
/// requested Fixed Point precision from FP32. If the RequestedT
/// FixedPointTraits precision is the same as the CurrentT, no extra
/// conversions will be done. If they are different, convert from FP32
/// to the Requested precision and set vXi, vYi
/// @tparam RequestedT: requested FixedPointTraits type
/// @tparam CurrentT: FixedPointTraits type of the last
template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>>
struct FPToFixedPoint
/// requested Fixed Point precision from FP32.
/// @param tri: simdvector[3] of FP triangle verts
/// @param vXi: fixed point X coords of tri verts
/// @param vYi: fixed point Y coords of tri verts
INLINE static void FPToFixedPoint(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
{
//////////////////////////////////////////////////////////////////////////
/// @param tri: simdvector[3] of FP triangle verts
/// @param vXi: fixed point X coords of tri verts
/// @param vYi: fixed point Y coords of tri verts
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
{
vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x);
vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y);
vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x);
vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y);
vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x);
vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y);
};
};

//////////////////////////////////////////////////////////////////////////
/// @brief In the case where the RequestedT and CurrentT fixed point
/// precisions are the same, do nothing.
template<typename RequestedT>
struct FPToFixedPoint<RequestedT, RequestedT>
{
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){};
};
vXi[0] = fpToFixedPointVertical(tri[0].x);
vYi[0] = fpToFixedPointVertical(tri[0].y);
vXi[1] = fpToFixedPointVertical(tri[1].x);
vYi[1] = fpToFixedPointVertical(tri[1].y);
vXi[2] = fpToFixedPointVertical(tri[2].x);
vYi[2] = fpToFixedPointVertical(tri[2].y);
}

//////////////////////////////////////////////////////////////////////////
/// @brief Calculate bounding box for current triangle
@@ -1710,20 +1692,8 @@ struct FPToFixedPoint<RequestedT, RequestedT>
/// *Note*: expects vX, vY to be in the correct precision for the type
/// of rasterization. This avoids unnecessary FP->fixed conversions.
template <typename CT>
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){}

//////////////////////////////////////////////////////////////////////////
/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical
template <>
INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
{
// FE conservative rast traits
typedef FEStandardRastT CT;

static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision");
// Update vXi, vYi fixed point precision for BBox calculation if necessary
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);

simdscalari vMinX = vX[0];
vMinX = _simd_min_epi32(vMinX, vX[1]);
vMinX = _simd_min_epi32(vMinX, vX[2]);
@@ -1755,10 +1725,6 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
// FE conservative rast traits
typedef FEConservativeRastT CT;

static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision");
// Update vXi, vYi fixed point precision for BBox calculation if necessary
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);

simdscalari vMinX = vX[0];
vMinX = _simd_min_epi32(vMinX, vX[1]);
vMinX = _simd_min_epi32(vMinX, vX[2]);
@@ -1776,10 +1742,11 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
/// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
}

//////////////////////////////////////////////////////////////////////////
@@ -1808,10 +1775,6 @@ void BinTriangles(
const SWR_GS_STATE& gsState = state.gsState;
MacroTileMgr *pTileMgr = pDC->pTileMgr;

// Select attribute processor
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);


simdscalar vRecipW0 = _simd_set1_ps(1.0f);
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
@@ -1852,8 +1815,8 @@ void BinTriangles(
tri[2].y = _simd_add_ps(tri[2].y, offset);

simdscalari vXi[3], vYi[3];
// Set vXi, vYi to fixed point precision required for degenerate triangle check
FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi);
// Set vXi, vYi to required fixed point precision
FPToFixedPoint(tri, vXi, vYi);

// triangle setup
simdscalari vAi[3], vBi[3];
@@ -1863,8 +1826,6 @@ void BinTriangles(
simdscalari vDet[2];
calcDeterminantIntVertical(vAi, vBi, vDet);

/// todo: handle degen tri's for Conservative Rast.

// cull zero area
int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si())));
int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si())));
@@ -1872,11 +1833,15 @@ void BinTriangles(
int cullZeroAreaMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH / 2));

uint32_t origTriMask = triMask;
triMask &= ~cullZeroAreaMask;
// don't cull degenerate triangles if we're conservatively rasterizing
if(!CT::IsConservativeT::value)
{
triMask &= ~cullZeroAreaMask;
}

// determine front winding tris
// CW +det
// CCW -det
// CCW det <= 0; 0 area triangles are marked as backfacing, which is required behavior for conservative rast
maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[0], _simd_setzero_si())));
maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[1], _simd_setzero_si())));
int cwTriMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH /2) );
@@ -1898,6 +1863,7 @@ void BinTriangles(
case SWR_CULLMODE_BOTH: cullTris = 0xffffffff; break;
case SWR_CULLMODE_NONE: cullTris = 0x0; break;
case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break;
// 0 area triangles are marked as backfacing, which is required behavior for conservative rast
case SWR_CULLMODE_BACK: cullTris = ~frontWindingTris; break;
default: SWR_ASSERT(false, "Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break;
}
@@ -1916,9 +1882,53 @@ void BinTriangles(
DWORD triIndex = 0;
// for center sample pattern, all samples are at pixel center; calculate coverage
// once at center and broadcast the results in the backend
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
PFN_WORK_FUNC pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
pDC->pState->state.psState.inputCoverage, (rastState.scissorEnable > 0));
const SWR_MULTISAMPLE_COUNT sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
uint32_t edgeEnable;
PFN_WORK_FUNC pfnWork;
if(CT::IsConservativeT::value)
{
// determine which edges of the degenerate tri, if any, are valid to rasterize.
// used to call the appropriate templated rasterizer function
if(cullZeroAreaMask > 0)
{
// e0 = v1-v0
simdscalari x0x1Mask = _simd_cmpeq_epi32(vXi[0], vXi[1]);
simdscalari y0y1Mask = _simd_cmpeq_epi32(vYi[0], vYi[1]);
uint32_t e0Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x0x1Mask, y0y1Mask)));

// e1 = v2-v1
simdscalari x1x2Mask = _simd_cmpeq_epi32(vXi[1], vXi[2]);
simdscalari y1y2Mask = _simd_cmpeq_epi32(vYi[1], vYi[2]);
uint32_t e1Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x1x2Mask, y1y2Mask)));

// e2 = v0-v2
// if v0 == v1 & v1 == v2, v0 == v2
uint32_t e2Mask = e0Mask & e1Mask;
SWR_ASSERT(KNOB_SIMD_WIDTH == 8, "Need to update degenerate mask code for avx512");

// edge order: e0 = v0v1, e1 = v1v2, e2 = v0v2
// 32 bit binary: 0000 0000 0010 0100 1001 0010 0100 1001
e0Mask = pdep_u32(e0Mask, 0x00249249);
// 32 bit binary: 0000 0000 0100 1001 0010 0100 1001 0010
e1Mask = pdep_u32(e1Mask, 0x00492492);
// 32 bit binary: 0000 0000 1001 0010 0100 1001 0010 0100
e2Mask = pdep_u32(e2Mask, 0x00924924);

edgeEnable = (0x00FFFFFF & (~(e0Mask | e1Mask | e2Mask)));
}
else
{
edgeEnable = 0x00FFFFFF;
}
}
else
{
// degenerate triangles won't be sent to rasterizer; just enable all edges
pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
(rastState.scissorEnable > 0));
}

if (!triMask)
{
goto endBinTriangles;
@@ -1969,6 +1979,16 @@ void BinTriangles(
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));

if(CT::IsConservativeT::value)
{
// in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
// some area. Bump the right/bottom edges out
simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom);
bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom);
simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right);
bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight);
}

// Cull tris completely outside scissor
{
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
@@ -2026,7 +2046,28 @@ void BinTriangles(

BE_WORK work;
work.type = DRAW;
work.pfnWork = pfnWork;
bool isDegenerate;
if(CT::IsConservativeT::value)
{
// only rasterize valid edges if we have a degenerate primitive
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
work.pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
(rastState.scissorEnable > 0));

// Degenerate triangles are required to be constant interpolated
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
}
else
{
isDegenerate = false;
work.pfnWork = pfnWork;
}

// Select attribute processor
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate);

TRIANGLE_WORK_DESC &desc = work.desc.tri;


+ 169
- 58
src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp Ver fichero

@@ -88,7 +88,7 @@ struct EDGE
/// @param vA, vB - A & B coefs for each edge of the triangle (Ax + Bx + C)
/// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad.
/// Used to step between quads when sweeping over the raster tile.
template<uint32_t NumEdges>
template<uint32_t NumEdges, typename EdgeMaskT>
INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges)
{
uint64_t coverageMask = 0;
@@ -120,25 +120,25 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg

// evaluate which pixels in the quad are covered
#define EVAL \
UnrollerL<0, NumEdges, 1>::step(eval_lambda);
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);

// update coverage mask
#define UPDATE_MASK(bit) \
mask = edgeMask[0]; \
UnrollerL<1, NumEdges, 1>::step(update_lambda); \
UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
coverageMask |= (mask << bit);

// step in the +x direction to the next quad
#define INCX \
UnrollerL<0, NumEdges, 1>::step(incx_lambda);
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);

// step in the +y direction to the next quad
#define INCY \
UnrollerL<0, NumEdges, 1>::step(incy_lambda);
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);

// step in the -x direction to the next quad
#define DECX \
UnrollerL<0, NumEdges, 1>::step(decx_lambda);
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);

// sweep 2x2 quad back and forth through the raster tile,
// computing coverage masks for the entire tile
@@ -274,6 +274,17 @@ INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256
vEdge = _mm256_blendv_pd(vEdgeOut, vEdgeAdjust, gMaskToVecpd[msk | msk2]);
}

//////////////////////////////////////////////////////////////////////////
/// @brief calculates difference in precision between the result of manh
/// calculation and the edge precision, based on compile time trait values
template<typename RT>
constexpr int64_t ManhToEdgePrecisionAdjust()
{
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
"Inadequate precision of result of manh calculation ");
return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
}

//////////////////////////////////////////////////////////////////////////
/// @struct adjustEdgeConservative
/// @brief Primary template definition used for partially specializing
@@ -306,15 +317,15 @@ struct adjustEdgeConservative<RT, std::true_type>
/// instead of having to test individual pixel corners for conservative coverage
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
{
/// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
/// from the pixel center (in the direction of the edge normal A/B)
// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
// from the pixel center (in the direction of the edge normal A/B)

/// edge = Ax + Bx + C - (manh/e)
/// manh = manhattan distance = abs(A) + abs(B)
/// e = absolute rounding error from snapping from float to fixed point precision
// edge = Ax + Bx + C - (manh/e)
// manh = manhattan distance = abs(A) + abs(B)
// e = absolute rounding error from snapping from float to fixed point precision

/// 'fixed point' multiply (in double to be avx1 friendly)
/// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
// 'fixed point' multiply (in double to be avx1 friendly)
// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
__m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
__m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)),
_mm256_mul_pd(vBai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)));
@@ -322,15 +333,13 @@ struct adjustEdgeConservative<RT, std::true_type>
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
"Inadequate precision of result of manh calculation ");
/// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
/// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
manh = _mm256_mul_pd(manh, _mm256_set1_pd(((RT::PrecisionT::BitsT::value +
RT::ConservativePrecisionT::BitsT::value) -
RT::EdgePrecisionT::BitsT::value) * 0.5));

/// move the edge away from the pixel center by the required conservative precision + 1/2 pixel
/// this allows the rasterizer to do a single conservative coverage test to see if the primitive
/// intersects the pixel at all
// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5));

// move the edge away from the pixel center by the required conservative precision + 1/2 pixel
// this allows the rasterizer to do a single conservative coverage test to see if the primitive
// intersects the pixel at all
vEdge = _mm256_sub_pd(vEdge, manh);
};
};
@@ -346,6 +355,19 @@ struct adjustEdgeConservative<RT, std::false_type>
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge){};
};

//////////////////////////////////////////////////////////////////////////
/// @brief calculates the distance a degenerate BBox needs to be adjusted
/// for conservative rast based on compile time trait values
template<typename RT>
constexpr int64_t ConservativeScissorOffset()
{
static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision");
// if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges
typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT;
// 1/2 pixel edge offset + conservative offset - degenerateTriangle
return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
}

//////////////////////////////////////////////////////////////////////////
/// @brief Performs calculations to adjust each a scalar edge out
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
@@ -354,13 +376,7 @@ template <typename RT>
INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge)
{
int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));

int64_t manh = ((aabs * RT::ConservativeEdgeOffsetT::value) + (babs * RT::ConservativeEdgeOffsetT::value)) >>
((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);

static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
"Inadequate precision of result of manh calculation ");

int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>();
vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh));
};

@@ -371,7 +387,7 @@ INLINE void adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vE
{
static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
"Edge equation expected to be in x.16 fixed point");
/// need to offset the edge before applying the top-left rule
// need to offset the edge before applying the top-left rule
adjustEdgeConservative<RT, typename RT::IsConservativeT>(vAi, vBi, vEdge);

adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
@@ -563,14 +579,13 @@ struct ComputeScissorEdges
template <typename RT>
struct ComputeScissorEdges<std::true_type, std::true_type, RT>
{

//////////////////////////////////////////////////////////////////////////
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
/// evaluate edge equations and offset them away from pixel center.
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
{
/// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
BBOX scissor;
scissor.left = std::max(triBBox.left, scissorBBox.left);
scissor.right = std::min(triBBox.right, scissorBBox.right);
@@ -593,7 +608,7 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));

/// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
@@ -632,6 +647,81 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
}
};

//////////////////////////////////////////////////////////////////////////
/// @brief Primary function template for TrivialRejectTest. Should
/// never be called, but TemplateUnroller instantiates a few unused values,
/// so it calls a runtime assert instead of a static_assert.
template <typename ValidEdgeMaskT>
INLINE bool TrivialRejectTest(const int, const int, const int)
{
SWR_ASSERT(0, "Primary templated function should never be called");
return false;
};

//////////////////////////////////////////////////////////////////////////
/// @brief E0E1ValidT specialization of TrivialRejectTest. Tests edge 0
/// and edge 1 for trivial coverage reject
template <>
INLINE bool TrivialRejectTest<E0E1ValidT>(const int mask0, const int mask1, const int)
{
return (!(mask0 && mask1)) ? true : false;
};

//////////////////////////////////////////////////////////////////////////
/// @brief E0E2ValidT specialization of TrivialRejectTest. Tests edge 0
/// and edge 2 for trivial coverage reject
template <>
INLINE bool TrivialRejectTest<E0E2ValidT>(const int mask0, const int, const int mask2)
{
return (!(mask0 && mask2)) ? true : false;
};

//////////////////////////////////////////////////////////////////////////
/// @brief E1E2ValidT specialization of TrivialRejectTest. Tests edge 1
/// and edge 2 for trivial coverage reject
template <>
INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int mask2)
{
return (!(mask1 && mask2)) ? true : false;
};

//////////////////////////////////////////////////////////////////////////
/// @brief AllEdgesValidT specialization of TrivialRejectTest. Tests all
/// primitive edges for trivial coverage reject
template <>
INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
{
return (!(mask0 && mask1 && mask2)) ? true : false;;
};

//////////////////////////////////////////////////////////////////////////
/// @brief NoEdgesValidT specialization of TrivialRejectTest. Degenerate
/// point, so return false and rasterize against conservative BBox
template <>
INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int)
{
return false;
};

//////////////////////////////////////////////////////////////////////////
/// @brief Primary function template for TrivialAcceptTest. Always returns
/// false, since it will only be called for degenerate tris, and as such
/// will never cover the entire raster tile
template <typename ValidEdgeMaskT>
INLINE bool TrivialAcceptTest(const int, const int, const int)
{
return false;
};

//////////////////////////////////////////////////////////////////////////
/// @brief AllEdgesValidT specialization for TrivialAcceptTest. Test all
/// edge masks for a fully covered raster tile
template <>
INLINE bool TrivialAcceptTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
{
return ((mask0 & mask1 & mask2) == 0xf);
};

template <typename RT>
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
{
@@ -681,8 +771,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// determinant
float det = calcDeterminantInt(vAi, vBi);

/// Verts in Pixel Coordinate Space at this point
/// Det > 0 = CW winding order
// Verts in Pixel Coordinate Space at this point
// Det > 0 = CW winding order
// Convert CW triangles to CCW
if (det > 0.0)
{
@@ -693,28 +783,39 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
det = -det;
}

/// @todo: handle degenerates for ConservativeRast

__m128 vC;
// Finish triangle setup - C edge coef
triangleSetupC(vX, vY, vA, vB, vC);

// compute barycentric i and j
// i = (A1x + B1y + C1)/det
// j = (A2x + B2y + C2)/det
__m128 vDet = _mm_set1_ps(det);
__m128 vRecipDet = _mm_div_ps(_mm_set1_ps(1.0f), vDet);//_mm_rcp_ps(vDet);
_mm_store_ss(&triDesc.recipDet, vRecipDet);

// only extract coefs for 2 of the barycentrics; the 3rd can be
// determined from the barycentric equation:
// i + j + k = 1 <=> k = 1 - j - i
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
_MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
_MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
_MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
// If we have degenerate edge(s) to rasterize, set I and J coefs
// to 0 for constant interpolation of attributes
triDesc.I[0] = 0.0f;
triDesc.I[1] = 0.0f;
triDesc.I[2] = 0.0f;
triDesc.J[0] = 0.0f;
triDesc.J[1] = 0.0f;
triDesc.J[2] = 0.0f;

// Degenerate triangles have no area
triDesc.recipDet = 0.0f;
}
else
{
// only extract coefs for 2 of the barycentrics; the 3rd can be
// determined from the barycentric equation:
// i + j + k = 1 <=> k = 1 - j - i
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
_MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
_MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
_MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);

// compute recipDet, used to calculate barycentric i and j in the backend
triDesc.recipDet = 1.0f/det;
}

OSALIGNSIMD(float) oneOverW[4];
_mm_store_ps(oneOverW, vRecipW);
@@ -764,6 +865,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
OSALIGNSIMD(BBOX) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);

if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
bbox.left--; bbox.right++; bbox.top--; bbox.bottom++;
SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0,
"Conservative rast degenerate handling requires a valid scissor rect");
}

// Intersect with scissor/viewport
OSALIGNSIMD(BBOX) intersect;
intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
@@ -941,13 +1050,13 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
for (uint32_t sampleNum = 0; sampleNum < NumRasterSamplesT::value; sampleNum++)
{
// trivial reject, at least one edge has all 4 corners of raster tile outside
bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false;
bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);

if (!trivialReject)
{
// trivial accept mask
triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL;
if ((mask0 & mask1 & mask2) == 0xf)
if (TrivialAcceptTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2))
{
triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum];
// trivial accept, all 4 corners of all 3 edges are negative
@@ -991,7 +1100,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,

// not trivial accept or reject, must rasterize full tile
RDTSC_START(BERasterizePartial);
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value>(pDC, startQuadEdges, rastEdges);
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
RDTSC_STOP(BERasterizePartial, 0, 0);

triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
@@ -1101,7 +1210,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
// once at center and broadcast the results in the backend
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
// conservative rast not supported for points/lines
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0));

// overwrite texcoords for point sprites
if (isPointSpriteTexCoordEnabled)
@@ -1429,7 +1538,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
PFN_WORK_FUNC pfnTriRast;
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
// conservative rast not supported for points/lines
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0));

// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
@@ -1541,6 +1650,7 @@ PFN_WORK_FUNC GetRasterizerFunc(
uint32_t numSamples,
bool IsConservative,
uint32_t InputCoverage,
uint32_t EdgeEnable,
bool RasterizeScissorEdges
)
{
@@ -1548,5 +1658,6 @@ PFN_WORK_FUNC GetRasterizerFunc(
IntArg<SWR_MULTISAMPLE_1X,SWR_MULTISAMPLE_TYPE_COUNT-1>{numSamples},
IsConservative,
IntArg<SWR_INPUT_COVERAGE_NONE, SWR_INPUT_COVERAGE_COUNT-1>{InputCoverage},
IntArg<0, VALID_TRI_EDGE_COUNT-1>{EdgeEnable},
RasterizeScissorEdges);
}

+ 31
- 7
src/gallium/drivers/swr/rasterizer/core/rasterizer.h Ver fichero

@@ -48,8 +48,28 @@ PFN_WORK_FUNC GetRasterizerFunc(
uint32_t numSamples,
bool IsConservative,
uint32_t InputCoverage,
uint32_t EdgeEnable,
bool RasterizeScissorEdges);

enum ValidTriEdges
{
NO_VALID_EDGES = 0,
E0_E1_VALID = 0x3,
E0_E2_VALID = 0x5,
E1_E2_VALID = 0x6,
ALL_EDGES_VALID = 0x7,
VALID_TRI_EDGE_COUNT,
};

//////////////////////////////////////////////////////////////////////////
/// @brief ValidTriEdges convenience typedefs used for templated function
/// specialization supported Fixed Point precisions
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT;
typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;

//////////////////////////////////////////////////////////////////////////
/// @struct RasterScissorEdgesT
/// @brief Primary RasterScissorEdgesT templated struct that holds compile
@@ -59,22 +79,26 @@ PFN_WORK_FUNC GetRasterizerFunc(
/// 3 triangle edges + 4 scissor edges for coverage.
/// @tparam RasterScissorEdgesT: number of multisamples
/// @tparam ConservativeT: is this a conservative rasterization
template <typename RasterScissorEdgesT, typename ConservativeT>
/// @tparam EdgeMaskT: Which edges are valid(not degenerate)
template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT>
struct RasterEdgeTraits
{
typedef std::true_type RasterizeScissorEdgesT;
typedef std::integral_constant<uint32_t, 7> NumEdgesT;
typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
};

//////////////////////////////////////////////////////////////////////////
/// @brief specialization of RasterEdgeTraits. If neither scissor rect
/// nor conservative rast is enabled, only test 3 triangle edges
/// for coverage
template <>
struct RasterEdgeTraits<std::false_type, std::false_type>
template <typename EdgeMaskT>
struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
{
typedef std::false_type RasterizeScissorEdgesT;
typedef std::integral_constant<uint32_t, 3> NumEdgesT;
// no need for degenerate edge masking in non-conservative case; rasterize all triangle edges
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT;
};

//////////////////////////////////////////////////////////////////////////
@@ -86,19 +110,19 @@ struct RasterEdgeTraits<std::false_type, std::false_type>
/// @tparam InputCoverageT: what type of input coverage is the PS expecting?
/// (only used with conservative rasterization)
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename RasterScissorEdgesT>
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT>
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT>
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
{
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value)> MT;
/// Fixed point precision the rasterizer is using
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
/// Fixed point precision of the edge tests used during rasterization
typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;

// If conservative rast is enabled, only need a single sample coverage test, with the result copied to all samples
typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT;
typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT;

static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value,
"Rasterizer edge fixed point precision < required conservative rast precision");

+ 20
- 0
src/gallium/drivers/swr/rasterizer/core/utils.h Ver fichero

@@ -831,6 +831,26 @@ struct UnrollerL<End, End, Step> {
}
};

// helper function to unroll loops, with mask to skip specific iterations
template<int Begin, int End, int Step = 1, int Mask = 0x7f>
struct UnrollerLMask {
template<typename Lambda>
INLINE static void step(Lambda& func) {
if(Mask & (1 << Begin))
{
func(Begin);
}
UnrollerL<Begin + Step, End, Step>::step(func);
}
};

template<int End, int Step, int Mask>
struct UnrollerLMask<End, End, Step, Mask> {
template<typename Lambda>
static void step(Lambda& func) {
}
};

// general CRC compute
INLINE
uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)

Cargando…
Cancelar
Guardar