Преглед изворни кода

amd/addrlib: update to the latest version

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
master
Marek Olšák пре 6 година
родитељ
комит
69ea473eeb

+ 42
- 42
src/amd/addrlib/inc/addrinterface.h Прегледај датотеку

@@ -308,7 +308,8 @@ typedef union _ADDR_CREATE_FLAGS
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility
UINT_32 reserved : 24; ///< Reserved bits for future use
UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2
UINT_32 reserved : 23; ///< Reserved bits for future use
};

UINT_32 value;
@@ -347,9 +348,6 @@ typedef struct _ADDR_REGISTER_VALUE
///< CI registers-------------------------------------------------
const UINT_32* pMacroTileConfig; ///< Global macro tile mode table
UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig

///< GFX9 HW parameters
UINT_32 blockVarSizeLog2; ///< SW_VAR_* block size
} ADDR_REGISTER_VALUE;

/**
@@ -3549,12 +3547,14 @@ typedef union _ADDR2_BLOCK_SET
{
struct
{
UINT_32 micro : 1; // 256B block for 2D resource
UINT_32 macro4KB : 1; // 4KB for 2D/3D resource
UINT_32 macro64KB : 1; // 64KB for 2D/3D resource
UINT_32 var : 1; // VAR block
UINT_32 linear : 1; // Linear block
UINT_32 reserved : 27;
UINT_32 micro : 1; // 256B block for 2D resource
UINT_32 macroThin4KB : 1; // Thin 4KB for 2D/3D resource
UINT_32 macroThick4KB : 1; // Thick 4KB for 3D resource
UINT_32 macroThin64KB : 1; // Thin 64KB for 2D/3D resource
UINT_32 macroThick64KB : 1; // Thick 64KB for 3D resource
UINT_32 var : 1; // VAR block
UINT_32 linear : 1; // Linear block
UINT_32 reserved : 25;
};

UINT_32 value;
@@ -3594,38 +3594,38 @@ typedef union _ADDR2_SWMODE_SET
{
struct
{
UINT_32 swLinear : 1;
UINT_32 sw256B_S : 1;
UINT_32 sw256B_D : 1;
UINT_32 sw256B_R : 1;
UINT_32 sw4KB_Z : 1;
UINT_32 sw4KB_S : 1;
UINT_32 sw4KB_D : 1;
UINT_32 sw4KB_R : 1;
UINT_32 sw64KB_Z : 1;
UINT_32 sw64KB_S : 1;
UINT_32 sw64KB_D : 1;
UINT_32 sw64KB_R : 1;
UINT_32 swVar_Z : 1;
UINT_32 swVar_S : 1;
UINT_32 swVar_D : 1;
UINT_32 swVar_R : 1;
UINT_32 sw64KB_Z_T : 1;
UINT_32 sw64KB_S_T : 1;
UINT_32 sw64KB_D_T : 1;
UINT_32 sw64KB_R_T : 1;
UINT_32 sw4KB_Z_X : 1;
UINT_32 sw4KB_S_X : 1;
UINT_32 sw4KB_D_X : 1;
UINT_32 sw4KB_R_X : 1;
UINT_32 sw64KB_Z_X : 1;
UINT_32 sw64KB_S_X : 1;
UINT_32 sw64KB_D_X : 1;
UINT_32 sw64KB_R_X : 1;
UINT_32 swVar_Z_X : 1;
UINT_32 swVar_S_X : 1;
UINT_32 swVar_D_X : 1;
UINT_32 swVar_R_X : 1;
UINT_32 swLinear : 1;
UINT_32 sw256B_S : 1;
UINT_32 sw256B_D : 1;
UINT_32 sw256B_R : 1;
UINT_32 sw4KB_Z : 1;
UINT_32 sw4KB_S : 1;
UINT_32 sw4KB_D : 1;
UINT_32 sw4KB_R : 1;
UINT_32 sw64KB_Z : 1;
UINT_32 sw64KB_S : 1;
UINT_32 sw64KB_D : 1;
UINT_32 sw64KB_R : 1;
UINT_32 swReserved0 : 1;
UINT_32 swReserved1 : 1;
UINT_32 swReserved2 : 1;
UINT_32 swReserved3 : 1;
UINT_32 sw64KB_Z_T : 1;
UINT_32 sw64KB_S_T : 1;
UINT_32 sw64KB_D_T : 1;
UINT_32 sw64KB_R_T : 1;
UINT_32 sw4KB_Z_X : 1;
UINT_32 sw4KB_S_X : 1;
UINT_32 sw4KB_D_X : 1;
UINT_32 sw4KB_R_X : 1;
UINT_32 sw64KB_Z_X : 1;
UINT_32 sw64KB_S_X : 1;
UINT_32 sw64KB_D_X : 1;
UINT_32 sw64KB_R_X : 1;
UINT_32 swVar_Z_X : 1;
UINT_32 swReserved4 : 1;
UINT_32 swReserved5 : 1;
UINT_32 swVar_R_X : 1;
};

UINT_32 value;

+ 32
- 30
src/amd/addrlib/inc/addrtypes.h Прегледај датотеку

@@ -90,11 +90,7 @@ typedef int INT;
#if defined(BRAHMA_ARM)
#define ADDR_FASTCALL
#elif defined(__GNUC__)
#if defined(__i386__)
#define ADDR_FASTCALL __attribute__((regparm(0)))
#else
#define ADDR_FASTCALL
#endif
#define ADDR_FASTCALL __attribute__((regparm(0)))
#else
#define ADDR_FASTCALL __fastcall
#endif
@@ -203,22 +199,32 @@ typedef enum _AddrTileMode
/**
****************************************************************************************************
* @brief
* Neutral enums that define swizzle modes for Gfx9 ASIC
* Neutral enums that define swizzle modes for Gfx9+ ASIC
* @note
*
* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resouce
* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resouce
* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resouce
* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resouce
* ADDR_SW_VAR_* addressing block aligned size is ASIC specific, for 2D/3D resouce
* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resource
* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resource
* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resource
* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resource
*
* ADDR_SW_*_Z For 2D resouce, represents Z-order swizzle mode for depth/stencil/FMask
For 3D resouce, represents a swizzle mode similar to legacy thick tile mode
* ADDR_SW_*_S represents standard swizzle mode defined by MS
* ADDR_SW_*_D For 2D resouce, represents a swizzle mode for displayable resource
* For 3D resouce, represents a swizzle mode which places each slice in order & pixel
* ADDR_SW_*_Z For GFX9:
- for 2D resource, represents Z-order swizzle mode for depth/stencil/FMask
- for 3D resource, represents a swizzle mode similar to legacy thick tile mode
For GFX10:
- represents Z-order swizzle mode for depth/stencil/FMask
* ADDR_SW_*_S For GFX9+:
- represents standard swizzle mode defined by MS
* ADDR_SW_*_D For GFX9:
- for 2D resource, represents a swizzle mode for displayable resource
* - for 3D resource, represents a swizzle mode which places each slice in order & pixel
For GFX10:
- for 2D resource, represents a swizzle mode for displayable resource
- for 3D resource, represents a swizzle mode similar to legacy thick tile mode
within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible!
* ADDR_SW_*_R For 2D resouce only, represents a swizzle mode for rotated displayable resource
* ADDR_SW_*_R For GFX9:
- 2D resource only, represents a swizzle mode for rotated displayable resource
For GFX10:
- represents a swizzle mode for render target resource
*
****************************************************************************************************
*/
@@ -236,10 +242,10 @@ typedef enum _AddrSwizzleMode
ADDR_SW_64KB_S = 9,
ADDR_SW_64KB_D = 10,
ADDR_SW_64KB_R = 11,
ADDR_SW_VAR_Z = 12,
ADDR_SW_VAR_S = 13,
ADDR_SW_VAR_D = 14,
ADDR_SW_VAR_R = 15,
ADDR_SW_RESERVED0 = 12,
ADDR_SW_RESERVED1 = 13,
ADDR_SW_RESERVED2 = 14,
ADDR_SW_RESERVED3 = 15,
ADDR_SW_64KB_Z_T = 16,
ADDR_SW_64KB_S_T = 17,
ADDR_SW_64KB_D_T = 18,
@@ -253,17 +259,11 @@ typedef enum _AddrSwizzleMode
ADDR_SW_64KB_D_X = 26,
ADDR_SW_64KB_R_X = 27,
ADDR_SW_VAR_Z_X = 28,
ADDR_SW_VAR_S_X = 29,
ADDR_SW_VAR_D_X = 30,
ADDR_SW_RESERVED4 = 29,
ADDR_SW_RESERVED5 = 30,
ADDR_SW_VAR_R_X = 31,
ADDR_SW_LINEAR_GENERAL = 32,
ADDR_SW_MAX_TYPE = 33,

// Used for represent block with identical size
ADDR_SW_256B = ADDR_SW_256B_S,
ADDR_SW_4KB = ADDR_SW_4KB_S_X,
ADDR_SW_64KB = ADDR_SW_64KB_S_X,
ADDR_SW_VAR = ADDR_SW_VAR_S_X,
} AddrSwizzleMode;

/**
@@ -316,7 +316,9 @@ typedef enum _AddrSwType
ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder
ADDR_SW_S = 1, // Resource basic swizzle mode is Standard
ADDR_SW_D = 2, // Resource basic swizzle mode is Display
ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated
ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated/Render optimized
ADDR_SW_L = 4, // Resource basic swizzle mode is Linear
ADDR_SW_MAX_SWTYPE
} AddrSwType;

/**

+ 0
- 1
src/amd/addrlib/src/addrinterface.cpp Прегледај датотеку

@@ -60,7 +60,6 @@ ADDR_E_RETURNCODE ADDR_API AddrCreate(
ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;

{
returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut);
}

+ 24
- 8
src/amd/addrlib/src/core/addrcommon.h Прегледај датотеку

@@ -46,12 +46,16 @@

// ADDR_LNX_KERNEL_BUILD is for internal build
// Moved from addrinterface.h so __KERNEL__ is not needed any more
#if !defined(__APPLE__) || defined(HAVE_TSERVER)
#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
#include <string.h>
#elif !defined(__APPLE__) || defined(HAVE_TSERVER)
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#endif

#include <assert.h>
#include "util/macros.h"

////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -156,11 +160,7 @@
#endif // DEBUG
////////////////////////////////////////////////////////////////////////////////////////////////////

#if defined(static_assert)
#define ADDR_C_ASSERT(__e) static_assert(__e, "")
#else
#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1]
#endif
#define ADDR_C_ASSERT(__e) STATIC_ASSERT(__e)

namespace Addr
{
@@ -270,7 +270,8 @@ union ConfigFlags
UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear
UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element
UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility
UINT_32 reserved : 20; ///< Reserved bits for future use
UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2
UINT_32 reserved : 19; ///< Reserved bits for future use
};

UINT_32 value;
@@ -926,6 +927,21 @@ static inline UINT_32 GetCoordActiveMask(
return mask;
}

/**
****************************************************************************************************
*   ShiftCeil
*
*   @brief
*       Apply right-shift with ceiling: the result is (a >> b), rounded up by one whenever any of
*       the b low-order bits shifted out of a is set.
*
*   @note
*       b must be less than 32 (the width of UINT_32); larger shift counts are undefined.
*
*   @return
*       The right-shifted value, rounded up
****************************************************************************************************
*/
static inline UINT_32 ShiftCeil(
    UINT_32 a,  ///< [in] value to be right-shifted
    UINT_32 b)  ///< [in] number of bits to shift
{
    // Build the mask from an unsigned literal: with a signed "1", shifting by 31 overflows int.
    const UINT_32 droppedBitsMask = (1u << b) - 1u;

    return (a >> b) + (((a & droppedBitsMask) != 0) ? 1 : 0);
}

} // Addr

#endif // __ADDR_COMMON_H__

+ 1
- 0
src/amd/addrlib/src/core/addrlib.cpp Прегледај датотеку

@@ -250,6 +250,7 @@ ADDR_E_RETURNCODE Lib::Create(
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign;
pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
pLib->m_configFlags.forceDccAndTcCompat = pCreateIn->createFlags.forceDccAndTcCompat;
pLib->m_configFlags.nonPower2MemConfig = pCreateIn->createFlags.nonPower2MemConfig;
pLib->m_configFlags.disableLinearOpt = FALSE;

pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);

+ 162
- 58
src/amd/addrlib/src/core/addrlib2.cpp Прегледај датотеку

@@ -73,7 +73,8 @@ Lib::Lib()
m_rbPerSeLog2(0),
m_maxCompFragLog2(0),
m_pipeInterleaveLog2(0),
m_blockVarSizeLog2(0)
m_blockVarSizeLog2(0),
m_numEquations(0)
{
}

@@ -98,7 +99,8 @@ Lib::Lib(const Client* pClient)
m_rbPerSeLog2(0),
m_maxCompFragLog2(0),
m_pipeInterleaveLog2(0),
m_blockVarSizeLog2(0)
m_blockVarSizeLog2(0),
m_numEquations(0)
{
}

@@ -1363,35 +1365,65 @@ ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
ADDR_E_RETURNCODE returnCode = ComputeBlockDimension(pWidth,
pHeight,
pDepth,
bpp,
resourceType,
swizzleMode);
ADDR_E_RETURNCODE returnCode = ADDR_OK;

if ((returnCode == ADDR_OK) && (numSamples > 1) && IsThin(resourceType, swizzleMode))
if (IsThick(resourceType, swizzleMode))
{
const UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
const UINT_32 log2sample = Log2(numSamples);
const UINT_32 q = log2sample >> 1;
const UINT_32 r = log2sample & 1;

if (log2blkSize & 1)
{
*pWidth >>= q;
*pHeight >>= (q + r);
}
else
{
*pWidth >>= (q + r);
*pHeight >>= q;
}
ComputeThickBlockDimension(pWidth, pHeight, pDepth, bpp, resourceType, swizzleMode);
}
else if (IsThin(resourceType, swizzleMode))
{
ComputeThinBlockDimension(pWidth, pHeight, pDepth, bpp, numSamples, resourceType, swizzleMode);
}
else
{
ADDR_ASSERT_ALWAYS();
returnCode = ADDR_INVALIDPARAMS;
}

return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeThinBlockDimension
*
*   @brief
*       Internal function that derives the width/height/depth (in elements) of one block of a thin swizzle mode
*       from the block size, the element size (bpp) and the sample count. Both width and height come out as
*       powers of two; depth is always 1.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeThinBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThin(resourceType, swizzleMode));

    // Per the original note: GFX9 and GFX10 amplify dimensions differently. For a 128KB block + 1xAA + 1BPE the thin
    // block is [256W * 512H] on GFX9 ASICs and [512W * 256H] on GFX10 ASICs. The GFX10 behaviour lives here in the
    // base class because it is the newer HWL, which saves changes when further HWLs are added.
    const UINT_32 blkSizeLog2   = GetBlockSizeLog2(swizzleMode);
    const UINT_32 elemBytesLog2 = Log2(bpp >> 3);
    const UINT_32 samplesLog2   = Log2(Max(numSamples, 1u)); // a sample count of 0 is treated as 1
    const UINT_32 elemCountLog2 = blkSizeLog2 - elemBytesLog2 - samplesLog2;

    // Split the element count between width and height. When the split is uneven, one side is twice the other:
    //  - even log2(samples) (e.g. 1xAA/4xAA), or an odd block-size log2  -> width takes the extra bit
    //  - otherwise                                                        -> height takes the extra bit
    UINT_32 widthLog2 = elemCountLog2;

    if (((samplesLog2 & 1) == 0) || ((blkSizeLog2 & 1) != 0))
    {
        widthLog2 += 1; // bias the halving below so that it rounds up
    }

    widthLog2 >>= 1;

    const UINT_32 heightLog2 = elemCountLog2 - widthLog2;

    *pWidth  = 1u << widthLog2;
    *pHeight = 1u << heightLog2;
    *pDepth  = 1;
}

/**
************************************************************************************************************************
* Lib::ComputeBlockDimension
@@ -1404,42 +1436,22 @@ ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf(
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;

UINT_32 eleBytes = bpp >> 3;
UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);

if (IsThin(resourceType, swizzleMode))
if (IsThick(resourceType, swizzleMode))
{
UINT_32 log2blkSizeIn256B = log2blkSize - 8;
UINT_32 widthAmp = log2blkSizeIn256B / 2;
UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;

ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));

*pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
*pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
*pDepth = 1;
ComputeThickBlockDimension(pWidth, pHeight, pDepth, bpp, resourceType, swizzleMode);
}
else if (IsThick(resourceType, swizzleMode))
else if (IsThin(resourceType, swizzleMode))
{
UINT_32 log2blkSizeIn1KB = log2blkSize - 10;
UINT_32 averageAmp = log2blkSizeIn1KB / 3;
UINT_32 restAmp = log2blkSizeIn1KB % 3;

ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block1K_3d) / sizeof(Block1K_3d[0]));

*pWidth = Block1K_3d[microBlockSizeTableIndex].w << averageAmp;
*pHeight = Block1K_3d[microBlockSizeTableIndex].h << (averageAmp + (restAmp / 2));
*pDepth = Block1K_3d[microBlockSizeTableIndex].d << (averageAmp + ((restAmp != 0) ? 1 : 0));
ComputeThinBlockDimension(pWidth, pHeight, pDepth, bpp, 0, resourceType, swizzleMode);
}
else
{
@@ -1450,6 +1462,42 @@ ADDR_E_RETURNCODE Lib::ComputeBlockDimension(
return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeThickBlockDimension
*
*   @brief
*       Internal function that derives the block width/height/depth (in elements) for a thick swizzle mode by
*       scaling the per-element-size entry of the Block1K_3d table up to the block size of the swizzle mode.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeThickBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThick(resourceType, swizzleMode));

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
    const UINT_32 tableIdx    = Log2(bpp >> 3); // table is indexed by log2 of the element size in bytes

    ADDR_ASSERT(tableIdx < sizeof(Block1K_3d) / sizeof(Block1K_3d[0]));

    // Number of doublings needed to grow from the 1KB (2^10) base block to the actual block size.
    const UINT_32 doublings = blkSizeLog2 - 10;

    // Every axis receives doublings / 3; the remainder goes to depth first, then to height.
    const UINT_32 commonAmp   = doublings / 3;
    const UINT_32 remainder   = doublings % 3;
    const UINT_32 heightExtra = (remainder == 2) ? 1 : 0;
    const UINT_32 depthExtra  = (remainder == 0) ? 0 : 1;

    *pWidth  = Block1K_3d[tableIdx].w << commonAmp;
    *pHeight = Block1K_3d[tableIdx].h << (commonAmp + heightExtra);
    *pDepth  = Block1K_3d[tableIdx].d << (commonAmp + depthExtra);
}

/**
************************************************************************************************************************
* Lib::GetMipTailDim
@@ -1469,11 +1517,11 @@ Dim3d Lib::GetMipTailDim(
UINT_32 blockDepth) const
{
Dim3d out = {blockWidth, blockHeight, blockDepth};
UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);

if (IsThick(resourceType, swizzleMode))
{
UINT_32 dim = log2blkSize % 3;
UINT_32 dim = log2BlkSize % 3;

if (dim == 0)
{
@@ -1490,11 +1538,22 @@ Dim3d Lib::GetMipTailDim(
}
else
{
if (log2blkSize & 1)
ADDR_ASSERT(IsThin(resourceType, swizzleMode));

// GFX9/GFX10 use different dimension shrinking logic for mipmap tail: say for 128KB block + 2BPE, the maximum
// dimension of mipmap tail level will be [256W * 128H] on GFX9 ASICs and [128W * 256H] on GFX10 ASICs. Since
// GFX10 is newer HWL so we make its implementation into base class, in order to save future change on new HWLs.
// And assert log2BlkSize will always be an even value on GFX9, so we never need the logic wrapped by DEBUG...
#if DEBUG
if ((log2BlkSize & 1) && (m_chipFamily == ADDR_CHIP_FAMILY_AI))
{
// Should never go here...
ADDR_ASSERT_ALWAYS();

out.h >>= 1;
}
else
#endif
{
out.w >>= 1;
}
@@ -1873,7 +1932,52 @@ VOID Lib::ComputeQbStereoInfo(
pOut->pixelHeight <<= 1;

// Double size
pOut->surfSize <<= 1;
pOut->surfSize <<= 1;
pOut->sliceSize <<= 1;
}

/**
************************************************************************************************************************
*   Lib::FilterInvalidEqSwizzleMode
*
*   @brief
*       Remove from the allowed set every swizzle mode whose equation lookup entry (for the given resource type and
*       element size) is ADDR_INVALID_EQUATION_INDEX. The set is left untouched for 1D resources, and also when
*       the filtering would remove every mode.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::FilterInvalidEqSwizzleMode(
    ADDR2_SWMODE_SET& allowedSwModeSet,
    AddrResourceType  resourceType,
    UINT_32           elemLog2
    ) const
{
    if (resourceType == ADDR_RSRC_TEX_1D)
    {
        // Nothing to filter for 1D resources
        return;
    }

    // The lookup table's first dimension is indexed by (resource type - 1).
    const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(resourceType) - 1;

    UINT_32 filteredSet = allowedSwModeSet.value;
    UINT_32 pendingBits = allowedSwModeSet.value;
    UINT_32 swModeIdx   = 0;

    // Walk the requested modes from bit 0 upwards and stop once no requested bits are left.
    while (pendingBits != 0)
    {
        const BOOL_32 requested = ((pendingBits & 1) != 0);

        if (requested &&
            (m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] == ADDR_INVALID_EQUATION_INDEX))
        {
            filteredSet &= ~(1u << swModeIdx);
        }

        pendingBits >>= 1;
        swModeIdx++;
    }

    // Only apply the filtering if at least one valid swizzle mode remains
    if (filteredSet != 0)
    {
        allowedSwModeSet.value = filteredSet;
    }
}

} // V2

+ 73
- 29
src/amd/addrlib/src/core/addrlib2.h Прегледај датотеку

@@ -68,6 +68,8 @@ struct SwizzleModeFlags
UINT_32 isT : 1; // T mode

UINT_32 isRtOpt : 1; // mode opt for render target

UINT_32 reserved : 20; // Reserved bits
};

struct Dim2d
@@ -87,25 +89,14 @@ struct Dim3d
enum AddrBlockType
{
AddrBlockMicro = 0, // Resource uses 256B block
AddrBlock4KB = 1, // Resource uses 4KB block
AddrBlock64KB = 2, // Resource uses 64KB block
AddrBlockVar = 3, // Resource uses var block, only valid for GFX9
AddrBlockLinear = 4, // Resource uses linear swizzle mode

AddrBlockMaxTiledType = AddrBlock64KB + 1,
};

enum AddrBlockSet
{
AddrBlockSetMicro = 1 << AddrBlockMicro,
AddrBlockSetMacro4KB = 1 << AddrBlock4KB,
AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
AddrBlockSetVar = 1 << AddrBlockVar,
AddrBlockSetLinear = 1 << AddrBlockLinear,

AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
AddrBlockSet2dGfx10 = AddrBlockSetMicro | AddrBlockSetMacro,
AddrBlockSet3dGfx10 = AddrBlockSetMacro,
AddrBlockThin4KB = 1, // Resource uses thin 4KB block
AddrBlockThick4KB = 2, // Resource uses thick 4KB block
AddrBlockThin64KB = 3, // Resource uses thin 64KB block
AddrBlockThick64KB = 4, // Resource uses thick 64KB block
AddrBlockVar = 5, // Resource uses var block, only valid for GFX9
AddrBlockLinear = 6, // Resource uses linear swizzle mode

AddrBlockMaxTiledType = AddrBlockVar + 1,
};

enum AddrSwSet
@@ -115,11 +106,17 @@ enum AddrSwSet
AddrSwSetD = 1 << ADDR_SW_D,
AddrSwSetR = 1 << ADDR_SW_R,

AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
AddrSwSet3dThinGfx10 = AddrSwSetZ | AddrSwSetR,
AddrSwSetColorGfx10 = AddrSwSetS | AddrSwSetD | AddrSwSetR,
AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
};

const UINT_32 Size256 = 256u;
const UINT_32 Size4K = 4096u;
const UINT_32 Size64K = 65536u;

const UINT_32 Log2Size256 = 8u;
const UINT_32 Log2Size4K = 12u;
const UINT_32 Log2Size64K = 16u;

/**
************************************************************************************************************************
* @brief This class contains asic independent address lib functionalities
@@ -237,6 +234,15 @@ protected:

static const UINT_32 MaxMipLevels = 16;

// Reports whether the swizzle mode table entry for swizzleMode has any bit set in its
// first sizeof(UINT_32) bytes.
BOOL_32 IsValidSwMode(AddrSwizzleMode swizzleMode) const
{
    // Read the raw bits through memcpy instead of dereferencing a reinterpret_cast
    // pointer, which would break strict-aliasing rules.
    UINT_32 rawFlags = 0;

    memcpy(&rawFlags, &m_swizzleModeTable[swizzleMode], sizeof(rawFlags));

    return (rawFlags != 0);
}

// Checking block size
BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const
{
@@ -356,7 +362,7 @@ protected:
{
blockSizeLog2 = 16;
}
else if (IsBlockVariable(swizzleMode))
else if (IsBlockVariable(swizzleMode) && (m_blockVarSizeLog2 != 0))
{
blockSizeLog2 = m_blockVarSizeLog2;
}
@@ -653,12 +659,29 @@ protected:
AddrSwizzleMode swizzleMode) const;

ADDR_E_RETURNCODE ComputeBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;

virtual VOID ComputeThinBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;

VOID ComputeThickBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;

static UINT_64 ComputePadSize(
const Dim3d* pBlkDim,
@@ -793,6 +816,11 @@ protected:

VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;

VOID FilterInvalidEqSwizzleMode(
ADDR2_SWMODE_SET& allowedSwModeSet,
AddrResourceType resourceType,
UINT_32 elemLog2) const;

UINT_32 m_se; ///< Number of shader engine
UINT_32 m_rbPerSe; ///< Number of render backend per shader engine
UINT_32 m_maxCompFrag; ///< Number of max compressed fragment
@@ -809,6 +837,22 @@ protected:

SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table

// Max number of swizzle mode supported for equation
static const UINT_32 MaxSwModeType = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwModeType * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];

// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwModeType][MaxElementBytesLog2];

private:
// Disallow the copy constructor
Lib(const Lib& a);

+ 7
- 3
src/amd/addrlib/src/core/addrobject.cpp Прегледај датотеку

@@ -119,7 +119,7 @@ VOID* Object::Alloc(
size_t objSize ///< [in] Size to allocate
) const
{
return ClientAlloc(objSize, &m_client);
return ClientAlloc(objSize, &m_client);;
}

/**
@@ -216,16 +216,20 @@ VOID Object::DebugPrint(
#if DEBUG
if (m_client.callbacks.debugPrint != NULL)
{
va_list ap;

va_start(ap, pDebugString);

ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};

debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT);
debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
debugPrintInput.hClient = m_client.handle;
va_start(debugPrintInput.ap, pDebugString);
va_copy(debugPrintInput.ap, ap);

m_client.callbacks.debugPrint(&debugPrintInput);

va_end(debugPrintInput.ap);
va_end(ap);
}
#endif
}

+ 5834
- 7144
src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h
Разлика између датотеке није приказан због своје велике величине
Прегледај датотеку


+ 652
- 362
src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
Разлика између датотеке није приказан због своје велике величине
Прегледај датотеку


+ 94
- 80
src/amd/addrlib/src/gfx10/gfx10addrlib.h Прегледај датотеку

@@ -36,6 +36,7 @@

#include "addrlib2.h"
#include "coord.h"
#include "gfx10SwizzlePattern.h"

namespace Addr
{
@@ -93,7 +94,11 @@ const UINT_32 Gfx10Blk64KBSwModeMask = (1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx10ZSwModeMask = (1u << ADDR_SW_64KB_Z_X);
const UINT_32 Gfx10BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_R_X);

const UINT_32 Gfx10ZSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_VAR_Z_X);

const UINT_32 Gfx10StandardSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_4KB_S) |
@@ -109,14 +114,16 @@ const UINT_32 Gfx10DisplaySwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X);

const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X) |
(1u << ADDR_SW_VAR_R_X);

const UINT_32 Gfx10XSwModeMask = (1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);
(1u << ADDR_SW_64KB_R_X) |
Gfx10BlkVarSwModeMask;

const UINT_32 Gfx10TSwModeMask = (1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_64KB_D_T);
@@ -131,7 +138,8 @@ const UINT_32 Gfx10Rsrc1dSwModeMask = Gfx10LinearSwModeMask |
const UINT_32 Gfx10Rsrc2dSwModeMask = Gfx10LinearSwModeMask |
Gfx10Blk256BSwModeMask |
Gfx10Blk4KBSwModeMask |
Gfx10Blk64KBSwModeMask;
Gfx10Blk64KBSwModeMask |
Gfx10BlkVarSwModeMask;

const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
@@ -141,14 +149,23 @@ const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);
(1u << ADDR_SW_64KB_R_X) |
Gfx10BlkVarSwModeMask;

const UINT_32 Gfx10Rsrc2dPrtSwModeMask = (Gfx10Blk4KBSwModeMask | Gfx10Blk64KBSwModeMask) & ~Gfx10XSwModeMask;

const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10DisplaySwModeMask;

const UINT_32 Gfx10Rsrc3dThinSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask | Gfx10BlkVarSwModeMask;

const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask);

const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk4KBSwModeMask;

const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;

const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask |
Gfx10RenderSwModeMask;
@@ -290,6 +307,14 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;

virtual UINT_32 HwlComputeMaxBaseAlignments() const;

virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

// Initialize equation table
VOID InitEquationTable();

@@ -309,6 +334,7 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;

private:
UINT_32 ComputeOffsetFromSwizzlePattern(
const UINT_64* pPattern,
UINT_32 numBits,
@@ -351,13 +377,6 @@ protected:
return compressBlkDim;
}

static UINT_32 ShiftCeil(
UINT_32 a,
UINT_32 b)
{
return (a >> b) + (((a & ((1 << b) - 1)) != 0) ? 1 : 0);
}

static void GetMipSize(
UINT_32 mip0Width,
UINT_32 mip0Height,
@@ -376,18 +395,39 @@ protected:
}
}

const UINT_64* GetSwizzlePattern(
const ADDR_SW_PATINFO* GetSwizzlePatternInfo(
AddrSwizzleMode swizzleMode,
AddrResourceType resourceType,
UINT_32 log2Elem,
UINT_32 numFrag) const;

// Assembles a 20-entry swizzle pattern from the four per-nibble tables. The rows selected by
// pPatInfo are copied into pSwizzle starting at entries 0 (nibble01), 8 (nibble2),
// 12 (nibble3) and 16 (nibble4).
VOID GetSwizzlePatternFromPatternInfo(
    const ADDR_SW_PATINFO* pPatInfo,
    ADDR_BIT_SETTING       (&pSwizzle)[20]) const
{
    // Each copy transfers one whole table row. sizeof is taken on row 0 because the
    // size of a row does not depend on which row is selected.
    memcpy(&pSwizzle[0],
           GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE01[0]));

    memcpy(&pSwizzle[8],
           GFX10_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE2[0]));

    memcpy(&pSwizzle[12],
           GFX10_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE3[0]));

    memcpy(&pSwizzle[16],
           GFX10_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE4[0]));
}

VOID ConvertSwizzlePatternToEquation(
UINT_32 elemLog2,
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
const UINT_64* pPattern,
ADDR_EQUATION* pEquation) const;
UINT_32 elemLog2,
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
const ADDR_SW_PATINFO* pPatInfo,
ADDR_EQUATION* pEquation) const;

static INT_32 GetMetaElementSizeLog2(Gfx10DataType dataType);

@@ -429,14 +469,6 @@ protected:
BOOL_32 pipeAlign,
Dim3d* pBlock) const;

BOOL_32 IsEquationCompatibleThick(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
return IsThick(resourceType, swizzleMode) &&
((m_settings.supportRbPlus == 0) || (swizzleMode != ADDR_SW_64KB_D_X));
}

INT_32 GetPipeRotateAmount(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
@@ -460,61 +492,29 @@ protected:

}

static const Dim3d Block256_3d[MaxNumOfBpp];
static const Dim3d Block64K_3d[MaxNumOfBpp];
static const Dim3d Block4K_3d[MaxNumOfBpp];
static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];

static const Dim2d Block64K_2d[MaxNumOfBpp];
static const Dim2d Block4K_2d[MaxNumOfBpp];

static const Dim2d Block64K_Log2_2d[MaxNumOfBpp];
static const Dim2d Block4K_Log2_2d[MaxNumOfBpp];

static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];

// Max number of swizzle mode supported for equation
static const UINT_32 MaxSwMode = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];

// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];
// Number of packers log2
UINT_32 m_numPkrLog2;
// Number of shader array log2
UINT_32 m_numSaLog2;

private:
virtual UINT_32 HwlComputeMaxBaseAlignments() const;

virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const;

static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet)
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
{
ADDR2_BLOCK_SET allowedBlockSet = {};

allowedBlockSet.micro = (allowedSwModeSet.value & Gfx10Blk256BSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macro4KB = (allowedSwModeSet.value & Gfx10Blk4KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macro64KB = (allowedSwModeSet.value & Gfx10Blk64KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.linear = (allowedSwModeSet.value & Gfx10LinearSwModeMask) ? TRUE : FALSE;
allowedBlockSet.micro = (allowedSwModeSet.value & Gfx10Blk256BSwModeMask) ? TRUE : FALSE;
allowedBlockSet.linear = (allowedSwModeSet.value & Gfx10LinearSwModeMask) ? TRUE : FALSE;
allowedBlockSet.var = (allowedSwModeSet.value & Gfx10BlkVarSwModeMask) ? TRUE : FALSE;

if (rsrcType == ADDR_RSRC_TEX_3D)
{
allowedBlockSet.macroThick4KB = (allowedSwModeSet.value & Gfx10Rsrc3dThick4KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx10Rsrc3dThin64KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macroThick64KB = (allowedSwModeSet.value & Gfx10Rsrc3dThick64KBSwModeMask) ? TRUE : FALSE;
}
else
{
allowedBlockSet.macroThin4KB = (allowedSwModeSet.value & Gfx10Blk4KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx10Blk64KBSwModeMask) ? TRUE : FALSE;
}

return allowedBlockSet;
}
@@ -554,12 +554,26 @@ private:
BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

static const UINT_32 ColumnBits = 2;
static const UINT_32 BankBits = 4;
static const UINT_32 ColumnBits = 2;
static const UINT_32 BankBits = 4;
static const UINT_32 UnalignedDccType = 3;

static const Dim3d Block256_3d[MaxNumOfBpp];
static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];

static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];

// Number of packers log2
UINT_32 m_numPkrLog2;
// Number of shader array log2
UINT_32 m_numSaLog2;

Gfx10ChipSettings m_settings;
UINT_32 m_colorBaseIndex;
UINT_32 m_htileBaseIndex;

UINT_32 m_colorBaseIndex;
UINT_32 m_xmaskBaseIndex;
UINT_32 m_dccBaseIndex;
};

} // V2

+ 196
- 120
src/amd/addrlib/src/gfx9/gfx9addrlib.cpp Прегледај датотеку

@@ -69,51 +69,50 @@ namespace V2
////////////////////////////////////////////////////////////////////////////////////////////////////

const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
{0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
{0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
{0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
{0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
{0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
{0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
{0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
{0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
{0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
{0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
{0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
{0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
{0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
{0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
{0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
{0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
{0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
{0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
{0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
};

const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
8, 6, 5, 4, 3, 2, 1, 0};
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};

const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};

@@ -130,8 +129,7 @@ const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2
*/
Gfx9Lib::Gfx9Lib(const Client* pClient)
:
Lib(pClient),
m_numEquations(0)
Lib(pClient)
{
m_class = AI_ADDRLIB;
memset(&m_settings, 0, sizeof(m_settings));
@@ -281,8 +279,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
) const
{
// TODO: Clarify with AddrLib team
// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);

UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
pIn->swizzleMode);
@@ -687,7 +684,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
*/
UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
{
return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
return Size64K;
}

/**
@@ -722,7 +719,7 @@ UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const

if (m_settings.metaBaseAlignFix)
{
maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
}

if (m_settings.htileAlignFix)
@@ -745,7 +742,7 @@ UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const

if (m_settings.metaBaseAlignFix)
{
maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
}

return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
@@ -1222,11 +1219,6 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
break;
}

m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);

if ((m_rbPerSeLog2 == 1) &&
(((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
@@ -1241,6 +1233,9 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
m_settings.htileCacheRbConflict = 1;
}
}

// For simplicity we never allow VAR swizzle mode for GFX9, the actual value is 18 on GFX9
m_blockVarSizeLog2 = 0;
}
else
{
@@ -2164,6 +2159,7 @@ BOOL_32 Gfx9Lib::IsEquationSupported(
UINT_32 elementBytesLog2) const
{
BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
(IsValidSwMode(swMode) == TRUE) &&
(IsLinear(swMode) == FALSE) &&
(((IsTex2d(rsrcType) == TRUE) &&
((elementBytesLog2 < 4) ||
@@ -2197,7 +2193,7 @@ VOID Gfx9Lib::InitEquationTable()
AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);

// Loop all possible swizzle mode
for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
{
AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);

@@ -2209,7 +2205,7 @@ VOID Gfx9Lib::InitEquationTable()
// Check if the input is supported
if (IsEquationSupported(rsrcType, swMode, bppIdx))
{
ADDR_EQUATION equation;
ADDR_EQUATION equation;
ADDR_E_RETURNCODE retCode;

memset(&equation, 0, sizeof(ADDR_EQUATION));
@@ -2954,14 +2950,10 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
case ADDR_SW_4KB_R:
case ADDR_SW_64KB_D:
case ADDR_SW_64KB_R:
case ADDR_SW_VAR_D:
case ADDR_SW_VAR_R:
case ADDR_SW_4KB_D_X:
case ADDR_SW_4KB_R_X:
case ADDR_SW_64KB_D_X:
case ADDR_SW_64KB_R_X:
case ADDR_SW_VAR_D_X:
case ADDR_SW_VAR_R_X:
support = (pIn->bpp <= 64);
break;

@@ -2975,22 +2967,18 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
{
case ADDR_SW_4KB_D:
case ADDR_SW_64KB_D:
case ADDR_SW_VAR_D:
case ADDR_SW_64KB_D_T:
case ADDR_SW_4KB_D_X:
case ADDR_SW_64KB_D_X:
case ADDR_SW_VAR_D_X:
support = (pIn->bpp == 64);
break;

case ADDR_SW_LINEAR:
case ADDR_SW_4KB_S:
case ADDR_SW_64KB_S:
case ADDR_SW_VAR_S:
case ADDR_SW_64KB_S_T:
case ADDR_SW_4KB_S_X:
case ADDR_SW_64KB_S_X:
case ADDR_SW_VAR_S_X:
support = (pIn->bpp <= 64);
break;

@@ -3210,7 +3198,7 @@ BOOL_32 Gfx9Lib::ValidateSwModeParams(
{
BOOL_32 valid = TRUE;

if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
{
ADDR_ASSERT_ALWAYS();
valid = FALSE;
@@ -3229,7 +3217,6 @@ BOOL_32 Gfx9Lib::ValidateSwModeParams(
const AddrSwizzleMode swizzle = pIn->swizzleMode;
const BOOL_32 linear = IsLinear(swizzle);
const BOOL_32 blk256B = IsBlock256b(swizzle);
const BOOL_32 blkVar = IsBlockVariable(swizzle);
const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);

const ADDR2_SURFACE_FLAGS flags = pIn->flags;
@@ -3337,11 +3324,6 @@ BOOL_32 Gfx9Lib::ValidateSwModeParams(
valid = FALSE;
}
}
else if (blkVar)
{
ADDR_ASSERT_ALWAYS();
valid = FALSE;
}

return valid;
}
@@ -3444,12 +3426,22 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(

if (ValidateNonSwModeParams(&localIn))
{
// Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
// Forbid swizzle mode(s) by client setting
ADDR2_SWMODE_SET allowedSwModeSet = {};
allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThin4KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThick4KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThin64KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThick64KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);

if (pIn->preferredSwSet.value != 0)
{
@@ -3466,17 +3458,17 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(

if (pIn->maxAlign > 0)
{
if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
if (pIn->maxAlign < Size64K)
{
allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
}

if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
if (pIn->maxAlign < Size4K)
{
allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
}

if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
if (pIn->maxAlign < Size256)
{
allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
}
@@ -3583,7 +3575,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}

if ((numFrags > 1) &&
(GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
(Size4K < (m_pipeInterleaveBytes * numFrags)))
{
// MSAA surface must have blk_bytes/pipe_interleave >= num_samples
allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
@@ -3630,7 +3622,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(

pOut->validSwModeSet = allowedSwModeSet;
pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);

pOut->clientPreferredSwSet = pIn->preferredSwSet;
@@ -3640,6 +3632,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->clientPreferredSwSet.value = AddrSwSetAll;
}

// Apply optional restrictions
if (pIn->flags.needEquation)
{
FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
}

if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
{
pOut->swizzleMode = ADDR_SW_LINEAR;
@@ -3649,15 +3647,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// Always ignore linear swizzle mode if there is other choice.
allowedSwModeSet.swLinear = 0;

ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);

// Determine block size if there is 2 or more block type candidates
if (IsPow2(allowedBlockSet.value) == FALSE)
{
const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
UINT_64 padSize[AddrBlockMaxTiledType] = {0};
AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };

swMode[AddrBlockMicro] = ADDR_SW_256B_D;
swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;

if (pOut->resourceType == ADDR_RSRC_TEX_3D)
{
swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
}

Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
UINT_64 padSize[AddrBlockMaxTiledType] = {0};

const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
@@ -3683,7 +3692,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}

padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);

if ((minSize == 0) ||
((padSize[i] * ratioHi) <= (minSize * ratioLow)))
@@ -3697,28 +3706,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if ((allowedBlockSet.micro == TRUE) &&
(width <= blkDim[AddrBlockMicro].w) &&
(height <= blkDim[AddrBlockMicro].h) &&
(NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
(NextPow2(pIn->minSizeAlign) <= Size256))
{
minSizeBlk = AddrBlockMicro;
}

if (minSizeBlk == AddrBlockMicro)
{
ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
}
else if (minSizeBlk == AddrBlock4KB)
else if (minSizeBlk == AddrBlockThick4KB)
{
allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
}
else if (minSizeBlk == AddrBlockThin4KB)
{
allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
}
else if (minSizeBlk == AddrBlockThick64KB)
{
ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
}
else
{
ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
}
}

// Block type should be determined.
ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));

ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);

@@ -3775,7 +3797,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
{
allowedSwModeSet.value &= Gfx9RotateSwModeMask;
}
else if (displayRsrc && allowedSwSet.sw_D)
else if (allowedSwSet.sw_D)
{
allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
}
@@ -3794,8 +3816,8 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// Swizzle type should be determined.
ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));

// Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
// swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
// type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
}
@@ -3848,13 +3870,13 @@ ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
ASSERTED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];

ADDR_ASSERT(maxYCoordBlock256 ==
GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));

const UINT_32 maxYCoordInBaseEquation =
(blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
(blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;

ADDR_ASSERT(maxYCoordInBaseEquation ==
GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
@@ -4548,7 +4570,7 @@ Dim3d Gfx9Lib::GetMipStartPos(

// Report mip in tail if Mip0 is already in mip tail
BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
UINT_32 mipIndexInTail = mipId;

if (inMipTail == FALSE)
@@ -4598,7 +4620,7 @@ Dim3d Gfx9Lib::GetMipStartPos(

if (IsThick(resourceType, swizzleMode))
{
UINT_32 dim = log2blkSize % 3;
UINT_32 dim = log2BlkSize % 3;

if (dim == 0)
{
@@ -4618,7 +4640,7 @@ Dim3d Gfx9Lib::GetMipStartPos(
}
else
{
if (log2blkSize & 1)
if (log2BlkSize & 1)
{
inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
}
@@ -4648,7 +4670,7 @@ Dim3d Gfx9Lib::GetMipStartPos(

if (inMipTail)
{
UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
*pMipTailBytesOffset = MipTailOffset256B[index] << 8;
}
@@ -4729,7 +4751,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
if (IsThin(pIn->resourceType, pIn->swizzleMode))
{
UINT_32 blockOffset = 0;
UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);

if (IsZOrderSwizzle(pIn->swizzleMode))
{
@@ -4774,7 +4796,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;

// Sample bits start location
UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
// Join sample bits information to the highest Macro block bits
if (IsNonPrtXor(pIn->swizzleMode))
{
@@ -4787,7 +4809,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
// after this op, the blockOffset only contains log2 Macro block size bits
blockOffset %= (1 << sampleStart);
blockOffset |= (pIn->sample << sampleStart);
ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
}
}

@@ -4796,7 +4818,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
// Mask off bits above Macro block bits to keep page synonyms working for prt
if (IsPrt(pIn->swizzleMode))
{
blockOffset &= ((1 << log2blkSize) - 1);
blockOffset &= ((1 << log2BlkSize) - 1);
}

// Preserve offset inside pipe interleave
@@ -4804,13 +4826,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
blockOffset >>= m_pipeInterleaveLog2;

// Pipe/Se xor bits
pipeBits = GetPipeXorBits(log2blkSize);
pipeBits = GetPipeXorBits(log2BlkSize);
// Pipe xor
pipeXor = FoldXor2d(blockOffset, pipeBits);
blockOffset >>= pipeBits;

// Bank xor bits
bankBits = GetBankXorBits(log2blkSize);
bankBits = GetBankXorBits(log2BlkSize);
// Bank Xor
bankXor = FoldXor2d(blockOffset, bankBits);
blockOffset >>= bankBits;
@@ -4825,7 +4847,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
}

ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));

blockOffset |= mipTailBytesOffset;

@@ -4840,7 +4862,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
bankBits, pipeBits, &blockOffset);

blockOffset %= (1 << log2blkSize);
blockOffset %= (1 << log2BlkSize);

UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
@@ -4850,11 +4872,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
((pIn->x / localOut.blockWidth) + mipStartPos.w);

pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
}
else
{
UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);

Dim3d microBlockDim = Block1K_3d[log2ElementBytes];

@@ -4871,7 +4893,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
// Mask off bits above Macro block bits to keep page synonyms working for prt
if (IsPrt(pIn->swizzleMode))
{
blockOffset &= ((1 << log2blkSize) - 1);
blockOffset &= ((1 << log2BlkSize) - 1);
}

// Preserve offset inside pipe interleave
@@ -4879,13 +4901,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
blockOffset >>= m_pipeInterleaveLog2;

// Pipe/Se xor bits
pipeBits = GetPipeXorBits(log2blkSize);
pipeBits = GetPipeXorBits(log2BlkSize);
// Pipe xor
pipeXor = FoldXor3d(blockOffset, pipeBits);
blockOffset >>= pipeBits;

// Bank xor bits
bankBits = GetBankXorBits(log2blkSize);
bankBits = GetBankXorBits(log2BlkSize);
// Bank Xor
bankXor = FoldXor3d(blockOffset, bankBits);
blockOffset >>= bankBits;
@@ -4900,13 +4922,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
}

ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
blockOffset |= mipTailBytesOffset;

returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
bankBits, pipeBits, &blockOffset);

blockOffset %= (1 << log2blkSize);
blockOffset %= (1 << log2BlkSize);

UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
@@ -4917,7 +4939,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
(localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;

pOut->addr = blockOffset | (blockIndex << log2blkSize);
pOut->addr = blockOffset | (blockIndex << log2BlkSize);
}
}
else
@@ -4996,5 +5018,59 @@ ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::ComputeThinBlockDimension
*
*   @brief
*       Internal function that derives the width/height/depth (in elements) of one thin block from the
*       element size, sample count and swizzle mode. The 256B micro block dimension for the element size
*       is scaled up to the full block size, then reduced to account for multiple samples. Depth is
*       always reported as 1.
*
*   @return
*       N/A (results are written through pWidth, pHeight and pDepth)
************************************************************************************************************************
*/
VOID Gfx9Lib::ComputeThinBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThin(resourceType, swizzleMode));

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
    const UINT_32 bppIdx      = Log2(bpp >> 3);   // bits per element -> log2(bytes per element)

    // Log2 of how many 256B micro blocks make up one block (8 == log2(256)).
    const UINT_32 growLog2    = blkSizeLog2 - 8;
    // Split the growth between the two axes; height receives the extra bit when growLog2 is odd.
    const UINT_32 growWidth   = growLog2 / 2;
    const UINT_32 growHeight  = growLog2 - growWidth;

    ADDR_ASSERT(bppIdx < sizeof(Block256_2d) / sizeof(Block256_2d[0]));

    *pWidth  = (Block256_2d[bppIdx].w << growWidth);
    *pHeight = (Block256_2d[bppIdx].h << growHeight);
    *pDepth  = 1;

    if (numSamples > 1)
    {
        // Samples consume log2(numSamples) bits of the block; distribute them over width and height.
        const UINT_32 sampleLog2  = Log2(numSamples);
        const UINT_32 smallShare  = sampleLog2 >> 1;
        const UINT_32 largeShare  = smallShare + (sampleLog2 & 1);

        // For an odd block-size log2 the height takes the larger share, otherwise the width does.
        const UINT_32 oddBlkSize  = blkSizeLog2 & 1;
        const UINT_32 shrinkWidth = oddBlkSize ? smallShare : largeShare;
        const UINT_32 shrinkHeight = oddBlkSize ? largeShare : smallShare;

        *pWidth  >>= shrinkWidth;
        *pHeight >>= shrinkHeight;
    }
}

} // V2
} // Addr

+ 154
- 172
src/amd/addrlib/src/gfx9/gfx9addrlib.h Прегледај датотеку

@@ -114,49 +114,32 @@ const UINT_32 Gfx9Blk64KBSwModeMask = (1u << ADDR_SW_64KB_Z) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx9BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z) |
(1u << ADDR_SW_VAR_S) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_VAR_R) |
(1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_S_X) |
(1u << ADDR_SW_VAR_D_X) |
(1u << ADDR_SW_VAR_R_X);

const UINT_32 Gfx9ZSwModeMask = (1u << ADDR_SW_4KB_Z) |
(1u << ADDR_SW_64KB_Z) |
(1u << ADDR_SW_VAR_Z) |
(1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_VAR_Z_X);
(1u << ADDR_SW_64KB_Z_X);

const UINT_32 Gfx9StandardSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_VAR_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_VAR_S_X);
(1u << ADDR_SW_64KB_S_X);

const UINT_32 Gfx9DisplaySwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_VAR_D_X);
(1u << ADDR_SW_64KB_D_X);

const UINT_32 Gfx9RotateSwModeMask = (1u << ADDR_SW_256B_R) |
(1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_VAR_R) |
(1u << ADDR_SW_64KB_R_T) |
(1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_R_X) |
(1u << ADDR_SW_VAR_R_X);
(1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_4KB_S_X) |
@@ -165,11 +148,7 @@ const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X) |
(1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_S_X) |
(1u << ADDR_SW_VAR_D_X) |
(1u << ADDR_SW_VAR_R_X);
(1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx9TSwModeMask = (1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_64KB_S_T) |
@@ -197,6 +176,16 @@ const UINT_32 Gfx9Rsrc3dPrtSwModeMask = Gfx9Rsrc2dPrtSwModeMask & ~Gfx9RotateSwM

const UINT_32 Gfx9Rsrc3dThinSwModeMask = Gfx9DisplaySwModeMask & ~Gfx9Blk256BSwModeMask;

const UINT_32 Gfx9Rsrc3dThin4KBSwModeMask = Gfx9Rsrc3dThinSwModeMask & Gfx9Blk4KBSwModeMask;

const UINT_32 Gfx9Rsrc3dThin64KBSwModeMask = Gfx9Rsrc3dThinSwModeMask & Gfx9Blk64KBSwModeMask;

const UINT_32 Gfx9Rsrc3dThickSwModeMask = Gfx9Rsrc3dSwModeMask & ~(Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask);

const UINT_32 Gfx9Rsrc3dThick4KBSwModeMask = Gfx9Rsrc3dThickSwModeMask & Gfx9Blk4KBSwModeMask;

const UINT_32 Gfx9Rsrc3dThick64KBSwModeMask = Gfx9Rsrc3dThickSwModeMask & Gfx9Blk64KBSwModeMask;

const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9LinearSwModeMask;

const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) |
@@ -204,14 +193,10 @@ const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_VAR_R) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X) |
(1u << ADDR_SW_VAR_D_X) |
(1u << ADDR_SW_VAR_R_X);
(1u << ADDR_SW_64KB_R_X);

const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_256B_R) |
@@ -220,19 +205,14 @@ const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) |
const UINT_32 Dcn1NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_VAR_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_VAR_S_X);

(1u << ADDR_SW_64KB_S_X);
const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_VAR_D_X) |
Dcn1NonBpp64SwModeMask;

/**
@@ -273,9 +253,6 @@ public:
return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL;
}

virtual BOOL_32 IsValidDisplaySwizzleMode(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

protected:
Gfx9Lib(const Client* pClient);
virtual ~Gfx9Lib();
@@ -376,22 +353,6 @@ protected:
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2) const;

// Returns the base alignment for a tiled surface using the given swizzle mode.
UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
{
    // XOR swizzle modes are aligned to their block size; every other mode uses 256.
    const UINT_32 baseAlign = IsXor(swizzleMode) ? GetBlockSize(swizzleMode) : 256;

    return baseAlign;
}

virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const;
@@ -423,6 +384,137 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;

virtual UINT_32 HwlComputeMaxBaseAlignments() const;

virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

virtual VOID ComputeThinBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;

private:
VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;

VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;

VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;

VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2,
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;

const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);

VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;

BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pMipmap0PaddedWidth,
UINT_32* pSlice0PaddedHeight,
ADDR2_MIP_INFO* pMipInfo = NULL) const;

// Translates a set of allowed swizzle modes into the set of allowed block types.
// For 3D resources the thin and thick macro blocks are reported separately; for
// every other resource type only the thin macro block bits are filled in.
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
{
    const UINT_32 swModes = allowedSwModeSet.value;

    // Zero-initialized, so any bit not assigned below stays cleared.
    ADDR2_BLOCK_SET blockSet = {};

    blockSet.micro  = ((swModes & Gfx9Blk256BSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.linear = ((swModes & Gfx9LinearSwModeMask) != 0) ? TRUE : FALSE;

    if (rsrcType != ADDR_RSRC_TEX_3D)
    {
        blockSet.macroThin4KB  = ((swModes & Gfx9Blk4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB = ((swModes & Gfx9Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
    }
    else
    {
        blockSet.macroThin4KB   = ((swModes & Gfx9Rsrc3dThin4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThick4KB  = ((swModes & Gfx9Rsrc3dThick4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB  = ((swModes & Gfx9Rsrc3dThin64KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThick64KB = ((swModes & Gfx9Rsrc3dThick64KBSwModeMask) != 0) ? TRUE : FALSE;
    }

    return blockSet;
}

// Translates a set of allowed swizzle modes into the set of allowed swizzle types
// (Z, S, D, R). A type is allowed when at least one mode in its mask is allowed.
static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    const UINT_32 swModes = allowedSwModeSet.value;

    ADDR2_SWTYPE_SET swTypes = {};

    swTypes.sw_Z = ((swModes & Gfx9ZSwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_S = ((swModes & Gfx9StandardSwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_D = ((swModes & Gfx9DisplaySwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_R = ((swModes & Gfx9RotateSwModeMask) != 0) ? TRUE : FALSE;

    return swTypes;
}

// Tells whether a mip level of the given dimensions fits inside the mip tail.
// Width and height must always fit; depth is only checked when
// IsThin(resourceType, swizzleMode) is false.
BOOL_32 IsInMipTail(
    AddrResourceType resourceType,
    AddrSwizzleMode swizzleMode,
    Dim3d mipTailDim,
    UINT_32 width,
    UINT_32 height,
    UINT_32 depth) const
{
    const BOOL_32 fitsInPlane = ((width <= mipTailDim.w) && (height <= mipTailDim.h));

    // IsThin() is only consulted once the 2D footprint is known to fit.
    const BOOL_32 inTail = (fitsInPlane &&
                            (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d)));

    return inTail;
}

BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

// Returns the number of bank XOR bits for a macro block of the given size in bits.
UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const
{
    const UINT_32 pipeXorBits = GetPipeXorBits(macroBlockBits);

    // Bits remaining after the pipe XOR bits and the pipe interleave are taken out,
    // capped at the number of bank bits.
    const UINT_32 bankXorBits = Min(macroBlockBits - pipeXorBits - m_pipeInterleaveLog2, m_banksLog2);

    return bankXorBits;
}

// Returns the base alignment for a tiled surface using the given swizzle mode.
UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
{
    // XOR swizzle modes are aligned to their block size; every other mode uses 256.
    const UINT_32 baseAlign = IsXor(swizzleMode) ? GetBlockSize(swizzleMode) : 256;

    return baseAlign;
}

// Initialize equation table
VOID InitEquationTable();

@@ -522,127 +614,17 @@ protected:
return compressBlkDim;
}

static const UINT_32 MaxSeLog2 = 3;
static const UINT_32 MaxRbPerSeLog2 = 2;
static const UINT_32 MaxSeLog2 = 3;
static const UINT_32 MaxRbPerSeLog2 = 2;

static const Dim3d Block256_3dS[MaxNumOfBpp];
static const Dim3d Block256_3dZ[MaxNumOfBpp];
static const Dim3d Block256_3dS[MaxNumOfBpp];
static const Dim3d Block256_3dZ[MaxNumOfBpp];

static const UINT_32 MipTailOffset256B[];
static const UINT_32 MipTailOffset256B[];

static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];

// Max number of swizzle mode supported for equation
static const UINT_32 MaxSwMode = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];

// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];

static const UINT_32 MaxCachedMetaEq = 2;

private:
virtual UINT_32 HwlComputeMaxBaseAlignments() const;

virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;

VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;

VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;

VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2,
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;

const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);

virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;

ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pMipmap0PaddedWidth,
UINT_32* pSlice0PaddedHeight,
ADDR2_MIP_INFO* pMipInfo = NULL) const;

// Translates a set of allowed swizzle modes into the set of allowed block types
// (256B micro, 4KB macro, 64KB macro, VAR and linear).
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    const UINT_32 swModes = allowedSwModeSet.value;

    ADDR2_BLOCK_SET blockSet = {};

    blockSet.micro     = ((swModes & Gfx9Blk256BSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.macro4KB  = ((swModes & Gfx9Blk4KBSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.macro64KB = ((swModes & Gfx9Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.var       = ((swModes & Gfx9BlkVarSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.linear    = ((swModes & Gfx9LinearSwModeMask) != 0) ? TRUE : FALSE;

    return blockSet;
}

// Translates a set of allowed swizzle modes into the set of allowed swizzle types
// (Z, S, D, R). A type is allowed when at least one mode in its mask is allowed.
static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    const UINT_32 swModes = allowedSwModeSet.value;

    ADDR2_SWTYPE_SET swTypes = {};

    swTypes.sw_Z = ((swModes & Gfx9ZSwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_S = ((swModes & Gfx9StandardSwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_D = ((swModes & Gfx9DisplaySwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_R = ((swModes & Gfx9RotateSwModeMask) != 0) ? TRUE : FALSE;

    return swTypes;
}

// Tells whether a mip level of the given dimensions fits inside the mip tail.
// Width and height must always fit; depth is only checked when
// IsThin(resourceType, swizzleMode) is false.
BOOL_32 IsInMipTail(
    AddrResourceType resourceType,
    AddrSwizzleMode swizzleMode,
    Dim3d mipTailDim,
    UINT_32 width,
    UINT_32 height,
    UINT_32 depth) const
{
    const BOOL_32 fitsInPlane = ((width <= mipTailDim.w) && (height <= mipTailDim.h));

    // IsThin() is only consulted once the 2D footprint is known to fit.
    const BOOL_32 inTail = (fitsInPlane &&
                            (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d)));

    return inTail;
}

BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

// Returns the number of bank XOR bits for a macro block of the given size in bits.
UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const
{
    const UINT_32 pipeXorBits = GetPipeXorBits(macroBlockBits);

    // Bits remaining after the pipe XOR bits and the pipe interleave are taken out,
    // capped at the number of bank bits.
    const UINT_32 bankXorBits = Min(macroBlockBits - pipeXorBits - m_pipeInterleaveLog2, m_banksLog2);

    return bankXorBits;
}
static const UINT_32 MaxCachedMetaEq = 2;

Gfx9ChipSettings m_settings;


+ 0
- 1
src/amd/addrlib/src/r800/ciaddrlib.h Прегледај датотеку

@@ -151,7 +151,6 @@ protected:
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;

private:

VOID ReadGbTileMode(
UINT_32 regValue, TileConfig* pCfg) const;


+ 0
- 1
src/amd/addrlib/src/r800/siaddrlib.h Прегледај датотеку

@@ -76,7 +76,6 @@ struct SiChipSettings
UINT_32 isSpectre : 1;
UINT_32 isSpooky : 1;
UINT_32 isKalindi : 1;
// Hawaii is GFXIP 7.2
UINT_32 isHawaii : 1;

// VI

+ 0
- 7
src/amd/common/ac_surface.c Прегледај датотеку

@@ -85,7 +85,6 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,

if (addrCreateInput.chipFamily >= FAMILY_AI) {
addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
regValue.blockVarSizeLog2 = 0;
} else {
regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
@@ -1599,11 +1598,9 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_256B_S:
case ADDR_SW_4KB_S:
case ADDR_SW_64KB_S:
case ADDR_SW_VAR_S:
case ADDR_SW_64KB_S_T:
case ADDR_SW_4KB_S_X:
case ADDR_SW_64KB_S_X:
case ADDR_SW_VAR_S_X:
surf->micro_tile_mode = RADEON_MICRO_MODE_THIN;
break;

@@ -1612,11 +1609,9 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_256B_D:
case ADDR_SW_4KB_D:
case ADDR_SW_64KB_D:
case ADDR_SW_VAR_D:
case ADDR_SW_64KB_D_T:
case ADDR_SW_4KB_D_X:
case ADDR_SW_64KB_D_X:
case ADDR_SW_VAR_D_X:
surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
break;

@@ -1624,7 +1619,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_256B_R:
case ADDR_SW_4KB_R:
case ADDR_SW_64KB_R:
case ADDR_SW_VAR_R:
case ADDR_SW_64KB_R_T:
case ADDR_SW_4KB_R_X:
case ADDR_SW_64KB_R_X:
@@ -1641,7 +1635,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
/* Z = depth. */
case ADDR_SW_4KB_Z:
case ADDR_SW_64KB_Z:
case ADDR_SW_VAR_Z:
case ADDR_SW_64KB_Z_T:
case ADDR_SW_4KB_Z_X:
case ADDR_SW_64KB_Z_X:

Loading…
Откажи
Сачувај