Procházet zdrojové kódy

gm107/ir: use scalar tex instructions where possible

TEXS, TLDS and TLD4S are scalar variants of the tex instructions. They
give RA more freedom and make it less likely to insert silly MOVs just
to satisfy quad register constraints.

shader-db changes:
total instructions in shared programs : 7687265 -> 7614782 (-0.94%)
total gprs used in shared programs    : 803620 -> 798045 (-0.69%)
total shared used in shared programs  : 639636 -> 639636 (0.00%)
total local used in shared programs   : 24648 -> 24648 (0.00%)
total bytes used in shared programs   : 82103400 -> 81330696 (-0.94%)

                local     shared        gpr       inst      bytes
    helped           0           0        3648       10647       10647
      hurt           0           0         464         205         205

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
tags/19.0-branchpoint
Karol Herbst před 7 roky
rodič
revize
f821e80213

+ 155
- 3
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp Zobrazit soubor

@@ -192,6 +192,7 @@ private:

void emitTEXs(int);
void emitTEX();
// Scalar texture encodings: handles OP_TEX/OP_TXL, OP_TXF and OP_TXG.
void emitTEXS();
void emitTLD();
void emitTLD4();
void emitTXD();
@@ -2718,6 +2719,103 @@ CodeEmitterGM107::emitTEXs(int pos)
emitGPR(pos);
}

/**
 * Translate a 4-bit component write mask into the value stored in the 3-bit
 * mask field of the scalar texture encodings.
 *
 * Masks 0x0, 0x5 and 0x6 have no encoding; callers must not pass them.
 * Several encodings are shared between a one/two-component mask and a
 * three/four-component mask (e.g. 0x1 and 0x7 both map to 0x0); presumably
 * the presence of a second destination register disambiguates them -
 * NOTE(review): confirm against the hardware encoding.
 *
 * @param mask  component write mask (bits 0..3)
 * @return      encoded mask value; 0x0 for an invalid mask in builds where
 *              assertions are compiled out
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   switch (mask) {
   case 0x1: return 0x0;
   case 0x2: return 0x1;
   case 0x3: return 0x4;
   case 0x4: return 0x2;
   case 0x7: return 0x0;
   case 0x8: return 0x3;
   case 0x9: return 0x5;
   case 0xa: return 0x6;
   case 0xb: return 0x1;
   case 0xc: return 0x7;
   case 0xd: return 0x2;
   case 0xe: return 0x3;
   case 0xf: return 0x4;
   default:
      assert(!"invalid mask");
      // With NDEBUG the assert vanishes; return a defined value instead of
      // flowing off the end of a non-void function (undefined behaviour).
      return 0x0;
   }
}

/**
 * Select the 4-bit target field value for a scalar OP_TEX / OP_TXL.
 *
 * The value depends on the texture target, on whether tex.levelZero is set
 * and on whether the op is OP_TXL. Combinations that have no value here are
 * caught by assertions.
 *
 * @param tex  texture instruction; op must be OP_TEX or OP_TXL
 * @return     target field value (0x0 for an unsupported target when
 *             assertions are compiled out)
 */
static uint8_t
getTEXSTarget(const TexInstruction *tex)
{
   assert(tex->op == OP_TEX || tex->op == OP_TXL);

   const bool lz = tex->tex.levelZero;   // tex.levelZero flag
   const bool ll = tex->op == OP_TXL;    // op is OP_TXL

   switch (tex->tex.target.getEnum()) {
   case TEX_TARGET_1D:
      // only the levelZero form is available
      assert(lz);
      return 0x0;

   case TEX_TARGET_2D:
   case TEX_TARGET_RECT:
      return lz ? 0x2 : (ll ? 0x3 : 0x1);

   case TEX_TARGET_2D_SHADOW:
   case TEX_TARGET_RECT_SHADOW:
      return lz ? 0x6 : (ll ? 0x5 : 0x4);

   case TEX_TARGET_2D_ARRAY:
      return lz ? 0x8 : 0x7;

   case TEX_TARGET_2D_ARRAY_SHADOW:
      // only the levelZero form is available
      assert(lz);
      return 0x9;

   case TEX_TARGET_3D:
      if (lz)
         return 0xb;
      // no OP_TXL form without levelZero
      assert(!ll);
      return 0xa;

   case TEX_TARGET_CUBE:
      // no levelZero form
      assert(!lz);
      return ll ? 0xd : 0xc;

   default:
      assert(false);
      return 0x0;
   }
}

/**
 * Select the 4-bit target field value for a scalar OP_TXF.
 *
 * The value depends on the texture target, on tex.levelZero and, for
 * 2D/RECT, on whether tex.useOffsets is non-zero. Multisample, 3D and array
 * targets assert that levelZero is set.
 *
 * @param tex  texture instruction being encoded
 * @return     target field value (0x0 for an unsupported target when
 *             assertions are compiled out)
 */
static uint8_t
getTLDSTarget(const TexInstruction *tex)
{
   const bool lz = tex->tex.levelZero;
   const bool hasOffsets = tex->tex.useOffsets != 0;

   switch (tex->tex.target.getEnum()) {
   case TEX_TARGET_1D:
      return lz ? 0x0 : 0x1;

   case TEX_TARGET_2D:
   case TEX_TARGET_RECT:
      if (lz)
         return hasOffsets ? 0x4 : 0x2;
      return hasOffsets ? 0xc : 0x5;

   case TEX_TARGET_2D_MS:
      assert(lz);
      return 0x6;

   case TEX_TARGET_3D:
      assert(lz);
      return 0x7;

   case TEX_TARGET_2D_ARRAY:
      assert(lz);
      return 0x8;

   default:
      assert(false);
      return 0x0;
   }
}

void
CodeEmitterGM107::emitTEX()
{
@@ -2760,6 +2858,49 @@ CodeEmitterGM107::emitTEX()
emitGPR (0x00, insn->def(0));
}

void
CodeEmitterGM107::emitTEXS()
{
const TexInstruction *insn = this->insn->asTex();

switch (insn->op) {
case OP_TEX:
case OP_TXL:
emitInsn (0xd8000000);
emitField(0x35, 4, getTEXSTarget(insn));
emitField(0x32, 3, getTEXSMask(insn->tex.mask));
break;
case OP_TXF:
emitInsn (0xda000000);
emitField(0x35, 4, getTLDSTarget(insn));
emitField(0x32, 3, getTEXSMask(insn->tex.mask));
break;
case OP_TXG:
assert(insn->tex.useOffsets != 4);
emitInsn (0xdf000000);
emitField(0x34, 2, insn->tex.gatherComp);
emitField(0x33, 1, insn->tex.useOffsets == 1);
emitField(0x32, 1, insn->tex.target.isShadow());
break;
default:
unreachable("unknown op in emitTEXS()");
break;
}

emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x24, 13, insn->tex.r);
if (insn->defExists(1))
emitGPR(0x1c, insn->def(1));
else
emitGPR(0x1c);
if (insn->srcExists(1))
emitGPR(0x14, insn->getSrc(1));
else
emitGPR(0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}

void
CodeEmitterGM107::emitTLD()
{
@@ -3474,15 +3615,26 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitPIXLD();
break;
case OP_TEX:
case OP_TXB:
case OP_TXL:
if (insn->asTex()->tex.scalar)
emitTEXS();
else
emitTEX();
break;
case OP_TXB:
emitTEX();
break;
case OP_TXF:
emitTLD();
if (insn->asTex()->tex.scalar)
emitTEXS();
else
emitTLD();
break;
case OP_TXG:
emitTLD4();
if (insn->asTex()->tex.scalar)
emitTEXS();
else
emitTLD4();
break;
case OP_TXD:
emitTXD();

+ 162
- 0
src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp Zobrazit soubor

@@ -275,6 +275,9 @@ private:
void texConstraintNVE0(TexInstruction *);
void texConstraintGM107(TexInstruction *);

// Returns true when the instruction's op, target, mask, offsets and
// levelZero combination is accepted for the scalar form and it has no
// sIndirectSrc/rIndirectSrc.
bool isScalarTexGM107(TexInstruction *);
// Sets tex.scalar and condenses defs/srcs so that at most two of each remain.
void handleScalarTexGM107(TexInstruction *);

std::list<Instruction *> constrList;

const Target *targ;
@@ -2119,6 +2122,158 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
constrList.push_back(merge);
}

/**
 * Decide whether a texture instruction can be handled as a scalar
 * TEXS / TLDS / TLD4S instruction.
 *
 * Only OP_TEX, OP_TXL, OP_TXF and OP_TXG qualify, and only for the
 * target / levelZero / offset combinations listed below. Instructions with
 * an indirect sampler or resource source, or with write mask 0x5 or 0x6,
 * never qualify.
 *
 * @param tex  instruction to inspect (not modified)
 * @return     true if the scalar form may be used
 */
bool
RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex)
{
   if (tex->tex.sIndirectSrc >= 0)
      return false;
   if (tex->tex.rIndirectSrc >= 0)
      return false;

   // these two write masks are rejected outright
   switch (tex->tex.mask) {
   case 5:
   case 6:
      return false;
   default:
      break;
   }

   // Legal variants.
   //
   // TEXS:
   //   TEXS.1D.LZ
   //   TEXS.2D       TEXS.2D.LZ       TEXS.2D.LL
   //   TEXS.2D.DC    TEXS.2D.LZ.DC    TEXS.2D.LL.DC
   //   TEXS.A2D      TEXS.A2D.LZ      TEXS.A2D.LZ.DC
   //   TEXS.3D       TEXS.3D.LZ
   //   TEXS.CUBE     TEXS.CUBE.LL
   //
   // TLDS:
   //   TLDS.1D.LZ    TLDS.1D.LL
   //   TLDS.2D.LZ    TLDS.2D.LZ.AOFFI
   //   TLDS.2D.LZ.MZ (presumably the multisample form, cf. TEX_TARGET_2D_MS)
   //   TLDS.2D.LL    TLDS.2D.LL.AOFFI
   //   TLDS.A2D.LZ
   //   TLDS.3D.LZ
   //
   // TLD4S: all 2D/RECT variants and only offset

   switch (tex->op) {
   case OP_TEX:
      // no offsets in any TEXS form
      if (tex->tex.useOffsets)
         return false;

      switch (tex->tex.target.getEnum()) {
      case TEX_TARGET_2D:
      case TEX_TARGET_RECT:
      case TEX_TARGET_2D_SHADOW:
      case TEX_TARGET_RECT_SHADOW:
      case TEX_TARGET_2D_ARRAY:
      case TEX_TARGET_3D:
         return true;
      case TEX_TARGET_1D:
      case TEX_TARGET_2D_ARRAY_SHADOW:
         // levelZero form only
         return tex->tex.levelZero;
      case TEX_TARGET_CUBE:
         // no levelZero form
         return !tex->tex.levelZero;
      default:
         return false;
      }

   case OP_TXL:
      if (tex->tex.useOffsets)
         return false;

      switch (tex->tex.target.getEnum()) {
      case TEX_TARGET_2D:
      case TEX_TARGET_RECT:
      case TEX_TARGET_2D_SHADOW:
      case TEX_TARGET_RECT_SHADOW:
      case TEX_TARGET_CUBE:
         return true;
      default:
         return false;
      }

   case OP_TXF: {
      const bool noOffsets = !tex->tex.useOffsets;

      switch (tex->tex.target.getEnum()) {
      case TEX_TARGET_2D:
      case TEX_TARGET_RECT:
         return true;
      case TEX_TARGET_1D:
         return noOffsets;
      case TEX_TARGET_2D_ARRAY:
      case TEX_TARGET_2D_MS:
      case TEX_TARGET_3D:
         return noOffsets && tex->tex.levelZero;
      default:
         return false;
      }
   }

   case OP_TXG:
      // useOffsets above 1 is rejected, and only masks 0x3 and 0xf pass
      if (tex->tex.useOffsets > 1 ||
          (tex->tex.mask != 0x3 && tex->tex.mask != 0xf))
         return false;

      switch (tex->tex.target.getEnum()) {
      case TEX_TARGET_2D:
      case TEX_TARGET_2D_MS:
      case TEX_TARGET_2D_SHADOW:
      case TEX_TARGET_RECT:
      case TEX_TARGET_RECT_SHADOW:
         return true;
      default:
         return false;
      }

   default:
      // every other op has no scalar form
      return false;
   }
}

/**
 * Prepare a texture instruction for the scalar encoding.
 *
 * Sets tex.scalar and condenses the defs and srcs so that at most two of
 * each remain (asserted at the end). The higher pair is always condensed
 * before the lower one.
 *
 * @param tex  instruction previously accepted by isScalarTexGM107()
 */
void
RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex)
{
   const int numDefs = tex->defCount(0xff);
   const int numSrcs = tex->srcCount(0xff);

   tex->tex.scalar = true;

   // Defs: condense defs 2..3 first (when there are four), then defs 0..1.
   if (numDefs > 3)
      condenseDefs(tex, 2, 3);
   if (numDefs > 1)
      condenseDefs(tex, 0, 1);

   // Srcs: TXF on a 2D array is the special case - src 0 stays on its own
   // and srcs 1..2 are condensed.
   const bool txfArray2D =
      tex->op == OP_TXF && tex->tex.target == TEX_TARGET_2D_ARRAY;

   if (txfArray2D) {
      assert(numSrcs >= 3);
      condenseSrcs(tex, 1, 2);
   } else {
      if (numSrcs > 3)
         condenseSrcs(tex, 2, 3);
      // with exactly two sources nothing needs condensing
      if (numSrcs > 2)
         condenseSrcs(tex, 0, 1);
   }

   assert(!tex->defExists(2) && !tex->srcExists(2));
}

void
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
{
@@ -2126,6 +2281,13 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)

if (isTextureOp(tex->op))
textureMask(tex);

if (isScalarTexGM107(tex)) {
handleScalarTexGM107(tex);
return;
}

assert(!tex->tex.scalar);
condenseDefs(tex);

if (isSurfaceOp(tex->op)) {

Načítá se…
Zrušit
Uložit