Browse Source

nv50/ir/opt: try to convert ABS(SUB) to SAD

tags/i965-primitive-restart-v2
Christoph Bumiller 13 years ago
parent
commit
1f4c154f02

+ 25
- 0
src/gallium/drivers/nv50/codegen/nv50_ir.cpp View File

@@ -658,6 +658,31 @@ Instruction::swapSources(int a, int b)
srcs[b].mod = m;
}

// TODO: extend for delta < 0
void
Instruction::moveSources(int s, int delta)
{
if (delta == 0)
return;
assert(delta > 0);

int k;
for (k = 0; srcExists(k); ++k) {
for (int i = 0; i < 2; ++i) {
if (src(k).indirect[i] >= s)
src(k).indirect[i] += delta;
}
}
if (predSrc >= s)
predSrc += delta;
if (flagsSrc >= s)
flagsSrc += delta;

--k;
for (int p = k + delta; k >= s; --k, --p)
setSrc(p, src(k));
}

void
Instruction::takeExtraSources(int s, Value *values[3])
{

+ 1
- 0
src/gallium/drivers/nv50/codegen/nv50_ir.h View File

@@ -603,6 +603,7 @@ public:
void setSrc(int s, Value *);
void setSrc(int s, const ValueRef&);
void swapSources(int a, int b);
void moveSources(int s, int delta); // NOTE: only delta > 0 implemented
bool setIndirect(int s, int dim, Value *);

inline ValueRef& src(int s) { return srcs[s]; }

+ 33
- 0
src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp View File

@@ -99,6 +99,7 @@ private:
void emitFMUL(const Instruction *);
void emitFMAD(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);

void emitMINMAX(const Instruction *);

@@ -1022,6 +1023,35 @@ CodeEmitterNV50::emitIMAD(const Instruction *i)
}
}

// Emit the machine encoding for OP_SAD.
//
// When encSize is 8 the opcode goes into code[0] and the source type is
// selected through code[1]; otherwise the source type is encoded directly
// in code[0]. Only 16- and 32-bit integer source types are handled.
void
CodeEmitterNV50::emitISAD(const Instruction *i)
{
   if (i->encSize != 8) {
      // single-word form: type bits live in code[0]
      switch (i->sType) {
      case TYPE_U16:
         code[0] = 0x50000000;
         break;
      case TYPE_S16:
         code[0] = 0x50000100;
         break;
      case TYPE_U32:
         code[0] = 0x50008000;
         break;
      case TYPE_S32:
         code[0] = 0x50008100;
         break;
      default:
         assert(0);
         break;
      }
      emitForm_MUL(i);
      return;
   }

   // two-word form: fixed opcode word, type bits live in code[1]
   code[0] = 0x50000000;
   switch (i->sType) {
   case TYPE_U16:
      code[1] = 0x00000000;
      break;
   case TYPE_S16:
      code[1] = 0x08000000;
      break;
   case TYPE_U32:
      code[1] = 0x04000000;
      break;
   case TYPE_S32:
      code[1] = 0x0c000000;
      break;
   default:
      assert(0);
      break;
   }
   emitForm_MAD(i);
}

void
CodeEmitterNV50::emitSET(const Instruction *i)
{
@@ -1543,6 +1573,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
else
emitIMAD(insn);
break;
case OP_SAD:
emitISAD(insn);
break;
case OP_NOT:
emitNOT(insn);
break;

+ 11
- 0
src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h View File

@@ -114,6 +114,17 @@ static inline bool isSignedType(DataType ty)
}
}

// Map an unsigned 8/16/32-bit integer type to its signed counterpart.
// Any other type is returned unchanged.
static inline DataType intTypeToSigned(DataType ty)
{
   if (ty == TYPE_U32)
      return TYPE_S32;
   if (ty == TYPE_U16)
      return TYPE_S16;
   if (ty == TYPE_U8)
      return TYPE_S8;
   return ty;
}

const ValueRef *ValueRef::getIndirect(int dim) const
{
return isIndirect(dim) ? &insn->src(indirect[dim]) : NULL;

+ 92
- 15
src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp View File

@@ -915,57 +915,129 @@ class AlgebraicOpt : public Pass
private:
virtual bool visit(BasicBlock *);

void handleADD(Instruction *);
void handleABS(Instruction *);
bool handleADD(Instruction *);
bool tryADDToMADOrSAD(Instruction *, operation toOp);
void handleMINMAX(Instruction *);
void handleRCP(Instruction *);
void handleSLCT(Instruction *);
void handleLOGOP(Instruction *);
void handleCVT(Instruction *);

BuildUtil bld;
};

// Try to turn ABS(SUB(a, b)) into SAD(a, b, 0).
//
// The subtraction may also appear as ADD(a, NEG(b)) or ADD(NEG(b), a).
// The instruction is rewritten in place; if any precondition fails it is
// left untouched.
void
AlgebraicOpt::handleABS(Instruction *abs)
{
   Instruction *sub = abs->getSrc(0)->getInsn();
   DataType ty;
   if (!sub ||
       !prog->getTarget()->isOpSupported(OP_SAD, abs->dType))
      return;

   // Check the opcode before touching the operands: only for ADD/SUB do we
   // know that both src(0) and src(1) are there to be inspected.
   if (sub->op != OP_ADD && sub->op != OP_SUB)
      return;

   // hidden conversion ?
   ty = intTypeToSigned(sub->dType);
   if (abs->dType != abs->sType || ty != abs->sType)
      return;

   // expect plain GPR operands without modifiers, if not, bail
   if (sub->src(0).getFile() != FILE_GPR || sub->src(0).mod ||
       sub->src(1).getFile() != FILE_GPR || sub->src(1).mod)
      return;

   Value *src0 = sub->getSrc(0);
   Value *src1 = sub->getSrc(1);

   if (sub->op == OP_ADD) {
      // An ADD only qualifies if one of its operands is a NEG result.
      // Look at src(1) first; if it has no defining instruction or that
      // instruction is not a NEG, try src(0) and swap the operand roles.
      Instruction *neg = sub->getSrc(1)->getInsn();
      if (!neg || neg->op != OP_NEG) {
         neg = sub->getSrc(0)->getInsn();
         src0 = sub->getSrc(1);
      }
      if (!neg || neg->op != OP_NEG ||
          neg->dType != neg->sType || neg->sType != ty)
         return;
      src1 = neg->getSrc(0);
   }

   // found ABS(SUB)
   abs->moveSources(1, 2); // move sources >=1 up by 2
   abs->op = OP_SAD;
   abs->setType(sub->dType);
   abs->setSrc(0, src0);
   abs->setSrc(1, src1);
   // the accumulator operand of the new SAD is an immediate 0, loaded
   // at the position of the rewritten instruction
   bld.setPosition(abs, false);
   abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0));
}

// Try to fuse an ADD with the instruction producing one of its operands.
//
// Both operands must live in the GPR file. MAD is attempted first, then
// SAD, each only if the target supports that operation for the ADD's
// destination type. Returns true if the ADD was rewritten.
bool
AlgebraicOpt::handleADD(Instruction *add)
{
   Value *const lhs = add->getSrc(0);
   Value *const rhs = add->getSrc(1);

   if (!(lhs->reg.file == FILE_GPR && rhs->reg.file == FILE_GPR))
      return false;

   if (prog->getTarget()->isOpSupported(OP_MAD, add->dType) &&
       tryADDToMADOrSAD(add, OP_MAD))
      return true;

   if (prog->getTarget()->isOpSupported(OP_SAD, add->dType) &&
       tryADDToMADOrSAD(add, OP_SAD))
      return true;

   return false;
}

// ADD(SAD(a,b,0), c) -> SAD(a,b,c)
// ADD(MUL(a,b), c) -> MAD(a,b,c)
bool
AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
{
Value *src0 = add->getSrc(0);
Value *src1 = add->getSrc(1);
Value *src;
int s;
const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL;
const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0));
Modifier mod[4];

if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType))
return;

if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
return;

if (src0->refCount() == 1 &&
src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL)
src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp)
s = 0;
else
if (src1->refCount() == 1 &&
src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL)
src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp)
s = 1;
else
return;
return false;

if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
(src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
return;
return false;

src = add->getSrc(s);

if (src->getInsn()->postFactor)
return;
return false;
if (toOp == OP_SAD) {
ImmediateValue imm;
if (!src->getInsn()->src(2).getImmediate(imm))
return false;
if (!imm.isInteger(0))
return false;
}

mod[0] = add->src(0).mod;
mod[1] = add->src(1).mod;
mod[2] = src->getUniqueInsn()->src(0).mod;
mod[3] = src->getUniqueInsn()->src(1).mod;

if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG))
return;
if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
return false;

add->op = OP_MAD;
add->op = toOp;
add->subOp = src->getInsn()->subOp; // potentially mul-high

add->setSrc(2, add->src(s ? 0 : 1));
@@ -974,6 +1046,8 @@ AlgebraicOpt::handleADD(Instruction *add)
add->src(0).mod = mod[2] ^ mod[s];
add->setSrc(1, src->getInsn()->getSrc(1));
add->src(1).mod = mod[3];

return true;
}

void
@@ -1140,6 +1214,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
switch (i->op) {
case OP_ABS:
handleABS(i);
break;
case OP_ADD:
handleADD(i);
break;

+ 16
- 0
src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp View File

@@ -87,6 +87,7 @@ private:
void emitUMUL(const Instruction *);
void emitFMUL(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
void emitFMAD(const Instruction *);

void emitNOT(Instruction *);
@@ -620,6 +621,18 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
code[0] |= 1 << 6;
}

// Emit the machine encoding for OP_SAD.
//
// Only 32-bit integer destination types and the 8-byte encoding are
// accepted (asserted). Bit 5 of code[0] is set for the S32 variant.
void
CodeEmitterNVC0::emitISAD(const Instruction *i)
{
   const bool isS32 = (i->dType == TYPE_S32);

   assert(isS32 || i->dType == TYPE_U32);
   assert(i->encSize == 8);

   emitForm_A(i, HEX64(38000000, 00000003));

   if (isS32)
      code[0] |= 1 << 5;
}

void
CodeEmitterNVC0::emitNOT(Instruction *i)
{
@@ -1608,6 +1621,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
else
emitIMAD(insn);
break;
case OP_SAD:
emitISAD(insn);
break;
case OP_NOT:
emitNOT(insn);
break;

+ 1
- 1
src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp View File

@@ -456,7 +456,7 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
{
if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
return false;
if (op == OP_SAD && ty != TYPE_S32)
if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32)
return false;
if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
return false;

Loading…
Cancel
Save