Browse Source

nv50/ir: implement splitting of 64 bit ops after RA

tags/mesa-9.2-rc1
Christoph Bumiller 12 years ago
parent
commit
99e4eba669

+ 2
- 1
src/gallium/drivers/nv50/codegen/nv50_ir.h View File

@@ -48,7 +48,7 @@ enum operation
OP_MOV, // simple copy, no modifiers allowed
OP_LOAD,
OP_STORE,
OP_ADD,
OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
OP_SUB,
OP_MUL,
OP_DIV,
@@ -707,6 +707,7 @@ public:

inline void setFlagsSrc(int s, Value *);
inline void setFlagsDef(int d, Value *);
inline bool usesFlags() const { return flagsSrc >= 0; }

unsigned int defCount() const { return defs.size(); };
unsigned int defCount(unsigned int mask, bool singleFile = false) const;

+ 70
- 0
src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp View File

@@ -541,4 +541,74 @@ BuildUtil::DataArray::mkSymbol(int i, int c)
return sym;
}


// Split a 64-bit operation into two 32-bit operations after register
// allocation.
//
// The instruction @i is modified in place and becomes the low half; a clone
// of it is inserted directly after it in the same basic block and becomes
// the high half.
//
// @fn:    function used for cloning values and the instruction
// @i:     instruction to split; only MOV, ADD and SUB are handled
// @zero:  value used as the high-half source for any source operand that is
//         narrower than 8 bytes
// @carry: flags value linking the two halves of an ADD/SUB; it is written
//         as def 1 of the low half and read as flags source by the high
//         half. If it is NULL, ADD/SUB are not split.
//
// Returns the high-half instruction, or NULL if @i was left untouched.
Instruction *
BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
                              Value *zero,
                              Value *carry)
{
   DataType hTy;
   int srcNr;

   switch (i->dType) {
   case TYPE_U64: hTy = TYPE_U32; break;
   case TYPE_S64: hTy = TYPE_S32; break;
   case TYPE_F64:
      // A 64-bit move only copies bits, so an F64 MOV can be split exactly
      // like an integer one. Any other F64 operation is left alone.
      if (i->op != OP_MOV)
         return NULL;
      hTy = TYPE_U32;
      break;
   default:
      return NULL;
   }

   switch (i->op) {
   case OP_MOV: srcNr = 1; break;
   case OP_ADD:
   case OP_SUB:
      // the high half depends on the carry of the low half
      if (!carry)
         return NULL;
      srcNr = 2;
      break;
   default:
      // TODO when needed
      return NULL;
   }

   // Turn @i into the low half: 32-bit type and a private 4-byte copy of the
   // destination value, so the original value object is not resized.
   i->setType(hTy);
   i->setDef(0, cloneShallow(fn, i->getDef(0)));
   i->getDef(0)->reg.size = 4;
   Instruction *lo = i;
   Instruction *hi = cloneForward(fn, i);
   lo->bb->insertAfter(lo, hi);

   // the high half writes the register following the low half's register
   hi->getDef(0)->reg.data.id++;

   for (int s = 0; s < srcNr; ++s) {
      if (lo->getSrc(s)->reg.size < 8) {
         // NOTE(review): a narrower source gets @zero as its high half,
         // i.e. it is treated as zero-extended; presumably only unsigned
         // narrow sources reach this point - confirm against callers.
         hi->setSrc(s, zero);
      } else {
         // do not shrink a value that is still referenced elsewhere
         if (lo->getSrc(s)->refCount() > 1)
            lo->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
         lo->getSrc(s)->reg.size /= 2;
         hi->setSrc(s, cloneShallow(fn, lo->getSrc(s)));

         // make the high-half source refer to the upper 32 bits
         switch (hi->src(s).getFile()) {
         case FILE_IMMEDIATE:
            hi->getSrc(s)->reg.data.u64 >>= 32;
            break;
         case FILE_MEMORY_CONST:
         case FILE_MEMORY_SHARED:
         case FILE_SHADER_INPUT:
            hi->getSrc(s)->reg.data.offset += 4;
            break;
         default:
            assert(hi->src(s).getFile() == FILE_GPR);
            hi->getSrc(s)->reg.data.id++;
            break;
         }
      }
   }
   if (srcNr == 2) {
      // ADD/SUB: low half produces the carry, high half consumes it
      lo->setDef(1, carry);
      hi->setFlagsSrc(hi->srcCount(), carry);
   }
   return hi;
}

} // namespace nv50_ir

+ 4
- 0
src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h View File

@@ -101,6 +101,10 @@ public:

Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); }

// Splits a 64-bit operation into a low and a high 32-bit operation after
// register allocation. The given instruction is reused for the low part.
// 'zero' supplies the high part of source operands narrower than 64 bits,
// 'carry' is the flags value that connects the two parts of an ADD/SUB.
// Returns the high part of the operation, or NULL if nothing was split.
static Instruction *split64BitOpPostRA(Function *, Instruction *,
Value *zero, Value *carry);

struct Location
{
Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c)

+ 8
- 19
src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp View File

@@ -122,7 +122,6 @@ private:

void handlePRERET(FlowInstruction *);
void replaceZero(Instruction *);
void split64BitOp(Instruction *);

LValue *r63;
};
@@ -160,22 +159,6 @@ NV50LegalizePostRA::replaceZero(Instruction *i)
}
}

// Handle an F64 instruction: MAD is rewritten to FMA, and every operation
// that is not in the list below is retyped to U32 and followed by a forward
// clone of itself. Instructions of any other type are ignored.
void
NV50LegalizePostRA::split64BitOp(Instruction *i)
{
   if (i->dType != TYPE_F64)
      return;

   if (i->op == OP_MAD)
      i->op = OP_FMA;

   switch (i->op) {
   case OP_ADD:
   case OP_MUL:
   case OP_FMA:
   case OP_CVT:
   case OP_MIN:
   case OP_MAX:
   case OP_SET:
      // these stay a single F64 instruction
      return;
   default:
      break;
   }

   i->dType = i->sType = TYPE_U32;

   Instruction *copy = cloneForward(func, i);
   i->bb->insertAfter(i, copy);
}

// Emulate PRERET: jump to the target and call to the origin from there
//
// WARNING: atm only works if BBs are affected by at most a single PRERET
@@ -229,12 +212,18 @@ NV50LegalizePostRA::visit(BasicBlock *bb)
if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) {
handlePRERET(i->asFlow());
} else {
// TODO: We will want to do this before register allocation,
// since we have to use a $c register for the carry flag.
if (typeSizeof(i->dType) == 8) {
Instruction *hi = BuildUtil::split64BitOpPostRA(func, i, r63, NULL);
if (hi)
next = hi;
}

if (i->op != OP_MOV && i->op != OP_PFETCH &&
i->op != OP_BAR &&
(!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS))
replaceZero(i);
if (typeSizeof(i->dType) == 8)
split64BitOp(i);
}
}
if (!bb->getEntry())

+ 2
- 0
src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp View File

@@ -667,6 +667,8 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
}
break;
case OP_ADD:
if (i->usesFlags())
break;
if (imm0.isInteger(0)) {
if (s == 0) {
i->setSrc(0, i->getSrc(1));

+ 12
- 19
src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp View File

@@ -128,7 +128,6 @@ private:
virtual bool visit(BasicBlock *);

void replaceZero(Instruction *);
void split64BitOp(Instruction *);
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);

@@ -158,6 +157,7 @@ private:

private:
LValue *rZero;
LValue *carry;
const bool needTexBar;
};

@@ -468,8 +468,10 @@ NVC0LegalizePostRA::visit(Function *fn)
insertTextureBarriers(fn);

rZero = new_LValue(fn, FILE_GPR);
carry = new_LValue(fn, FILE_FLAGS);

rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
carry->reg.data.id = 0;

return true;
}
@@ -486,22 +488,6 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
}
}

// For F64 instructions only: rewrite MAD as FMA, then, unless the operation
// is one of those listed in the switch, change the instruction's types to
// U32 and insert a forward clone of it right after it.
void
NVC0LegalizePostRA::split64BitOp(Instruction *i)
{
   if (i->dType != TYPE_F64)
      return;

   if (i->op == OP_MAD)
      i->op = OP_FMA;

   switch (i->op) {
   case OP_ADD:
   case OP_MUL:
   case OP_FMA:
   case OP_CVT:
   case OP_MIN:
   case OP_MAX:
   case OP_SET:
      // kept as one F64 instruction
      return;
   default:
      break;
   }

   i->dType = i->sType = TYPE_U32;

   Instruction *dup = cloneForward(func, i);
   i->bb->insertAfter(i, dup);
}

// replace CONT with BRA for single unconditional continue
bool
NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
@@ -565,10 +551,17 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
if (i->isNop()) {
bb->remove(i);
} else {
// TODO: Move this to before register allocation for operations that
// need the $c register !
if (typeSizeof(i->dType) == 8) {
Instruction *hi;
hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry);
if (hi)
next = hi;
}

if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
if (typeSizeof(i->dType) == 8)
split64BitOp(i);
}
}
if (!bb->getEntry())

Loading…
Cancel
Save