Browse Source

radeon/llvm: ExpandSpecialInstrs - Add support for cube instructions

tags/mesa-9.0
Tom Stellard 13 years ago
parent
commit
1cb07bd3b8

+ 17
- 44
src/gallium/drivers/radeon/R600CodeEmitter.cpp View File

@@ -49,17 +49,14 @@ private:
const R600RegisterInfo * TRI;
const R600InstrInfo * TII;

bool IsCube;
unsigned currentElement;
bool IsLast;

unsigned section_start;

public:

R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
_OS(OS), TM(NULL), IsCube(false),
IsLast(true) { }
_OS(OS), TM(NULL) { }

const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }

@@ -70,7 +67,7 @@ public:
private:

void EmitALUInstr(MachineInstr &MI);
void EmitSrc(const MachineOperand & MO, int chan_override = -1);
void EmitSrc(const MachineOperand & MO);
void EmitDst(const MachineOperand & MO);
void EmitALU(MachineInstr &MI, unsigned numSrc);
void EmitTexInstr(MachineInstr &MI);
@@ -160,7 +157,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
E = MBB.instr_end(); I != E; ++I) {
MachineInstr &MI = *I;
IsCube = TII->isCubeOp(MI.getOpcode());
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
continue;
}
@@ -168,15 +164,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
EmitTexInstr(MI);
} else if (TII->isFCOp(MI.getOpcode())){
EmitFCInstr(MI);
} else if (IsCube) {
IsLast = false;
// XXX: On Cayman, some (all?) of the vector instructions only need
// to fill the first three slots.
for (currentElement = 0; currentElement < 4; currentElement++) {
IsLast = (currentElement == 3);
EmitALUInstr(MI);
}
IsCube = false;
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
@@ -250,25 +237,18 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
// Emit instruction type
EmitByte(0);

if (IsCube) {
static const int cube_src_swz[] = {2, 2, 0, 1};
EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
EmitNullBytes(SRC_BYTE_COUNT);
} else {
unsigned int opIndex;
for (opIndex = 1; opIndex < numOperands; opIndex++) {
// Literal constants are always stored as the last operand.
if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
break;
}
EmitSrc(MI.getOperand(opIndex));
unsigned int opIndex;
for (opIndex = 1; opIndex < numOperands; opIndex++) {
// Literal constants are always stored as the last operand.
if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
break;
}
EmitSrc(MI.getOperand(opIndex));
}

// Emit zeros for unused sources
for ( ; opIndex < 4; opIndex++) {
EmitNullBytes(SRC_BYTE_COUNT);
}
// Emit zeros for unused sources
for ( ; opIndex < 4; opIndex++) {
EmitNullBytes(SRC_BYTE_COUNT);
}

EmitDst(dstOp);
@@ -276,7 +256,7 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
EmitALU(MI, numOperands - 1);
}

void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
{
uint32_t value = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
@@ -302,9 +282,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
}

// Emit the source channel (1 byte)
if (chan_override != -1) {
EmitByte(chan_override);
} else if (MO.isReg()) {
if (MO.isReg()) {
EmitByte(TRI->getHWRegChan(MO.getReg()));
} else {
EmitByte(0);
@@ -345,11 +323,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
EmitByte(getHWReg(MO.getReg()));

// Emit the element of the destination register (1 byte)
if (IsCube) {
EmitByte(currentElement);
} else {
EmitByte(TRI->getHWRegChan(MO.getReg()));
}
EmitByte(TRI->getHWRegChan(MO.getReg()));

// Emit isClamped (1 byte)
if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
@@ -379,9 +353,8 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
EmitTwoBytes(getBinaryCodeForInstr(MI));

// Emit IsLast (for this instruction group) (1 byte)
if (!IsLast ||
(MI.isInsideBundle() &&
!(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) {
if (MI.isInsideBundle() &&
!(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) {
EmitByte(0);
} else {
EmitByte(1);

+ 58
- 7
src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp View File

@@ -61,7 +61,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {

bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);
if (!IsReduction && !IsVector) {
bool IsCube = TII->isCubeOp(MI.getOpcode());
if (!IsReduction && !IsVector && !IsCube) {
continue;
}

@@ -82,23 +83,73 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
// T0_W (write masked) = MULLO_INT T1_X, T2_X
//
// Cube instructions:
// T0_XYZW = CUBE T1_XYZW
// becomes:
// T0_X = CUBE T1_Z, T1_Y
// T0_Y = CUBE T1_Z, T1_X
// T0_Z = CUBE T1_X, T1_Z
// T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src0 = MI.getOperand(1).getReg();
unsigned Src1 = MI.getOperand(2).getReg();
unsigned Src1 = 0;

// Determine the correct source registers
if (!IsCube) {
Src1 = MI.getOperand(2).getReg();
}
if (IsReduction) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
Src0 = TRI.getSubReg(Src0, SubRegIndex);
Src1 = TRI.getSubReg(Src1, SubRegIndex);
} else if (IsCube) {
static const int CubeSrcSwz[] = {2, 2, 0, 1};
unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
}

// Determine the correct destination registers
unsigned Flags = 0;
if (IsCube) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
} else {
// Mask the write if the original instruction does not write to
// the current Channel.
Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
unsigned DstBase = TRI.getHWRegIndex(DstReg);
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
}
unsigned DstBase = TRI.getHWRegIndex(DstReg);
unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);

// Set the IsLast bit
Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true);

// Add the new instruction
unsigned Opcode;
if (IsCube) {
switch (MI.getOpcode()) {
case AMDGPU::CUBE_r600_pseudo:
Opcode = AMDGPU::CUBE_r600_real;
break;
case AMDGPU::CUBE_eg_pseudo:
Opcode = AMDGPU::CUBE_eg_real;
break;
default:
assert(!"Unknown CUBE instruction");
Opcode = 0;
break;
}
} else {
Opcode = MI.getOpcode();
}
MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true);
NewDstOp.addTargetFlag(Flags);

BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode()))
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode))
.addOperand(NewDstOp)
.addReg(Src0)
.addReg(Src1)

+ 4
- 2
src/gallium/drivers/radeon/R600InstrInfo.cpp View File

@@ -153,8 +153,10 @@ bool R600InstrInfo::isCubeOp(unsigned opcode) const
{
switch(opcode) {
default: return false;
case AMDGPU::CUBE_r600:
case AMDGPU::CUBE_eg:
case AMDGPU::CUBE_r600_pseudo:
case AMDGPU::CUBE_r600_real:
case AMDGPU::CUBE_eg_pseudo:
case AMDGPU::CUBE_eg_real:
return true;
}
}

+ 21
- 10
src/gallium/drivers/radeon/R600Instructions.td View File

@@ -593,14 +593,25 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION <
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
>;

class CUBE_Common <bits<32> inst> : InstR600 <
inst,
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src),
"CUBE $dst $src",
[(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
VecALU
>;
multiclass CUBE_Common <bits<32> inst> {

def _pseudo : InstR600 <
inst,
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src),
"CUBE $dst $src",
[(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
VecALU
>;

def _real : InstR600 <
inst,
(outs R600_Reg32:$dst),
(ins R600_Reg32:$src0, R600_Reg32:$src1),
"CUBE $dst, $src0, $src1",
[], VecALU
>;
}

class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
inst, "EXP_IEEE",
@@ -737,7 +748,7 @@ let Predicates = [isR600] in {
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
def CUBE_r600 : CUBE_Common<0x52>;
defm CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
@@ -853,7 +864,7 @@ let Predicates = [isEGorCayman] in {
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def DOT4_eg : DOT4_Common<0xBE>;
def CUBE_eg : CUBE_Common<0xC0>;
defm CUBE_eg : CUBE_Common<0xC0>;

def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;

Loading…
Cancel
Save