| @@ -49,17 +49,14 @@ private: | |||
| const R600RegisterInfo * TRI; | |||
| const R600InstrInfo * TII; | |||
| bool IsCube; | |||
| unsigned currentElement; | |||
| bool IsLast; | |||
| unsigned section_start; | |||
| public: | |||
| R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), | |||
| _OS(OS), TM(NULL), IsCube(false), | |||
| IsLast(true) { } | |||
| _OS(OS), TM(NULL) { } | |||
| const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } | |||
| @@ -70,7 +67,7 @@ public: | |||
| private: | |||
| void EmitALUInstr(MachineInstr &MI); | |||
| void EmitSrc(const MachineOperand & MO, int chan_override = -1); | |||
| void EmitSrc(const MachineOperand & MO); | |||
| void EmitDst(const MachineOperand & MO); | |||
| void EmitALU(MachineInstr &MI, unsigned numSrc); | |||
| void EmitTexInstr(MachineInstr &MI); | |||
| @@ -160,7 +157,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { | |||
| for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), | |||
| E = MBB.instr_end(); I != E; ++I) { | |||
| MachineInstr &MI = *I; | |||
| IsCube = TII->isCubeOp(MI.getOpcode()); | |||
| if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { | |||
| continue; | |||
| } | |||
| @@ -168,15 +164,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { | |||
| EmitTexInstr(MI); | |||
| } else if (TII->isFCOp(MI.getOpcode())){ | |||
| EmitFCInstr(MI); | |||
| } else if (IsCube) { | |||
| IsLast = false; | |||
| // XXX: On Cayman, some (all?) of the vector instructions only need | |||
| // to fill the first three slots. | |||
| for (currentElement = 0; currentElement < 4; currentElement++) { | |||
| IsLast = (currentElement == 3); | |||
| EmitALUInstr(MI); | |||
| } | |||
| IsCube = false; | |||
| } else if (MI.getOpcode() == AMDGPU::RETURN || | |||
| MI.getOpcode() == AMDGPU::BUNDLE || | |||
| MI.getOpcode() == AMDGPU::KILL) { | |||
| @@ -250,25 +237,18 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) | |||
| // Emit instruction type | |||
| EmitByte(0); | |||
| if (IsCube) { | |||
| static const int cube_src_swz[] = {2, 2, 0, 1}; | |||
| EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]); | |||
| EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); | |||
| EmitNullBytes(SRC_BYTE_COUNT); | |||
| } else { | |||
| unsigned int opIndex; | |||
| for (opIndex = 1; opIndex < numOperands; opIndex++) { | |||
| // Literal constants are always stored as the last operand. | |||
| if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { | |||
| break; | |||
| } | |||
| EmitSrc(MI.getOperand(opIndex)); | |||
| unsigned int opIndex; | |||
| for (opIndex = 1; opIndex < numOperands; opIndex++) { | |||
| // Literal constants are always stored as the last operand. | |||
| if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { | |||
| break; | |||
| } | |||
| EmitSrc(MI.getOperand(opIndex)); | |||
| } | |||
| // Emit zeros for unused sources | |||
| for ( ; opIndex < 4; opIndex++) { | |||
| EmitNullBytes(SRC_BYTE_COUNT); | |||
| } | |||
| // Emit zeros for unused sources | |||
| for ( ; opIndex < 4; opIndex++) { | |||
| EmitNullBytes(SRC_BYTE_COUNT); | |||
| } | |||
| EmitDst(dstOp); | |||
| @@ -276,7 +256,7 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) | |||
| EmitALU(MI, numOperands - 1); | |||
| } | |||
| void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) | |||
| void R600CodeEmitter::EmitSrc(const MachineOperand & MO) | |||
| { | |||
| uint32_t value = 0; | |||
| // Emit the source select (2 bytes). For GPRs, this is the register index. | |||
| @@ -302,9 +282,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) | |||
| } | |||
| // Emit the source channel (1 byte) | |||
| if (chan_override != -1) { | |||
| EmitByte(chan_override); | |||
| } else if (MO.isReg()) { | |||
| if (MO.isReg()) { | |||
| EmitByte(TRI->getHWRegChan(MO.getReg())); | |||
| } else { | |||
| EmitByte(0); | |||
| @@ -345,11 +323,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) | |||
| EmitByte(getHWReg(MO.getReg())); | |||
| // Emit the element of the destination register (1 byte) | |||
| if (IsCube) { | |||
| EmitByte(currentElement); | |||
| } else { | |||
| EmitByte(TRI->getHWRegChan(MO.getReg())); | |||
| } | |||
| EmitByte(TRI->getHWRegChan(MO.getReg())); | |||
| // Emit isClamped (1 byte) | |||
| if (MO.getTargetFlags() & MO_FLAG_CLAMP) { | |||
| @@ -379,9 +353,8 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc) | |||
| EmitTwoBytes(getBinaryCodeForInstr(MI)); | |||
| // Emit IsLast (for this instruction group) (1 byte) | |||
| if (!IsLast || | |||
| (MI.isInsideBundle() && | |||
| !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) { | |||
| if (MI.isInsideBundle() && | |||
| !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) { | |||
| EmitByte(0); | |||
| } else { | |||
| EmitByte(1); | |||
| @@ -61,7 +61,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { | |||
| bool IsReduction = TII->isReductionOp(MI.getOpcode()); | |||
| bool IsVector = TII->isVector(MI); | |||
| if (!IsReduction && !IsVector) { | |||
| bool IsCube = TII->isCubeOp(MI.getOpcode()); | |||
| if (!IsReduction && !IsVector && !IsCube) { | |||
| continue; | |||
| } | |||
| @@ -82,23 +83,73 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { | |||
| // T0_Y (write masked) = MULLO_INT T1_X, T2_X | |||
| // T0_Z (write masked) = MULLO_INT T1_X, T2_X | |||
| // T0_W (write masked) = MULLO_INT T1_X, T2_X | |||
| // | |||
| // Cube instructions: | |||
| // T0_XYZW = CUBE T1_XYZW | |||
| // becomes: | |||
| // T0_X = CUBE T1_Z, T1_Y | |||
| // T0_Y = CUBE T1_Z, T1_X | |||
| // T0_Z = CUBE T1_X, T1_Z | |||
| // T0_W = CUBE T1_Y, T1_Z | |||
| for (unsigned Chan = 0; Chan < 4; Chan++) { | |||
| unsigned DstReg = MI.getOperand(0).getReg(); | |||
| unsigned Src0 = MI.getOperand(1).getReg(); | |||
| unsigned Src1 = MI.getOperand(2).getReg(); | |||
| unsigned Src1 = 0; | |||
| // Determine the correct source registers | |||
| if (!IsCube) { | |||
| Src1 = MI.getOperand(2).getReg(); | |||
| } | |||
| if (IsReduction) { | |||
| unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); | |||
| Src0 = TRI.getSubReg(Src0, SubRegIndex); | |||
| Src1 = TRI.getSubReg(Src1, SubRegIndex); | |||
| } else if (IsCube) { | |||
| static const int CubeSrcSwz[] = {2, 2, 0, 1}; | |||
| unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); | |||
| unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); | |||
| Src1 = TRI.getSubReg(Src0, SubRegIndex1); | |||
| Src0 = TRI.getSubReg(Src0, SubRegIndex0); | |||
| } | |||
| // Determine the correct destination registers. | |||
| unsigned Flags = 0; | |||
| if (IsCube) { | |||
| unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); | |||
| DstReg = TRI.getSubReg(DstReg, SubRegIndex); | |||
| } else { | |||
| // Mask the write if the original instruction does not write to | |||
| // the current Channel. | |||
| Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); | |||
| unsigned DstBase = TRI.getHWRegIndex(DstReg); | |||
| DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); | |||
| } | |||
| unsigned DstBase = TRI.getHWRegIndex(DstReg); | |||
| unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); | |||
| unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); | |||
| // Set the IsLast bit | |||
| Flags |= (Chan == 3 ? MO_FLAG_LAST : 0); | |||
| MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true); | |||
| // Add the new instruction | |||
| unsigned Opcode; | |||
| if (IsCube) { | |||
| switch (MI.getOpcode()) { | |||
| case AMDGPU::CUBE_r600_pseudo: | |||
| Opcode = AMDGPU::CUBE_r600_real; | |||
| break; | |||
| case AMDGPU::CUBE_eg_pseudo: | |||
| Opcode = AMDGPU::CUBE_eg_real; | |||
| break; | |||
| default: | |||
| assert(!"Unknown CUBE instruction"); | |||
| Opcode = 0; | |||
| break; | |||
| } | |||
| } else { | |||
| Opcode = MI.getOpcode(); | |||
| } | |||
| MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true); | |||
| NewDstOp.addTargetFlag(Flags); | |||
| BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode())) | |||
| BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode)) | |||
| .addOperand(NewDstOp) | |||
| .addReg(Src0) | |||
| .addReg(Src1) | |||
| @@ -153,8 +153,10 @@ bool R600InstrInfo::isCubeOp(unsigned opcode) const | |||
| { | |||
| switch(opcode) { | |||
| default: return false; | |||
| case AMDGPU::CUBE_r600: | |||
| case AMDGPU::CUBE_eg: | |||
| case AMDGPU::CUBE_r600_pseudo: | |||
| case AMDGPU::CUBE_r600_real: | |||
| case AMDGPU::CUBE_eg_pseudo: | |||
| case AMDGPU::CUBE_eg_real: | |||
| return true; | |||
| } | |||
| } | |||
| @@ -593,14 +593,25 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION < | |||
| [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] | |||
| >; | |||
| class CUBE_Common <bits<32> inst> : InstR600 < | |||
| inst, | |||
| (outs R600_Reg128:$dst), | |||
| (ins R600_Reg128:$src), | |||
| "CUBE $dst $src", | |||
| [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], | |||
| VecALU | |||
| >; | |||
| multiclass CUBE_Common <bits<32> inst> { | |||
| def _pseudo : InstR600 < | |||
| inst, | |||
| (outs R600_Reg128:$dst), | |||
| (ins R600_Reg128:$src), | |||
| "CUBE $dst $src", | |||
| [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], | |||
| VecALU | |||
| >; | |||
| def _real : InstR600 < | |||
| inst, | |||
| (outs R600_Reg32:$dst), | |||
| (ins R600_Reg32:$src0, R600_Reg32:$src1), | |||
| "CUBE $dst, $src0, $src1", | |||
| [], VecALU | |||
| >; | |||
| } | |||
| class EXP_IEEE_Common <bits<32> inst> : R600_1OP < | |||
| inst, "EXP_IEEE", | |||
| @@ -737,7 +748,7 @@ let Predicates = [isR600] in { | |||
| def CNDGT_r600 : CNDGT_Common<0x19>; | |||
| def CNDGE_r600 : CNDGE_Common<0x1A>; | |||
| def DOT4_r600 : DOT4_Common<0x50>; | |||
| def CUBE_r600 : CUBE_Common<0x52>; | |||
| defm CUBE_r600 : CUBE_Common<0x52>; | |||
| def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; | |||
| def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; | |||
| def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; | |||
| @@ -853,7 +864,7 @@ let Predicates = [isEGorCayman] in { | |||
| def SIN_eg : SIN_Common<0x8D>; | |||
| def COS_eg : COS_Common<0x8E>; | |||
| def DOT4_eg : DOT4_Common<0xBE>; | |||
| def CUBE_eg : CUBE_Common<0xC0>; | |||
| defm CUBE_eg : CUBE_Common<0xC0>; | |||
| def DIV_eg : DIV_Common<RECIP_IEEE_eg>; | |||
| def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>; | |||