(exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) | (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) | ||||
>; | >; | ||||
/* Other helper patterns */ | |||||
/* --------------------- */ | |||||
/* Extract element pattern */ | |||||
/* Selection pattern for reading a single element out of a vector register.
 *
 * Matches a vector_extract of the constant index sub_idx from a vec_type
 * value held in vec_class, yielding a sub_type result, and rewrites it to an
 * EXTRACT_SUBREG of the same source register using the sub_reg index.
 */
class Extract_Element <ValueType sub_type, ValueType vec_type, | |||||
RegisterClass vec_class, int sub_idx, | |||||
SubRegIndex sub_reg>: Pat< | |||||
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), | |||||
(EXTRACT_SUBREG vec_class:$src, sub_reg) | |||||
>; | |||||
/* Insert element pattern */ | |||||
/* Selection pattern for writing a single element into a vector register.
 *
 * Matches a vector_insert of an elem_type value (held in elem_class) into a
 * vec_type value (held in vec_class) at the constant index sub_idx, and
 * rewrites it to an INSERT_SUBREG of $elem into $vec using the sub_reg index.
 */
class Insert_Element <ValueType elem_type, ValueType vec_type, | |||||
RegisterClass elem_class, RegisterClass vec_class, | |||||
int sub_idx, SubRegIndex sub_reg> : Pat < | |||||
(vec_type (vector_insert (vec_type vec_class:$vec), | |||||
(elem_type elem_class:$elem), sub_idx)), | |||||
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) | |||||
>; | |||||
include "R600Instructions.td" | include "R600Instructions.td" | ||||
include "SIInstrInfo.td" | include "SIInstrInfo.td" |
switch (MI.getOpcode()) { | switch (MI.getOpcode()) { | ||||
default: break; | default: break; | ||||
/* Custom swizzle instructions, ignore the last two operands */ | |||||
case AMDIL::SET_CHAN: | |||||
numOperands = 2; | |||||
break; | |||||
case AMDIL::VEXTRACT_v4f32: | |||||
numOperands = 2; | |||||
break; | |||||
/* XXX: Temp Hack */ | /* XXX: Temp Hack */ | ||||
case AMDIL::STORE_OUTPUT: | case AMDIL::STORE_OUTPUT: | ||||
numOperands = 2; | numOperands = 2; | ||||
/* Emit the channel byte for this operand: the reduction element when
 * isReduction is set, a channel derived from the register operand when MO is
 * a register, and 0 otherwise. */
if (isReduction) { | if (isReduction) { | ||||
emitByte(reductionElement); | emitByte(reductionElement); | ||||
} else if (MO.isReg()) { | } else if (MO.isReg()) { | ||||
const MachineInstr * parent = MO.getParent(); | |||||
/* The source channel for EXTRACT is stored in operand 2. */ | |||||
if (parent->getOpcode() == AMDIL::VEXTRACT_v4f32) { | |||||
emitByte(parent->getOperand(2).getImm()); | |||||
} else { | |||||
emitByte(TRI->getHWRegChan(MO.getReg())); | |||||
} | |||||
/* NOTE(review): this emits a second byte after the if/else above has already
 * emitted one for the register case. It looks like the old and new sides of
 * a change ended up together here -- confirm which single emission is
 * intended. */
emitByte(TRI->getHWRegChan(MO.getReg())); | |||||
} else { | } else { | ||||
emitByte(0); | emitByte(0); | ||||
} | } | ||||
const MachineInstr * parent = MO.getParent(); | const MachineInstr * parent = MO.getParent(); | ||||
if (isReduction) { | if (isReduction) { | ||||
emitByte(reductionElement); | emitByte(reductionElement); | ||||
/* The destination element for SET_CHAN is stored in the 3rd operand. */ | |||||
} else if (parent->getOpcode() == AMDIL::SET_CHAN) { | |||||
emitByte(parent->getOperand(2).getImm()); | |||||
} else if (parent->getOpcode() == AMDIL::VCREATE_v4f32) { | } else if (parent->getOpcode() == AMDIL::VCREATE_v4f32) { | ||||
emitByte(ELEMENT_X); | emitByte(ELEMENT_X); | ||||
} else { | } else { | ||||
case AMDIL::STORE_OUTPUT: | case AMDIL::STORE_OUTPUT: | ||||
case AMDIL::VCREATE_v4i32: | case AMDIL::VCREATE_v4i32: | ||||
case AMDIL::VCREATE_v4f32: | case AMDIL::VCREATE_v4f32: | ||||
case AMDIL::VEXTRACT_v4f32: | |||||
case AMDIL::VINSERT_v4f32: | |||||
case AMDIL::LOADCONST_i32: | case AMDIL::LOADCONST_i32: | ||||
case AMDIL::LOADCONST_f32: | case AMDIL::LOADCONST_f32: | ||||
case AMDIL::MOVE_v4i32: | case AMDIL::MOVE_v4i32: | ||||
case AMDIL::SET_CHAN: | |||||
/* Instructions to reinterpret bits as ... */ | /* Instructions to reinterpret bits as ... */ | ||||
case AMDIL::IL_ASINT_f32: | case AMDIL::IL_ASINT_f32: | ||||
case AMDIL::IL_ASINT_i32: | case AMDIL::IL_ASINT_i32: |
{ | { | ||||
setOperationAction(ISD::MUL, MVT::i64, Expand); | setOperationAction(ISD::MUL, MVT::i64, Expand); | ||||
// setSchedulingPreference(Sched::VLIW); | // setSchedulingPreference(Sched::VLIW); | ||||
addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass); | |||||
addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass); | |||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); | |||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); | |||||
} | } | ||||
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( | MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( |
[] | [] | ||||
>; | >; | ||||
/* SET_CHAN: takes an R600_Reg32 register ($src0) and an i32 immediate ($src1)
 * and defines an R600_Reg128 register ($dst).  Its pattern list is empty, so
 * no selection pattern is attached by this definition itself.
 * NOTE(review): $src1 is presumably the destination channel index -- confirm
 * against the code that builds and encodes this instruction.
 */
def SET_CHAN : AMDGPUShaderInst < | |||||
(outs R600_Reg128:$dst), | |||||
(ins R600_Reg32:$src0, i32imm:$src1), | |||||
"SET_CHAN $dst, $src0, $src1", | |||||
[] | |||||
>; | |||||
def MULLIT : AMDGPUShaderInst < | def MULLIT : AMDGPUShaderInst < | ||||
(outs R600_Reg128:$dst), | (outs R600_Reg128:$dst), | ||||
(ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), | (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), | ||||
} //End isPseudo | } //End isPseudo | ||||
/* f32 element extraction from a v4f32 in R600_Reg128: indices 0, 1, 2, 3 use
 * the sel_x, sel_y, sel_z, sel_w sub-register indices respectively. */
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; | |||||
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; | |||||
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; | |||||
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; | |||||
/* f32 element insertion into a v4f32 in R600_Reg128.
 * NOTE(review): these match indices 4-7, whereas the extract patterns above
 * match 0-3 for the same sub-registers -- confirm the offset is intentional. */
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>; | |||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>; | |||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>; | |||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>; | |||||
include "R600ShaderPatterns.td" | include "R600ShaderPatterns.td" | ||||
break; | break; | ||||
} | } | ||||
case AMDIL::VEXTRACT_v4f32: | |||||
/* Rewrite the immediate held in operand 2 in place, decrementing it by one.
 * NOTE(review): presumably converts a 1-based element index to a 0-based
 * channel -- confirm against whatever produces this operand. */
MI.getOperand(2).setImm(MI.getOperand(2).getImm() - 1); | |||||
/* Uses `continue` (not `break`), so control does not fall out of the switch
 * for this opcode; the instruction itself is only modified above. */
continue; | |||||
case AMDIL::NEGATE_i32: | case AMDIL::NEGATE_i32: | ||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT)) | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT)) | ||||
.addOperand(MI.getOperand(0)) | .addOperand(MI.getOperand(0)) | ||||
break; | break; | ||||
} | } | ||||
case AMDIL::VINSERT_v4f32:
  {
    /* Expand VINSERT_v4f32 into a SET_CHAN followed by a COPY.
     *
     * Operand 4 carries the swizzle immediate.  Only the four values
     * 1 << 0, 1 << 8, 1 << 16 and 1 << 24 are accepted; they select
     * channels 0, 1, 2 and 3 respectively.  Any other value is reported on
     * stderr and aborts the process. */
    const int64_t swz = MI.getOperand(4).getImm();
    int64_t chan = 0;
    switch (swz) {
    case (1 << 0):
      chan = 0;
      break;
    case (1 << 8):
      chan = 1;
      break;
    case (1 << 16):
      chan = 2;
      break;
    case (1 << 24):
      chan = 3;
      break;
    default:
      /* int64_t is not `long` on every host (32-bit and LLP64 targets use
       * `long long`), so cast explicitly to match the conversion specifier
       * instead of passing it to %ld. */
      fprintf(stderr, "swizzle: %lld\n", static_cast<long long>(swz));
      abort();
    }
    /* SET_CHAN <operand 1>, <operand 2>, <chan> */
    BuildMI(MBB, I, MBB.findDebugLoc(I),
            TM.getInstrInfo()->get(AMDIL::SET_CHAN))
            .addOperand(MI.getOperand(1))
            .addOperand(MI.getOperand(2))
            .addImm(chan);
    /* COPY <operand 0>, <operand 1> */
    BuildMI(MBB, I, MBB.findDebugLoc(I),
            TM.getInstrInfo()->get(AMDIL::COPY))
            .addOperand(MI.getOperand(0))
            .addOperand(MI.getOperand(1));
    break;
  }
default: | default: | ||||
continue; | continue; | ||||
} | } |
>; | >; | ||||
/* Extract element pattern */ | |||||
/* Selection pattern for reading a single element out of a vector register.
 *
 * Matches a vector_extract of the constant index sub_idx from a vec_type
 * value held in vec_class, yielding a sub_type result, and rewrites it to an
 * EXTRACT_SUBREG of the same source register using the sub_reg index.
 */
class Extract_Element <ValueType sub_type, ValueType vec_type, | |||||
RegisterClass vec_class, int sub_idx, | |||||
SubRegIndex sub_reg>: Pat< | |||||
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), | |||||
(EXTRACT_SUBREG vec_class:$src, sub_reg) | |||||
>; | |||||
/* f32 element extraction from a v4f32 in VReg_128: indices 0, 1, 2, 3 use the
 * sel_x, sel_y, sel_z, sel_w sub-register indices respectively. */
def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; | def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; | ||||
def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>; | def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>; | ||||
def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>; | def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>; | ||||
def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>; | def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>; | ||||
/* Selection pattern for writing a single element into a vector register.
 *
 * Matches a vector_insert of an elem_type value (held in elem_class) into a
 * vec_type value (held in vec_class) at the constant index sub_idx, and
 * rewrites it to an INSERT_SUBREG of $elem into $vec using the sub_reg index.
 */
class Insert_Element <ValueType elem_type, ValueType vec_type, | |||||
RegisterClass elem_class, RegisterClass vec_class, | |||||
int sub_idx, SubRegIndex sub_reg> : Pat < | |||||
(vec_type (vector_insert (vec_type vec_class:$vec), | |||||
(elem_type elem_class:$elem), sub_idx)), | |||||
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) | |||||
>; | |||||
/* f32 element insertion into a v4f32 in VReg_128.
 * NOTE(review): these match indices starting at 4 (4 -> sel_x, 5 -> sel_y,
 * 6 -> sel_z) rather than 0 -- confirm the offset is intentional. */
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>; | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>; | ||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>; | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>; | ||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>; | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>; |