Clone of mesa.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.


  1. //===-- R600Instructions.td - R600 instruction definitions ------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // TableGen instruction formats, instruction definitions and selection patterns for R600 through Cayman GPUs.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. include "R600Intrinsics.td"
  14. class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
  15. InstrItinClass itin>
  16. : AMDGPUInst <outs, ins, asm, pattern> { // Base class for R600 instructions that carry a 32-bit Inst field.
  17. field bits<32> Inst;
  18. bit Trig = 0; // Cleared by default; SIN_Common and COS_Common set it to 1.
  19. bit Op3 = 0; // Cleared by default; R600_3OP sets it to 1.
  20. let Inst = inst; // The whole `inst` template argument is stored as the Inst value.
  21. let Namespace = "AMDIL";
  22. let OutOperandList = outs;
  23. let InOperandList = ins;
  24. let AsmString = asm;
  25. let Pattern = pattern;
  26. let Itinerary = itin;
  27. let TSFlags{4} = Trig; // Trig is copied into bit 4 of TSFlags.
  28. let TSFlags{5} = Op3; // Op3 is copied into bit 5 of TSFlags.
  29. }
  30. class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : // Base class for instructions with a 64-bit Inst field.
  31. AMDGPUInst <outs, ins, asm, pattern>
  32. {
  33. field bits<64> Inst; // No bits are assigned here; subclasses fill in the encoding.
  34. let Namespace = "AMDIL";
  35. }
  36. def MEMri : Operand<iPTRAny> { // Memory operand made of two R600_Reg32 sub-operands.
  37. let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index); // Sub-operands: $ptr, then $index.
  38. }
  39. def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
  40. class R600_ALU { // Bundle of ALU fields, all defaulting to 0. NOTE(review): not referenced in the visible part of this file.
  41. bits<7> DST_GPR = 0;
  42. bits<9> SRC0_SEL = 0;
  43. bits<1> SRC0_NEG = 0;
  44. bits<9> SRC1_SEL = 0;
  45. bits<1> SRC1_NEG = 0;
  46. bits<1> CLAMP = 0;
  47. }
  48. class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
  49. InstrItinClass itin = AnyALU> : // One-source instruction format; the itinerary defaults to AnyALU.
  50. InstR600 <inst,
  51. (outs R600_Reg32:$dst),
  52. (ins R600_Reg32:$src, variable_ops),
  53. !strconcat(opName, " $dst, $src"), // Asm string is opName followed by the operand list, so opName should be the bare mnemonic.
  54. pattern,
  55. itin
  56. >;
  57. class R600_2OP <bits<32> inst, string opName, list<dag> pattern,
  58. InstrItinClass itin = AnyALU> : // Two-source instruction format; the itinerary defaults to AnyALU.
  59. InstR600 <inst,
  60. (outs R600_Reg32:$dst),
  61. (ins R600_Reg32:$src0, R600_Reg32:$src1, variable_ops),
  62. !strconcat(opName, " $dst, $src0, $src1"), // Asm string is opName followed by the operand list, so opName should be the bare mnemonic.
  63. pattern,
  64. itin
  65. >;
  66. class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
  67. InstrItinClass itin = AnyALU> : // Three-source instruction format; the itinerary defaults to AnyALU.
  68. InstR600 <inst,
  69. (outs R600_Reg32:$dst),
  70. (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops),
  71. !strconcat(opName, " $dst, $src0, $src1, $src2"), // Leading space and comma after $dst match R600_1OP/R600_2OP and keep the mnemonic separate from $dst.
  72. pattern,
  73. itin>{
  74. let Op3 = 1; // Marks the instruction as three-operand; InstR600 copies this into TSFlags{5}.
  75. }
  76. class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
  77. InstrItinClass itin = AnyALU> : // Format whose input list and asm string are supplied by the caller.
  78. InstR600 <inst,
  79. (outs R600_Reg32:$dst), // The result is always a single R600_Reg32 named $dst.
  80. ins,
  81. asm,
  82. pattern,
  83. itin
  84. >;
  85. class R600_TEX <bits<32> inst, string opName, list<dag> pattern,
  86. InstrItinClass itin = AnyALU> : // Texture instruction format; the itinerary defaults to AnyALU.
  87. InstR600 <inst,
  88. (outs R600_Reg128:$dst),
  89. (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2), // One 128-bit register source plus two 32-bit immediates.
  90. !strconcat(opName, " $dst, $src0, $src1, $src2"), // Leading space keeps the mnemonic separate from $dst, as in R600_1OP/R600_2OP.
  91. pattern,
  92. itin
  93. >;
  94. def TEX_SHADOW : PatLeaf< // Immediate leaf used as the last operand of the TEX_SAMPLE_C* patterns.
  95. (imm),
  96. [{uint32_t TType = (uint32_t)N->getZExtValue();
  97. return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12; // Matches 6, 7, 8, 11 and 12 only; presumably the shadow texture target types - TODO confirm.
  98. }]
  99. >;
  100. class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins,
  101. string asm> :
  102. InstR600ISA <outs, ins, asm, []>
  103. { // 64-bit format with no selection pattern; only bits 31-0 of Inst are assigned below.
  104. bits<7> RW_GPR;
  105. bits<7> INDEX_GPR;
  106. bits<4> RAT_ID;
  107. bits<2> RIM;
  108. bits<2> TYPE;
  109. bits<1> RW_REL;
  110. bits<2> ELEM_SIZE;
  111. bits<12> ARRAY_SIZE;
  112. bits<4> COMP_MASK;
  113. bits<4> BURST_COUNT;
  114. bits<1> VPM;
  115. bits<1> EOP;
  116. bits<1> MARK;
  117. bits<1> BARRIER;
  118. /* CF_ALLOC_EXPORT_WORD0_RAT */
  119. let Inst{3-0} = RAT_ID;
  120. let Inst{9-4} = rat_inst;
  121. let Inst{10} = 0; /* Reserved */
  122. let Inst{12-11} = RIM;
  123. let Inst{14-13} = TYPE;
  124. let Inst{21-15} = RW_GPR;
  125. let Inst{22} = RW_REL;
  126. let Inst{29-23} = INDEX_GPR;
  127. let Inst{31-30} = ELEM_SIZE;
  128. /* CF_ALLOC_EXPORT_WORD1_BUF */
  129. /* XXX: We can't have auto encoding of 64-bit instructions until LLVM 3.1 :( */
  130. /*
  131. let Inst{43-32} = ARRAY_SIZE;
  132. let Inst{47-44} = COMP_MASK;
  133. let Inst{51-48} = BURST_COUNT;
  134. let Inst{52} = VPM;
  135. let Inst{53} = EOP;
  136. let Inst{61-54} = cf_inst;
  137. let Inst{62} = MARK;
  138. let Inst{63} = BARRIER;
  139. */
  140. } // While the block above stays commented out, cf_inst and the word-1 fields are declared but never encoded.
  141. /*
  142. def store_global : PatFrag<(ops node:$value, node:$ptr),
  143. (store node:$value, node:$ptr),
  144. [{
  145. const Value *Src;
  146. const PointerType *PT;
  147. if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
  148. (PT = dyn_cast<PointerType>(Src->getType()))) {
  149. return PT->getAddressSpace() == 1;
  150. }
  151. return false;
  152. }]>;
  153. */
  154. def load_param : PatFrag<(ops node:$ptr), // Pattern fragment wrapping `load`; see the predicate note below.
  155. (load node:$ptr),
  156. [{
  157. return true; // NOTE(review): unconditional match; every statement below is unreachable, so no address-space check is applied.
  158. const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
  159. if (Src) {
  160. PointerType * PT = dyn_cast<PointerType>(Src->getType());
  161. return PT && PT->getAddressSpace() == AMDILAS::PARAM_I_ADDRESS; // Dead code: would restrict matches to the PARAM_I address space.
  162. }
  163. return false;
  164. }]>;
  165. //class EG_CF <bits<32> inst, string asm> :
  166. // InstR600 <inst, (outs), (ins), asm, []>;
  167. /* XXX: We will use this when we emit the real ISA.
  168. bits<24> ADDR = 0;
  169. bits<3> JTS = 0;
  170. bits<3> PC = 0;
  171. bits<5> CF_CONST = 0;
  172. bits<2> COND = 0;
  173. bits<6> COUNT = 0;
  174. bits<1> VPM = 0;
  175. bits<1> EOP = 0;
  176. bits<8> CF_INST = 0;
  177. bits<1> WQM = 0;
  178. bits<1> B = 0;
  179. let Inst{23-0} = ADDR;
  180. let Inst{26-24} = JTS;
  181. let Inst{34-32} = PC;
  182. let Inst{39-35} = CF_CONST;
  183. let Inst{41-40} = COND;
  184. let Inst{47-42} = COUNT;
  185. let Inst{52} = VPM;
  186. let Inst{53} = EOP;
  187. let Inst{61-54} = CF_INST;
  188. let Inst{62} = WQM;
  189. let Inst{63} = B;
  190. //}
  191. */
  192. def isR600 : Predicate<"Subtarget.device()"
  193. "->getGeneration() == AMDILDeviceInfo::HD4XXX">; // True only when the device generation is exactly HD4XXX.
  194. def isEG : Predicate<"Subtarget.device()"
  195. "->getGeneration() >= AMDILDeviceInfo::HD5XXX && "
  196. "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; // Generation HD5XXX or later, excluding devices flagged as Cayman.
  197. def isCayman : Predicate<"Subtarget.device()"
  198. "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; // True only for devices flagged as Cayman.
  199. def isEGorCayman : Predicate<"Subtarget.device()"
  200. "->getGeneration() >= AMDILDeviceInfo::HD5XXX">; // Generation HD5XXX or later, Cayman included.
  201. def isR600toCayman : Predicate<
  202. "Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX">; // Any generation up to and including HD6XXX.
  203. let Predicates = [isR600toCayman] in {
  204. /* ------------------------------------------- */
  205. /* Common Instructions R600, R700, Evergreen, Cayman */
  206. /* ------------------------------------------- */
  207. let Gen = AMDGPUGen.R600_CAYMAN in {
  208. def ADD : R600_2OP <
  209. 0x0, "ADD",
  210. [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))] > {
  211. let AMDILOp = AMDILInst.ADD_f32;
  212. }
  213. // Non-IEEE MUL: 0 * anything = 0
  214. def MUL : R600_2OP <
  215. 0x1, "MUL NON-IEEE",
  216. [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))]
  217. >;
  218. def MUL_IEEE : R600_2OP <
  219. 0x2, "MUL_IEEE",
  220. [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))]> {
  221. let AMDILOp = AMDILInst.MUL_IEEE_f32;
  222. }
  223. def MAX : R600_2OP <
  224. 0x3, "MAX",
  225. [(set R600_Reg32:$dst, (int_AMDIL_max R600_Reg32:$src0, R600_Reg32:$src1))]> {
  226. let AMDILOp = AMDILInst.MAX_f32;
  227. }
  228. def MIN : R600_2OP <
  229. 0x4, "MIN",
  230. [(set R600_Reg32:$dst, (int_AMDIL_min R600_Reg32:$src0, R600_Reg32:$src1))]> {
  231. let AMDILOp = AMDILInst.MIN_f32;
  232. }
  233. /* For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
  234. * so some of the instruction names don't match the asm string.
  235. * XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
  236. */
  237. def SETE : R600_2OP <
  238. 0x08, "SETE",
  239. [(set R600_Reg32:$dst, (int_AMDGPU_seq R600_Reg32:$src0, R600_Reg32:$src1))]> {
  240. let AMDILOp = AMDILInst.FEQ;
  241. }
  242. def SGT : R600_2OP <
  243. 0x09, "SETGT",
  244. [(set R600_Reg32:$dst, (int_AMDGPU_sgt R600_Reg32:$src0, R600_Reg32:$src1))]
  245. >;
  246. def SGE : R600_2OP <
  247. 0xA, "SETGE",
  248. [(set R600_Reg32:$dst, (int_AMDGPU_sge R600_Reg32:$src0, R600_Reg32:$src1))]> {
  249. let AMDILOp = AMDILInst.FGE;
  250. }
  251. def SNE : R600_2OP <
  252. 0xB, "SETNE",
  253. [(set R600_Reg32:$dst, (int_AMDGPU_sne R600_Reg32:$src0, R600_Reg32:$src1))]> {
  254. let AMDILOp = AMDILInst.FNE;
  255. }
  256. def FRACT : R600_1OP <
  257. 0x10, "FRACT",
  258. []> {
  259. let AMDILOp = AMDILInst.FRAC_f32;
  260. }
  261. def TRUNC : R600_1OP <
  262. 0x11, "TRUNC",
  263. [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
  264. >;
  265. def FLOOR : R600_1OP <
  266. 0x14, "FLOOR",
  267. [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
  268. >;
  269. def MOV : R600_1OP <0x19, "MOV", []>;
  270. def KILLGT : R600_2OP <
  271. 0x2D, "KILLGT",
  272. []
  273. >;
  274. def AND_INT : R600_2OP <
  275. 0x30, "AND_INT",
  276. []> {
  277. let AMDILOp = AMDILInst.AND_i32;
  278. }
  279. def XOR_INT : R600_2OP <
  280. 0x32, "XOR_INT",
  281. []
  282. >;
  283. def ADD_INT : R600_2OP < // Integer ALU ops: the R600_2OP/R600_3OP base classes append the operand list, so only the mnemonic is passed.
  284. 0x34, "ADD_INT",
  285. []>{
  286. let AMDILOp = AMDILInst.ADD_i32;
  287. }
  288. def SUB_INT : R600_2OP <
  289. 0x35, "SUB_INT",
  290. []
  291. >;
  292. def SETE_INT : R600_2OP <
  293. 0x3A, "SETE_INT",
  294. []>{
  295. let AMDILOp = AMDILInst.IEQ;
  296. }
  297. def SETGT_INT : R600_2OP <
  298. 0x3B, "SETGT_INT", // Was printed as "SGT_INT"; now matches the def name and the sibling SET*_INT mnemonics.
  299. []
  300. >;
  301. def SETGE_INT : R600_2OP <
  302. 0x3C, "SETGE_INT",
  303. []>{
  304. let AMDILOp = AMDILInst.IGE;
  305. }
  306. def SETNE_INT : R600_2OP <
  307. 0x3D, "SETNE_INT",
  308. []>{
  309. let AMDILOp = AMDILInst.INE;
  310. }
  311. def SETGT_UINT : R600_2OP <
  312. 0x3E, "SETGT_UINT",
  313. []>{
  314. let AMDILOp = AMDILInst.UGT;
  315. }
  316. def SETGE_UINT : R600_2OP <
  317. 0x3F, "SETGE_UINT",
  318. []>{
  319. let AMDILOp = AMDILInst.UGE;
  320. }
  321. def CNDE_INT : R600_3OP <
  322. 0x1C, "CNDE_INT",
  323. []
  324. >;
  325. /* Texture instructions */
  326. def TEX_SAMPLE : R600_TEX <
  327. 0x10, "TEX_SAMPLE",
  328. [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
  329. >;
  330. def TEX_SAMPLE_C : R600_TEX <
  331. 0x18, "TEX_SAMPLE_C",
  332. [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
  333. >;
  334. def TEX_SAMPLE_L : R600_TEX <
  335. 0x11, "TEX_SAMPLE_L",
  336. [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))]
  337. >;
  338. def TEX_SAMPLE_C_L : R600_TEX <
  339. 0x19, "TEX_SAMPLE_C_L",
  340. [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
  341. >;
  342. def TEX_SAMPLE_LB : R600_TEX <
  343. 0x12, "TEX_SAMPLE_LB",
  344. [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))]
  345. >;
  346. def TEX_SAMPLE_C_LB : R600_TEX <
  347. 0x1A, "TEX_SAMPLE_C_LB",
  348. [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
  349. >;
  350. def TEX_SAMPLE_G : R600_TEX <
  351. 0x14, "TEX_SAMPLE_G",
  352. [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, imm:$src1, imm:$src2))]
  353. >;
  354. def TEX_SAMPLE_C_G : R600_TEX <
  355. 0x1C, "TEX_SAMPLE_C_G",
  356. [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
  357. >;
  358. } // End Gen R600_CAYMAN
  359. def KILP : Pat <
  360. (int_AMDGPU_kilp),
  361. (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
  362. >;
  363. /* Helper classes for common instructions */
  364. class MUL_LIT_Common <bits<32> inst> : R600_3OP <
  365. inst, "MUL_LIT",
  366. []
  367. >;
  368. class MULADD_Common <bits<32> inst> : R600_3OP <
  369. inst, "MULADD",
  370. []> {
  371. let AMDILOp = AMDILInst.MAD_f32;
  372. }
  373. class CNDE_Common <bits<32> inst> : R600_3OP <
  374. inst, "CNDE",
  375. []> {
  376. let AMDILOp = AMDILInst.CMOVLOG_f32;
  377. }
  378. class CNDGT_Common <bits<32> inst> : R600_3OP <
  379. inst, "CNDGT",
  380. []
  381. >;
  382. class CNDGE_Common <bits<32> inst> : R600_3OP <
  383. inst, "CNDGE",
  384. [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
  385. >;
  386. class DOT4_Common <bits<32> inst> : R600_REDUCTION < // Selected for int_AMDGPU_dp4 on two 128-bit registers, producing one R600_Reg32.
  387. inst,
  388. (ins R600_Reg128:$src0, R600_Reg128:$src1),
  389. "DOT4 $dst, $src0, $src1", // Comma after $dst added so operands are separated consistently.
  390. [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
  391. >;
  392. class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
  393. inst, "EXP_IEEE",
  394. []> {
  395. let AMDILOp = AMDILInst.EXP_f32;
  396. }
  397. class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
  398. inst, "FLT_TO_INT", []> {
  399. let AMDILOp = AMDILInst.FTOI;
  400. }
  401. class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
  402. inst, "INT_TO_FLT", []> {
  403. let AMDILOp = AMDILInst.ITOF;
  404. }
  405. class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP <
  406. inst, "LOG_CLAMPED",
  407. []
  408. >;
  409. class LOG_IEEE_Common <bits<32> inst> : R600_1OP <
  410. inst, "LOG_IEEE",
  411. []> {
  412. let AMDILOp = AMDILInst.LOG_f32;
  413. }
  414. class LSHL_Common <bits<32> inst> : R600_2OP < // Shift/multiply helpers: R600_2OP appends " $dst, $src0, $src1", so only the mnemonic is passed.
  415. inst, "LSHL",
  416. [] >{
  417. let AMDILOp = AMDILInst.SHL_i32;
  418. }
  419. class LSHR_Common <bits<32> inst> : R600_2OP <
  420. inst, "LSHR",
  421. [] >{
  422. let AMDILOp = AMDILInst.USHR_i32;
  423. }
  424. class MULHI_INT_Common <bits<32> inst> : R600_2OP <
  425. inst, "MULHI_INT",
  426. [] >{
  427. let AMDILOp = AMDILInst.SMULHI_i32;
  428. }
  429. class MULHI_UINT_Common <bits<32> inst> : R600_2OP <
  430. inst, "MULHI_UINT", // Was printed as "MULHI"; now matches the class name and the MULLO_UINT sibling.
  431. []
  432. >;
  433. class MULLO_INT_Common <bits<32> inst> : R600_2OP <
  434. inst, "MULLO_INT",
  435. [] >{
  436. let AMDILOp = AMDILInst.SMUL_i32;
  437. }
  438. class MULLO_UINT_Common <bits<32> inst> : R600_2OP <
  439. inst, "MULLO_UINT",
  440. []
  441. >;
  442. class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP <
  443. inst, "RECIP_CLAMPED",
  444. []
  445. >;
  446. class RECIP_IEEE_Common <bits<32> inst> : R600_1OP < // Selected for the int_AMDGPU_rcp intrinsic.
  447. inst, "RECIP_IEEE",
  448. [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]> {
  449. let AMDILOp = AMDILInst.RSQ_f32; // NOTE(review): a reciprocal mapped to an AMDIL op named RSQ looks like a mismatch - confirm the intended AMDILInst.
  450. }
  451. class RECIP_UINT_Common <bits<32> inst> : R600_1OP < // No selection pattern; R600_1OP appends " $dst, $src" to the mnemonic.
  452. inst, "RECIP_UINT", // Was "RECIP_INT $dst, $src": wrong mnemonic and a duplicated operand list.
  453. []
  454. >;
  455. class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP <
  456. inst, "RECIPSQRT_CLAMPED",
  457. [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
  458. >;
  459. class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP <
  460. inst, "RECIPSQRT_IEEE",
  461. []
  462. >;
  463. class SIN_Common <bits<32> inst> : R600_1OP <
  464. inst, "SIN",
  465. []>{
  466. let AMDILOp = AMDILInst.SIN_f32;
  467. let Trig = 1;
  468. }
  469. class COS_Common <bits<32> inst> : R600_1OP <
  470. inst, "COS",
  471. []> {
  472. let AMDILOp = AMDILInst.COS_f32;
  473. let Trig = 1;
  474. }
  475. /* Helper patterns for complex intrinsics */
  476. /* -------------------------------------- */
  477. class DIV_Common <InstR600 recip_ieee> : Pat< // Rewrites int_AMDGPU_div(src0, src1) as MUL(src0, recip_ieee(src1)).
  478. (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
  479. (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) // Uses the MUL def, whose asm string is "MUL NON-IEEE".
  480. >;
  481. class LRP_Common <InstR600 muladd> : Pat < // Rewrites int_AMDGPU_lrp(src0, src1, src2) using the supplied multiply-add instruction.
  482. (int_AMDGPU_lrp R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  483. (muladd R600_Reg32:$src0, R600_Reg32:$src1, (MUL (SUB_f32 ONE, R600_Reg32:$src0), R600_Reg32:$src2)) // muladd(src0, src1, MUL(ONE - src0, src2)).
  484. >;
  485. class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat < // Rewrites int_AMDGPU_ssg(src) as two nested conditional selects.
  486. (int_AMDGPU_ssg R600_Reg32:$src),
  487. (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE))) // cndgt(src, ONE, cndge(src, ZERO, NEG_ONE)).
  488. >;
  489. class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < // Rewrites int_TGSI_lit_z(src_x, src_y, src_w) using the three supplied instructions.
  490. (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
  491. (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) // exp_ieee(mul_lit(log_clamped(MAX(src_y, ZERO)), src_w, src_x)).
  492. >;
  493. /* ---------------------- */
  494. /* R600 / R700 Only Instructions */
  495. /* ---------------------- */
  496. let Predicates = [isR600] in {
  497. let Gen = AMDGPUGen.R600 in {
  498. def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  499. def MULADD_r600 : MULADD_Common<0x10>;
  500. def CNDE_r600 : CNDE_Common<0x18>;
  501. def CNDGT_r600 : CNDGT_Common<0x19>;
  502. def CNDGE_r600 : CNDGE_Common<0x1A>;
  503. def DOT4_r600 : DOT4_Common<0x50>;
  504. def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
  505. def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
  506. def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
  507. def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
  508. def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
  509. def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  510. def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
  511. def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
  512. def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
  513. def SIN_r600 : SIN_Common<0x6E>;
  514. def COS_r600 : COS_Common<0x6F>;
  515. def LSHR_r600 : LSHR_Common<0x71>;
  516. def LSHL_r600 : LSHL_Common<0x72>;
  517. def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
  518. def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
  519. def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
  520. def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
  521. def RECIP_UINT_r600 : RECIP_UINT_Common <0x77>;
  522. } // End AMDGPUGen.R600
  523. def DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  524. def LRP_r600 : LRP_Common<MULADD_r600>;
  525. def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>;
  526. def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
  527. def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
  528. }
  529. /* ----------------- */
  530. /* R700+ Trig helper */
  531. /* ----------------- */
  532. /*
  533. class TRIG_HELPER_r700 <InstR600 trig_inst>: Pat <
  534. (trig_inst R600_Reg32:$src),
  535. (trig_inst (fmul R600_Reg32:$src, (PI))))
  536. >;
  537. */
  538. /* ---------------------- */
  539. /* Evergreen Instructions */
  540. /* ---------------------- */
  541. let Predicates = [isEG] in {
  542. let Gen = AMDGPUGen.EG in {
  543. def RAT_WRITE_CACHELESS_eg :
  544. EG_CF_RAT <0x57, 0x2, (outs), (ins R600_TReg32_X:$rw_gpr,
  545. R600_TReg32_X:$index_gpr, i32imm:$rat_id), "">
  546. {
  547. /*
  548. let Inst{3-0} = RAT_ID;
  549. let Inst{21-15} = RW_GPR;
  550. let Inst{29-23} = INDEX_GPR;
  551. /* Property of the UAV */
  552. let Inst{31-30} = ELEM_SIZE;
  553. */
  554. let RIM = 0;
  555. /* XXX: Have a separate instruction for non-indexed writes. */
  556. let TYPE = 1;
  557. let RW_REL = 0;
  558. let ELEM_SIZE = 0;
  559. /*
  560. let ARRAY_SIZE = 0;
  561. let COMP_MASK = 1;
  562. let BURST_COUNT = 0;
  563. let VPM = 0;
  564. let EOP = 0;
  565. let MARK = 0;
  566. let BARRIER = 1;
  567. */
  568. }
  569. def VTX_READ_eg : InstR600ISA < (outs R600_TReg32_X:$dst),
  570. (ins R600_TReg32_X:$src, i32imm:$buffer_id),
  571. "VTX_READ_eg $dst, $src", []>
  572. {
  573. /*
  574. bits<7> DST_GPR;
  575. bits<7> SRC_GPR;
  576. bits<8> BUFFER_ID;
  577. */
  578. /* If any of these fields below need to be calculated at compile time, add
  579. * an ins operand for them and move them to the list of operands above. */
  580. /* XXX: This instruction is manual encoded, so none of these values are used.
  581. */
  582. /*
  583. bits<5> VC_INST = 0; //VC_INST_FETCH
  584. bits<2> FETCH_TYPE = 2;
  585. bits<1> FETCH_WHOLE_QUAD = 1;
  586. bits<1> SRC_REL = 0;
  587. bits<2> SRC_SEL_X = 0;
  588. bits<6> MEGA_FETCH_COUNT = 4;
  589. */
  590. /*
  591. bits<1> DST_REL = 0;
  592. bits<3> DST_SEL_X = 0;
  593. bits<3> DST_SEL_Y = 7; //Masked
  594. bits<3> DST_SEL_Z = 7; //Masked
  595. bits<3> DST_SEL_W = 7; //Masked
  596. bits<1> USE_CONST_FIELDS = 1; //Masked
  597. bits<6> DATA_FORMAT = 0;
  598. bits<2> NUM_FORMAT_ALL = 0;
  599. bits<1> FORMAT_COMP_ALL = 0;
  600. bits<1> SRF_MODE_ALL = 0;
  601. */
  602. /*
  603. let Inst{4-0} = VC_INST;
  604. let Inst{6-5} = FETCH_TYPE;
  605. let Inst{7} = FETCH_WHOLE_QUAD;
  606. let Inst{15-8} = BUFFER_ID;
  607. let Inst{22-16} = SRC_GPR;
  608. let Inst{23} = SRC_REL;
  609. let Inst{25-24} = SRC_SEL_X;
  610. let Inst{31-26} = MEGA_FETCH_COUNT;
  611. */
  612. /* DST_GPR is OK to leave uncommented, because LLVM 3.0 only prevents you
  613. * from statically setting bits > 31. This field will be set by
  614. * getMachineValueOp which can set bits > 31.
  615. */
  616. // let Inst{38-32} = DST_GPR;
  617. /* XXX: Uncomment for LLVM 3.1 which supports 64-bit instructions */
  618. /*
  619. let Inst{39} = DST_REL;
  620. let Inst{40} = 0; //Reserved
  621. let Inst{43-41} = DST_SEL_X;
  622. let Inst{46-44} = DST_SEL_Y;
  623. let Inst{49-47} = DST_SEL_Z;
  624. let Inst{52-50} = DST_SEL_W;
  625. let Inst{53} = USE_CONST_FIELDS;
  626. let Inst{59-54} = DATA_FORMAT;
  627. let Inst{61-60} = NUM_FORMAT_ALL;
  628. let Inst{62} = FORMAT_COMP_ALL;
  629. let Inst{63} = SRF_MODE_ALL;
  630. */
  631. }
  632. } // End AMDGPUGen.EG
  633. /* XXX: Need to convert PTR to rat_id */
  634. /*
  635. def : Pat <(store_global (f32 R600_Reg32:$value), node:$ptr),
  636. (RAT_WRITE_CACHELESS_eg (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
  637. (f32 R600_Reg32:$value),
  638. sel_x),
  639. (f32 ZERO), 0, R600_Reg32:$ptr)>;
  640. */
  641. class VTX_Param_Read_Pattern <ValueType vt> : Pat <
  642. (vt (load_param ADDRParam:$mem)),
  643. (VTX_READ_eg (i32 R600_Reg32:$mem), 0)>;
  644. def : VTX_Param_Read_Pattern <f32>;
  645. def : VTX_Param_Read_Pattern <i32>;
  646. } // End isEG Predicate
  647. /* ------------------------------- */
  648. /* Evergreen / Cayman Instructions */
  649. /* ------------------------------- */
  650. let Predicates = [isEGorCayman] in {
  651. class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat< // Maps a one-operand intrinsic onto `trig` applied to a pre-scaled input.
  652. (intr R600_Reg32:$src),
  653. (trig (MUL (MOV (LOADCONST_i32 CONST.TWO_PI_INV)), R600_Reg32:$src)) // The input is multiplied by the CONST.TWO_PI_INV constant before `trig` is applied.
  654. >;
  655. let Gen = AMDGPUGen.EG_CAYMAN in {
  656. def MULADD_eg : MULADD_Common<0x14>;
  657. def LSHR_eg : LSHR_Common<0x16>;
  658. def LSHL_eg : LSHL_Common<0x17>;
  659. def CNDE_eg : CNDE_Common<0x19>;
  660. def CNDGT_eg : CNDGT_Common<0x1A>;
  661. def CNDGE_eg : CNDGE_Common<0x1B>;
  662. def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
  663. def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50>;
  664. def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
  665. def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
  666. def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
  667. def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
  668. def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
  669. def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
  670. def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
  671. def SIN_eg : SIN_Common<0x8D>;
  672. def COS_eg : COS_Common<0x8E>;
  673. def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
  674. def MULHI_INT_eg : MULHI_INT_Common<0x90>;
  675. def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
  676. def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
  677. def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
  678. def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
  679. def DOT4_eg : DOT4_Common<0xBE>;
  680. } // End AMDGPUGen.EG_CAYMAN
  681. def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
  682. def LRP_eg : LRP_Common<MULADD_eg>;
  683. def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;
  684. def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>;
  685. def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
  686. def : TRIG_eg <SIN_eg, int_AMDGPU_sin>;
  687. def : TRIG_eg <COS_eg, int_AMDGPU_cos>;
  688. }
  689. let Predicates = [isCayman] in {
  690. let Gen = AMDGPUGen.CAYMAN in {
  691. /* XXX: I'm not sure if this opcode is correct. */
  692. def RECIP_UINT_cm : RECIP_UINT_Common<0x77>;
  693. } // End AMDGPUGen.CAYMAN
  694. } // End isCayman
  695. /* Other Instructions */
  696. let isCodeGenOnly = 1 in {
  697. /*
  698. def SWIZZLE : AMDGPUShaderInst <
  699. (outs GPRV4F32:$dst),
  700. (ins GPRV4F32:$src0, i32imm:$src1),
  701. "SWIZZLE $dst, $src0, $src1",
  702. [(set GPRV4F32:$dst, (int_AMDGPU_swizzle GPRV4F32:$src0, imm:$src1))]
  703. >;
  704. */
  705. def LAST : AMDGPUShaderInst <
  706. (outs),
  707. (ins),
  708. "LAST",
  709. []
  710. >;
  711. def GET_CHAN : AMDGPUShaderInst <
  712. (outs R600_Reg32:$dst),
  713. (ins R600_Reg128:$src0, i32imm:$src1),
  714. "GET_CHAN $dst, $src0, $src1",
  715. []
  716. >;
  717. def MULLIT : AMDGPUShaderInst < // Selected for int_AMDGPU_mullit: three R600_Reg32 inputs, one R600_Reg128 result.
  718. (outs R600_Reg128:$dst),
  719. (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  720. "MULLIT $dst, $src0, $src1, $src2", // $src2 added: the asm string previously omitted the third input operand.
  721. [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
  722. >;
  723. let usesCustomInserter = 1, isPseudo = 1 in {
  724. class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst <
  725. (outs R600_TReg32:$dst),
  726. (ins),
  727. asm,
  728. [(set R600_TReg32:$dst, (intr))]
  729. >;
  730. def TGID_X : R600PreloadInst <"TGID_X", int_r600_read_tgid_x>;
  731. def TGID_Y : R600PreloadInst <"TGID_Y", int_r600_read_tgid_y>;
  732. def TGID_Z : R600PreloadInst <"TGID_Z", int_r600_read_tgid_z>;
  733. def TIDIG_X : R600PreloadInst <"TIDIG_X", int_r600_read_tidig_x>;
  734. def TIDIG_Y : R600PreloadInst <"TIDIG_Y", int_r600_read_tidig_y>;
  735. def TIDIG_Z : R600PreloadInst <"TIDIG_Z", int_r600_read_tidig_z>;
  736. def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>;
  737. def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>;
  738. def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>;
  739. def GLOBAL_SIZE_X : R600PreloadInst <"GLOBAL_SIZE_X",
  740. int_r600_read_global_size_x>;
  741. def GLOBAL_SIZE_Y : R600PreloadInst <"GLOBAL_SIZE_Y",
  742. int_r600_read_global_size_y>;
  743. def GLOBAL_SIZE_Z : R600PreloadInst <"GLOBAL_SIZE_Z",
  744. int_r600_read_global_size_z>;
  745. def LOCAL_SIZE_X : R600PreloadInst <"LOCAL_SIZE_X",
  746. int_r600_read_local_size_x>;
  747. def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y",
  748. int_r600_read_local_size_y>;
  749. def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z",
  750. int_r600_read_local_size_z>;
  751. } // End usesCustomInserter = 1, isPseudo = 1
  752. } // End isCodeGenOnly = 1
  753. let isPseudo = 1 in {
  754. def LOAD_VTX : AMDGPUShaderInst <
  755. (outs R600_Reg32:$dst),
  756. (ins MEMri:$mem),
  757. "LOAD_VTX",
  758. [(set (i32 R600_Reg32:$dst), (load_param ADDRParam:$mem))]
  759. >;
  760. } //End isPseudo
  761. def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
  762. def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
  763. def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
  764. def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
  765. def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>;
  766. def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
  767. def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>;
  768. def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;
  769. include "R600ShaderPatterns.td"
  770. // We need this pattern to avoid having real registers in PHI nodes.
  771. // For some reason this pattern only works when it comes after the other
  772. // instruction defs.
  773. def : Pat <
  774. (int_R600_load_input imm:$src),
  775. (LOAD_INPUT imm:$src)
  776. >;
  777. } // End isR600toCayman Predicate