Clone of mesa.
Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

cell_gen_fp.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. /**************************************************************************
  2. *
  3. * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  4. * All Rights Reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sub license, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice (including the
  15. * next paragraph) shall be included in all copies or substantial portions
  16. * of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25. *
  26. **************************************************************************/
  27. /**
  28. * Generate SPU fragment program/shader code.
  29. *
  30. * Note that we generate SOA-style code here. So each TGSI instruction
  31. * operates on four pixels (and is translated into four SPU instructions,
  32. * generally speaking).
  33. *
  34. * \author Brian Paul
  35. */
  36. #include "pipe/p_defines.h"
  37. #include "pipe/p_state.h"
  38. #include "pipe/p_shader_tokens.h"
  39. #include "tgsi/tgsi_parse.h"
  40. #include "tgsi/tgsi_util.h"
  41. #include "tgsi/tgsi_exec.h"
  42. #include "tgsi/tgsi_dump.h"
  43. #include "rtasm/rtasm_ppc_spe.h"
  44. #include "util/u_memory.h"
  45. #include "cell_context.h"
  46. #include "cell_gen_fp.h"
  /** Debug switch: any non-zero value enables the debug/disassembly printfs. */
  #define DISASSEM 1
  49. /**
  50. * Context needed during code generation.
  51. */
  52. struct codegen
  53. {
  54. int inputs_reg; /**< 1st function parameter */
  55. int outputs_reg; /**< 2nd function parameter */
  56. int constants_reg; /**< 3rd function parameter */
  57. int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
  58. int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
  59. /** Per-instruction temps / intermediate temps */
  60. int num_itemps;
  61. int itemps[3];
  62. struct spe_function *f;
  63. boolean error;
  64. };
  65. /**
  66. * Allocate an intermediate temporary register.
  67. */
  68. static int
  69. get_itemp(struct codegen *gen)
  70. {
  71. int t = spe_allocate_available_register(gen->f);
  72. assert(gen->num_itemps < Elements(gen->itemps));
  73. gen->itemps[gen->num_itemps++] = t;
  74. return t;
  75. }
  76. /**
  77. * Free all intermediate temporary registers. To be called after each
  78. * instruction has been emitted.
  79. */
  80. static void
  81. free_itemps(struct codegen *gen)
  82. {
  83. int i;
  84. for (i = 0; i < gen->num_itemps; i++) {
  85. spe_release_register(gen->f, gen->itemps[i]);
  86. }
  87. gen->num_itemps = 0;
  88. }
  89. /**
  90. * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
  91. * The register is allocated and initialized upon the first call.
  92. */
  93. static int
  94. get_const_one_reg(struct codegen *gen)
  95. {
  96. if (gen->one_reg <= 0) {
  97. gen->one_reg = spe_allocate_available_register(gen->f);
  98. }
  99. /* one = {1.0, 1.0, 1.0, 1.0} */
  100. spe_load_float(gen->f, gen->one_reg, 1.0f);
  101. #if DISASSEM
  102. printf("il\tr%d, 1.0f\n", gen->one_reg);
  103. #endif
  104. return gen->one_reg;
  105. }
  106. /**
  107. * Return the index of the SPU temporary containing the named TGSI
  108. * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
  109. * just return the corresponding SPE register. If the TGIS register
  110. * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
  111. * and emit an SPE load instruction.
  112. */
  113. static int
  114. get_src_reg(struct codegen *gen,
  115. int channel,
  116. const struct tgsi_full_src_register *src)
  117. {
  118. int reg;
  119. int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
  120. assert(swizzle >= 0);
  121. assert(swizzle <= 3);
  122. channel = swizzle;
  123. switch (src->SrcRegister.File) {
  124. case TGSI_FILE_TEMPORARY:
  125. reg = gen->temp_regs[src->SrcRegister.Index][channel];
  126. break;
  127. case TGSI_FILE_INPUT:
  128. {
  129. /* offset is measured in quadwords, not bytes */
  130. int offset = src->SrcRegister.Index * 4 + channel;
  131. reg = get_itemp(gen);
  132. /* Load: reg = memory[(machine_reg) + offset] */
  133. spe_lqd(gen->f, reg, gen->inputs_reg, offset);
  134. #if DISASSEM
  135. printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
  136. #endif
  137. }
  138. break;
  139. case TGSI_FILE_IMMEDIATE:
  140. /* xxx fall-through for now / fix */
  141. case TGSI_FILE_CONSTANT:
  142. /* xxx fall-through for now / fix */
  143. default:
  144. assert(0);
  145. }
  146. return reg;
  147. }
  148. /**
  149. * Return the index of an SPE register to use for the given TGSI register.
  150. * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
  151. * corresponding SPE register is returned. If the TGSI register is
  152. * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
  153. * See store_dest_reg() below...
  154. */
  155. static int
  156. get_dst_reg(struct codegen *gen,
  157. int channel,
  158. const struct tgsi_full_dst_register *dest)
  159. {
  160. int reg;
  161. switch (dest->DstRegister.File) {
  162. case TGSI_FILE_TEMPORARY:
  163. reg = gen->temp_regs[dest->DstRegister.Index][channel];
  164. break;
  165. case TGSI_FILE_OUTPUT:
  166. reg = get_itemp(gen);
  167. break;
  168. default:
  169. assert(0);
  170. }
  171. return reg;
  172. }
  173. /**
  174. * When a TGSI instruction is writing to an output register, this
  175. * function emits the SPE store instruction to store the value_reg.
  176. * \param value_reg the SPE register containing the value to store.
  177. * This would have been returned by get_dst_reg().
  178. */
  179. static void
  180. store_dest_reg(struct codegen *gen,
  181. int value_reg, int channel,
  182. const struct tgsi_full_dst_register *dest)
  183. {
  184. switch (dest->DstRegister.File) {
  185. case TGSI_FILE_TEMPORARY:
  186. /* no-op */
  187. break;
  188. case TGSI_FILE_OUTPUT:
  189. {
  190. /* offset is measured in quadwords, not bytes */
  191. int offset = dest->DstRegister.Index * 4 + channel;
  192. /* Store: memory[(machine_reg) + offset] = reg */
  193. spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
  194. #if DISASSEM
  195. printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
  196. #endif
  197. }
  198. break;
  199. default:
  200. assert(0);
  201. }
  202. }
  203. static boolean
  204. emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
  205. {
  206. int ch;
  207. for (ch = 0; ch < 4; ch++) {
  208. if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
  209. int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
  210. int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
  211. /* XXX we don't always need to actually emit a mov instruction here */
  212. spe_move(gen->f, dst_reg, src_reg);
  213. #if DISASSEM
  214. printf("mov\tr%d, r%d\n", dst_reg, src_reg);
  215. #endif
  216. store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
  217. free_itemps(gen);
  218. }
  219. }
  220. return true;
  221. }
  222. /**
  223. * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
  224. * becomes (up to) four SPU "fa" instructions because we're doing SOA
  225. * processing.
  226. */
  227. static boolean
  228. emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
  229. {
  230. int ch;
  231. /* Loop over Red/Green/Blue/Alpha channels */
  232. for (ch = 0; ch < 4; ch++) {
  233. /* If the dest R, G, B or A writemask is enabled... */
  234. if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
  235. /* get indexes of the two src, one dest SPE registers */
  236. int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
  237. int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
  238. int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
  239. /* Emit actual SPE instruction: d = s1 + s2 */
  240. spe_fa(gen->f, d_reg, s1_reg, s2_reg);
  241. #if DISASSEM
  242. printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
  243. #endif
  244. /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
  245. store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
  246. /* Free any intermediate temps we allocated */
  247. free_itemps(gen);
  248. }
  249. }
  250. return true;
  251. }
  252. /**
  253. * Emit multiply. See emit_ADD for comments.
  254. */
  255. static boolean
  256. emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
  257. {
  258. int ch;
  259. for (ch = 0; ch < 4; ch++) {
  260. if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
  261. int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
  262. int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
  263. int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
  264. /* d = s1 * s2 */
  265. spe_fm(gen->f, d_reg, s1_reg, s2_reg);
  266. #if DISASSEM
  267. printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
  268. #endif
  269. store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
  270. free_itemps(gen);
  271. }
  272. }
  273. return true;
  274. }
  275. /**
  276. * Emit set-if-greater-than.
  277. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
  278. * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
  279. * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
  280. */
  281. static boolean
  282. emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
  283. {
  284. int ch;
  285. for (ch = 0; ch < 4; ch++) {
  286. if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
  287. int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
  288. int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
  289. int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
  290. /* d = (s1 > s2) */
  291. spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
  292. #if DISASSEM
  293. printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
  294. #endif
  295. /* convert d from 0x0/0xffffffff to 0.0/1.0 */
  296. /* d = d & one_reg */
  297. spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
  298. #if DISASSEM
  299. printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
  300. #endif
  301. store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
  302. free_itemps(gen);
  303. }
  304. }
  305. return true;
  306. }
  307. /**
  308. * Emit END instruction.
  309. * We just return from the shader function at this point.
  310. *
  311. * Note that there may be more code after this that would be
  312. * called by TGSI_OPCODE_CALL.
  313. */
  314. static boolean
  315. emit_END(struct codegen *gen)
  316. {
  317. /* return from function call */
  318. spe_bi(gen->f, SPE_REG_RA, 0, 0);
  319. #if DISASSEM
  320. printf("bi\trRA\n");
  321. #endif
  322. return true;
  323. }
  324. /**
  325. * Emit code for the given instruction. Just a big switch stmt.
  326. */
  327. static boolean
  328. emit_instruction(struct codegen *gen,
  329. const struct tgsi_full_instruction *inst)
  330. {
  331. switch (inst->Instruction.Opcode) {
  332. case TGSI_OPCODE_MOV:
  333. return emit_MOV(gen, inst);
  334. case TGSI_OPCODE_MUL:
  335. return emit_MUL(gen, inst);
  336. case TGSI_OPCODE_ADD:
  337. return emit_ADD(gen, inst);
  338. case TGSI_OPCODE_SGT:
  339. return emit_SGT(gen, inst);
  340. case TGSI_OPCODE_END:
  341. return emit_END(gen);
  342. /* XXX lots more cases to do... */
  343. default:
  344. return false;
  345. }
  346. return true;
  347. }
  348. /**
  349. * Emit "code" for a TGSI declaration.
  350. * We only care about TGSI TEMPORARY register declarations at this time.
  351. * For each TGSI TEMPORARY we allocate four SPE registers.
  352. */
  353. static void
  354. emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
  355. {
  356. int i, ch;
  357. switch (decl->Declaration.File) {
  358. case TGSI_FILE_TEMPORARY:
  359. #if DISASSEM
  360. printf("Declare temp reg %d .. %d\n",
  361. decl->DeclarationRange.First,
  362. decl->DeclarationRange.Last);
  363. #endif
  364. for (i = decl->DeclarationRange.First;
  365. i <= decl->DeclarationRange.Last;
  366. i++) {
  367. for (ch = 0; ch < 4; ch++) {
  368. gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
  369. }
  370. /* XXX if we run out of SPE registers, we need to spill
  371. * to SPU memory. someday...
  372. */
  373. #if DISASSEM
  374. printf(" SPE regs: %d %d %d %d\n",
  375. gen->temp_regs[i][0],
  376. gen->temp_regs[i][1],
  377. gen->temp_regs[i][2],
  378. gen->temp_regs[i][3]);
  379. #endif
  380. }
  381. break;
  382. default:
  383. ; /* ignore */
  384. }
  385. }
  386. /**
  387. * Translate TGSI shader code to SPE instructions. This is done when
  388. * the state tracker gives us a new shader (via pipe->create_fs_state()).
  389. *
  390. * \param cell the rendering context (in)
  391. * \param tokens the TGSI shader (in)
  392. * \param f the generated function (out)
  393. */
  394. boolean
  395. cell_gen_fragment_program(struct cell_context *cell,
  396. const struct tgsi_token *tokens,
  397. struct spe_function *f)
  398. {
  399. struct tgsi_parse_context parse;
  400. struct codegen gen;
  401. memset(&gen, 0, sizeof(gen));
  402. gen.f = f;
  403. /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
  404. gen.inputs_reg = 3; /* pointer to inputs array */
  405. gen.outputs_reg = 4; /* pointer to outputs array */
  406. gen.constants_reg = 5; /* pointer to constants array */
  407. spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
  408. spe_allocate_register(f, gen.inputs_reg);
  409. spe_allocate_register(f, gen.outputs_reg);
  410. spe_allocate_register(f, gen.constants_reg);
  411. #if DISASSEM
  412. printf("Begin %s\n", __FUNCTION__);
  413. tgsi_dump(tokens, 0);
  414. #endif
  415. tgsi_parse_init(&parse, tokens);
  416. while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
  417. tgsi_parse_token(&parse);
  418. switch (parse.FullToken.Token.Type) {
  419. case TGSI_TOKEN_TYPE_IMMEDIATE:
  420. #if 0
  421. if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
  422. goto fail;
  423. #endif
  424. break;
  425. case TGSI_TOKEN_TYPE_DECLARATION:
  426. emit_declaration(&gen, &parse.FullToken.FullDeclaration);
  427. break;
  428. case TGSI_TOKEN_TYPE_INSTRUCTION:
  429. if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
  430. gen.error = true;
  431. }
  432. break;
  433. default:
  434. assert(0);
  435. }
  436. }
  437. if (gen.error) {
  438. /* terminate the SPE code */
  439. return emit_END(&gen);
  440. }
  441. #if DISASSEM
  442. printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
  443. printf("End %s\n", __FUNCTION__);
  444. #endif
  445. tgsi_parse_free( &parse );
  446. return !gen.error;
  447. }