Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

r3xx_vertprog.c 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. /*
  2. * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * on the rights to use, copy, modify, merge, publish, distribute, sub
  8. * license, and/or sell copies of the Software, and to permit persons to whom
  9. * the Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21. * USE OR OTHER DEALINGS IN THE SOFTWARE. */
  22. #include "radeon_compiler.h"
  23. #include "../r300_reg.h"
  24. #include "radeon_nqssadce.h"
  25. #include "radeon_program.h"
  26. #include "radeon_program_alu.h"
  27. #include "shader/prog_print.h"
  /*
   * Build a source operand that reads a constant (ZERO or ONE) in all four
   * channels.
   *
   * The operand reuses the register index, register class and relative
   * addressing bit of source number `src_idx` of the current instruction,
   * which must already be a valid source, and replaces every swizzle
   * selector with `swz` (e.g. SWIZZLE_ZERO). Negation is disabled.
   *
   * The macro expects `vp` and `vpi` to be in scope where it is expanded.
   */
  #define __CONST(src_idx, swz) \
      (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[src_idx]), \
                       t_swizzle(swz), \
                       t_swizzle(swz), \
                       t_swizzle(swz), \
                       t_swizzle(swz), \
                       t_src_class(vpi->SrcReg[src_idx].File), \
                       NEGATE_NONE) | (vpi->SrcReg[src_idx].RelAddr << 4))
  40. static unsigned long t_dst_mask(GLuint mask)
  41. {
  42. /* WRITEMASK_* is equivalent to VSF_FLAG_* */
  43. return mask & WRITEMASK_XYZW;
  44. }
  45. static unsigned long t_dst_class(gl_register_file file)
  46. {
  47. switch (file) {
  48. case PROGRAM_TEMPORARY:
  49. return PVS_DST_REG_TEMPORARY;
  50. case PROGRAM_OUTPUT:
  51. return PVS_DST_REG_OUT;
  52. case PROGRAM_ADDRESS:
  53. return PVS_DST_REG_A0;
  54. /*
  55. case PROGRAM_INPUT:
  56. case PROGRAM_LOCAL_PARAM:
  57. case PROGRAM_ENV_PARAM:
  58. case PROGRAM_NAMED_PARAM:
  59. case PROGRAM_STATE_VAR:
  60. case PROGRAM_WRITE_ONLY:
  61. case PROGRAM_ADDRESS:
  62. */
  63. default:
  64. fprintf(stderr, "problem in %s", __FUNCTION__);
  65. _mesa_exit(-1);
  66. return -1;
  67. }
  68. }
  69. static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
  70. struct prog_dst_register *dst)
  71. {
  72. if (dst->File == PROGRAM_OUTPUT)
  73. return vp->outputs[dst->Index];
  74. return dst->Index;
  75. }
  76. static unsigned long t_src_class(gl_register_file file)
  77. {
  78. switch (file) {
  79. case PROGRAM_TEMPORARY:
  80. return PVS_SRC_REG_TEMPORARY;
  81. case PROGRAM_INPUT:
  82. return PVS_SRC_REG_INPUT;
  83. case PROGRAM_LOCAL_PARAM:
  84. case PROGRAM_ENV_PARAM:
  85. case PROGRAM_NAMED_PARAM:
  86. case PROGRAM_CONSTANT:
  87. case PROGRAM_STATE_VAR:
  88. return PVS_SRC_REG_CONSTANT;
  89. /*
  90. case PROGRAM_OUTPUT:
  91. case PROGRAM_WRITE_ONLY:
  92. case PROGRAM_ADDRESS:
  93. */
  94. default:
  95. fprintf(stderr, "problem in %s", __FUNCTION__);
  96. _mesa_exit(-1);
  97. return -1;
  98. }
  99. }
  100. static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
  101. {
  102. unsigned long aclass = t_src_class(a.File);
  103. unsigned long bclass = t_src_class(b.File);
  104. if (aclass != bclass)
  105. return GL_FALSE;
  106. if (aclass == PVS_SRC_REG_TEMPORARY)
  107. return GL_FALSE;
  108. if (a.RelAddr || b.RelAddr)
  109. return GL_TRUE;
  110. if (a.Index != b.Index)
  111. return GL_TRUE;
  112. return GL_FALSE;
  113. }
  114. static INLINE unsigned long t_swizzle(GLubyte swizzle)
  115. {
  116. /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
  117. return swizzle;
  118. }
  119. static unsigned long t_src_index(struct r300_vertex_program_code *vp,
  120. struct prog_src_register *src)
  121. {
  122. if (src->File == PROGRAM_INPUT) {
  123. assert(vp->inputs[src->Index] != -1);
  124. return vp->inputs[src->Index];
  125. } else {
  126. if (src->Index < 0) {
  127. fprintf(stderr,
  128. "negative offsets for indirect addressing do not work.\n");
  129. return 0;
  130. }
  131. return src->Index;
  132. }
  133. }
  134. /* these two functions should probably be merged... */
  135. static unsigned long t_src(struct r300_vertex_program_code *vp,
  136. struct prog_src_register *src)
  137. {
  138. /* src->Negate uses the NEGATE_ flags from program_instruction.h,
  139. * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
  140. */
  141. return PVS_SRC_OPERAND(t_src_index(vp, src),
  142. t_swizzle(GET_SWZ(src->Swizzle, 0)),
  143. t_swizzle(GET_SWZ(src->Swizzle, 1)),
  144. t_swizzle(GET_SWZ(src->Swizzle, 2)),
  145. t_swizzle(GET_SWZ(src->Swizzle, 3)),
  146. t_src_class(src->File),
  147. src->Negate) | (src->RelAddr << 4);
  148. }
  149. static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
  150. struct prog_src_register *src)
  151. {
  152. /* src->Negate uses the NEGATE_ flags from program_instruction.h,
  153. * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
  154. */
  155. return PVS_SRC_OPERAND(t_src_index(vp, src),
  156. t_swizzle(GET_SWZ(src->Swizzle, 0)),
  157. t_swizzle(GET_SWZ(src->Swizzle, 0)),
  158. t_swizzle(GET_SWZ(src->Swizzle, 0)),
  159. t_swizzle(GET_SWZ(src->Swizzle, 0)),
  160. t_src_class(src->File),
  161. src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
  162. (src->RelAddr << 4);
  163. }
  164. static GLboolean valid_dst(struct r300_vertex_program_code *vp,
  165. struct prog_dst_register *dst)
  166. {
  167. if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
  168. return GL_FALSE;
  169. } else if (dst->File == PROGRAM_ADDRESS) {
  170. assert(dst->Index == 0);
  171. }
  172. return GL_TRUE;
  173. }
  174. static void ei_vector1(struct r300_vertex_program_code *vp,
  175. GLuint hw_opcode,
  176. struct prog_instruction *vpi,
  177. GLuint * inst)
  178. {
  179. inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
  180. GL_FALSE,
  181. GL_FALSE,
  182. t_dst_index(vp, &vpi->DstReg),
  183. t_dst_mask(vpi->DstReg.WriteMask),
  184. t_dst_class(vpi->DstReg.File));
  185. inst[1] = t_src(vp, &vpi->SrcReg[0]);
  186. inst[2] = __CONST(0, SWIZZLE_ZERO);
  187. inst[3] = __CONST(0, SWIZZLE_ZERO);
  188. }
  189. static void ei_vector2(struct r300_vertex_program_code *vp,
  190. GLuint hw_opcode,
  191. struct prog_instruction *vpi,
  192. GLuint * inst)
  193. {
  194. inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
  195. GL_FALSE,
  196. GL_FALSE,
  197. t_dst_index(vp, &vpi->DstReg),
  198. t_dst_mask(vpi->DstReg.WriteMask),
  199. t_dst_class(vpi->DstReg.File));
  200. inst[1] = t_src(vp, &vpi->SrcReg[0]);
  201. inst[2] = t_src(vp, &vpi->SrcReg[1]);
  202. inst[3] = __CONST(1, SWIZZLE_ZERO);
  203. }
  204. static void ei_math1(struct r300_vertex_program_code *vp,
  205. GLuint hw_opcode,
  206. struct prog_instruction *vpi,
  207. GLuint * inst)
  208. {
  209. inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
  210. GL_TRUE,
  211. GL_FALSE,
  212. t_dst_index(vp, &vpi->DstReg),
  213. t_dst_mask(vpi->DstReg.WriteMask),
  214. t_dst_class(vpi->DstReg.File));
  215. inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
  216. inst[2] = __CONST(0, SWIZZLE_ZERO);
  217. inst[3] = __CONST(0, SWIZZLE_ZERO);
  218. }
  219. static void ei_lit(struct r300_vertex_program_code *vp,
  220. struct prog_instruction *vpi,
  221. GLuint * inst)
  222. {
  223. //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
  224. inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
  225. GL_TRUE,
  226. GL_FALSE,
  227. t_dst_index(vp, &vpi->DstReg),
  228. t_dst_mask(vpi->DstReg.WriteMask),
  229. t_dst_class(vpi->DstReg.File));
  230. /* NOTE: Users swizzling might not work. */
  231. inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
  232. t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
  233. PVS_SRC_SELECT_FORCE_0, // Z
  234. t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
  235. t_src_class(vpi->SrcReg[0].File),
  236. vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
  237. (vpi->SrcReg[0].RelAddr << 4);
  238. inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
  239. t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
  240. PVS_SRC_SELECT_FORCE_0, // Z
  241. t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
  242. t_src_class(vpi->SrcReg[0].File),
  243. vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
  244. (vpi->SrcReg[0].RelAddr << 4);
  245. inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
  246. t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
  247. PVS_SRC_SELECT_FORCE_0, // Z
  248. t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
  249. t_src_class(vpi->SrcReg[0].File),
  250. vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
  251. (vpi->SrcReg[0].RelAddr << 4);
  252. }
  253. static void ei_mad(struct r300_vertex_program_code *vp,
  254. struct prog_instruction *vpi,
  255. GLuint * inst)
  256. {
  257. inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
  258. GL_FALSE,
  259. GL_TRUE,
  260. t_dst_index(vp, &vpi->DstReg),
  261. t_dst_mask(vpi->DstReg.WriteMask),
  262. t_dst_class(vpi->DstReg.File));
  263. inst[1] = t_src(vp, &vpi->SrcReg[0]);
  264. inst[2] = t_src(vp, &vpi->SrcReg[1]);
  265. inst[3] = t_src(vp, &vpi->SrcReg[2]);
  266. }
  267. static void ei_pow(struct r300_vertex_program_code *vp,
  268. struct prog_instruction *vpi,
  269. GLuint * inst)
  270. {
  271. inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
  272. GL_TRUE,
  273. GL_FALSE,
  274. t_dst_index(vp, &vpi->DstReg),
  275. t_dst_mask(vpi->DstReg.WriteMask),
  276. t_dst_class(vpi->DstReg.File));
  277. inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
  278. inst[2] = __CONST(0, SWIZZLE_ZERO);
  279. inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
  280. }
  281. static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
  282. {
  283. int i;
  284. int cur_reg;
  285. GLuint OutputsWritten, InputsRead;
  286. OutputsWritten = c->Base.Program.OutputsWritten;
  287. InputsRead = c->Base.Program.InputsRead;
  288. cur_reg = -1;
  289. for (i = 0; i < VERT_ATTRIB_MAX; i++) {
  290. if (InputsRead & (1 << i))
  291. c->code->inputs[i] = ++cur_reg;
  292. else
  293. c->code->inputs[i] = -1;
  294. }
  295. cur_reg = 0;
  296. for (i = 0; i < VERT_RESULT_MAX; i++)
  297. c->code->outputs[i] = -1;
  298. assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
  299. if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
  300. c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
  301. }
  302. if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
  303. c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
  304. }
  305. /* If we're writing back facing colors we need to send
  306. * four colors to make front/back face colors selection work.
  307. * If the vertex program doesn't write all 4 colors, lets
  308. * pretend it does by skipping output index reg so the colors
  309. * get written into appropriate output vectors.
  310. */
  311. if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
  312. c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
  313. } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
  314. OutputsWritten & (1 << VERT_RESULT_BFC1)) {
  315. cur_reg++;
  316. }
  317. if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
  318. c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
  319. } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
  320. OutputsWritten & (1 << VERT_RESULT_BFC1)) {
  321. cur_reg++;
  322. }
  323. if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
  324. c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
  325. } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
  326. cur_reg++;
  327. }
  328. if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
  329. c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
  330. } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
  331. cur_reg++;
  332. }
  333. for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
  334. if (OutputsWritten & (1 << i)) {
  335. c->code->outputs[i] = cur_reg++;
  336. }
  337. }
  338. if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
  339. c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
  340. }
  341. }
  342. static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
  343. {
  344. struct rc_instruction *rci;
  345. compiler->code->pos_end = 0; /* Not supported yet */
  346. compiler->code->length = 0;
  347. t_inputs_outputs(compiler);
  348. for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
  349. struct prog_instruction *vpi = &rci->I;
  350. GLuint *inst = compiler->code->body.d + compiler->code->length;
  351. /* Skip instructions writing to non-existing destination */
  352. if (!valid_dst(compiler->code, &vpi->DstReg))
  353. continue;
  354. if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
  355. rc_error(&compiler->Base, "Vertex program has too many instructions\n");
  356. return;
  357. }
  358. switch (vpi->Opcode) {
  359. case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
  360. case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
  361. case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
  362. case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
  363. case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
  364. case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
  365. case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
  366. case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
  367. case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
  368. case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
  369. case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
  370. case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
  371. case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
  372. case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
  373. case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
  374. case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
  375. case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
  376. case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
  377. case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
  378. case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
  379. default:
  380. rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
  381. return;
  382. }
  383. compiler->code->length += 4;
  384. if (compiler->Base.Error)
  385. return;
  386. }
  387. }
  388. struct temporary_allocation {
  389. GLuint Allocated:1;
  390. GLuint HwTemp:15;
  391. struct rc_instruction * LastRead;
  392. };
  393. static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
  394. {
  395. struct rc_instruction *inst;
  396. GLuint num_orig_temps = 0;
  397. GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
  398. struct temporary_allocation * ta;
  399. GLuint i, j;
  400. compiler->code->num_temporaries = 0;
  401. memset(hwtemps, 0, sizeof(hwtemps));
  402. /* Pass 1: Count original temporaries and allocate structures */
  403. for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
  404. GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
  405. GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
  406. for (i = 0; i < numsrcs; ++i) {
  407. if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
  408. if (inst->I.SrcReg[i].Index >= num_orig_temps)
  409. num_orig_temps = inst->I.SrcReg[i].Index + 1;
  410. }
  411. }
  412. if (numdsts) {
  413. if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
  414. if (inst->I.DstReg.Index >= num_orig_temps)
  415. num_orig_temps = inst->I.DstReg.Index + 1;
  416. }
  417. }
  418. }
  419. ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
  420. sizeof(struct temporary_allocation) * num_orig_temps);
  421. memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
  422. /* Pass 2: Determine original temporary lifetimes */
  423. for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
  424. GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
  425. for (i = 0; i < numsrcs; ++i) {
  426. if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
  427. ta[inst->I.SrcReg[i].Index].LastRead = inst;
  428. }
  429. }
  430. /* Pass 3: Register allocation */
  431. for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
  432. GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
  433. GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
  434. for (i = 0; i < numsrcs; ++i) {
  435. if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
  436. GLuint orig = inst->I.SrcReg[i].Index;
  437. inst->I.SrcReg[i].Index = ta[orig].HwTemp;
  438. if (ta[orig].Allocated && inst == ta[orig].LastRead)
  439. hwtemps[ta[orig].HwTemp] = GL_FALSE;
  440. }
  441. }
  442. if (numdsts) {
  443. if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
  444. GLuint orig = inst->I.DstReg.Index;
  445. if (!ta[orig].Allocated) {
  446. for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
  447. if (!hwtemps[j])
  448. break;
  449. }
  450. if (j >= VSF_MAX_FRAGMENT_TEMPS) {
  451. fprintf(stderr, "Out of hw temporaries\n");
  452. } else {
  453. ta[orig].Allocated = GL_TRUE;
  454. ta[orig].HwTemp = j;
  455. hwtemps[j] = GL_TRUE;
  456. if (j >= compiler->code->num_temporaries)
  457. compiler->code->num_temporaries = j + 1;
  458. }
  459. }
  460. inst->I.DstReg.Index = ta[orig].HwTemp;
  461. }
  462. }
  463. }
  464. }
  465. /**
  466. * Vertex engine cannot read two inputs or two constants at the same time.
  467. * Introduce intermediate MOVs to temporary registers to account for this.
  468. */
  469. static GLboolean transform_source_conflicts(
  470. struct radeon_compiler *c,
  471. struct rc_instruction* inst,
  472. void* unused)
  473. {
  474. GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
  475. if (num_operands == 3) {
  476. if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
  477. || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
  478. int tmpreg = rc_find_free_temporary(c);
  479. struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
  480. inst_mov->I.Opcode = OPCODE_MOV;
  481. inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
  482. inst_mov->I.DstReg.Index = tmpreg;
  483. inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
  484. reset_srcreg(&inst->I.SrcReg[2]);
  485. inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
  486. inst->I.SrcReg[2].Index = tmpreg;
  487. }
  488. }
  489. if (num_operands >= 2) {
  490. if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
  491. int tmpreg = rc_find_free_temporary(c);
  492. struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
  493. inst_mov->I.Opcode = OPCODE_MOV;
  494. inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
  495. inst_mov->I.DstReg.Index = tmpreg;
  496. inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
  497. reset_srcreg(&inst->I.SrcReg[1]);
  498. inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
  499. inst->I.SrcReg[1].Index = tmpreg;
  500. }
  501. }
  502. return GL_TRUE;
  503. }
  504. static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
  505. {
  506. int i;
  507. for(i = 0; i < 32; ++i) {
  508. if ((compiler->RequiredOutputs & (1 << i)) &&
  509. !(compiler->Base.Program.OutputsWritten & (1 << i))) {
  510. struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
  511. inst->I.Opcode = OPCODE_MOV;
  512. inst->I.DstReg.File = PROGRAM_OUTPUT;
  513. inst->I.DstReg.Index = i;
  514. inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
  515. inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
  516. inst->I.SrcReg[0].Index = 0;
  517. inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
  518. compiler->Base.Program.OutputsWritten |= 1 << i;
  519. }
  520. }
  521. }
  522. static void nqssadceInit(struct nqssadce_state* s)
  523. {
  524. struct r300_vertex_program_compiler * compiler = s->UserData;
  525. int i;
  526. for(i = 0; i < VERT_RESULT_MAX; ++i) {
  527. if (compiler->RequiredOutputs & (1 << i)) {
  528. if (i != VERT_RESULT_PSIZ)
  529. s->Outputs[i].Sourced = WRITEMASK_XYZW;
  530. else
  531. s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */
  532. }
  533. }
  534. }
  535. static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
  536. {
  537. (void) opcode;
  538. (void) reg;
  539. return GL_TRUE;
  540. }
  541. void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
  542. {
  543. rc_mesa_to_rc_program(&compiler->Base, compiler->program);
  544. compiler->program = 0;
  545. if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
  546. rc_copy_output(&compiler->Base,
  547. VERT_RESULT_HPOS,
  548. compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
  549. }
  550. if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
  551. rc_move_output(&compiler->Base,
  552. VERT_RESULT_FOGC,
  553. compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
  554. }
  555. addArtificialOutputs(compiler);
  556. {
  557. struct radeon_program_transformation transformations[] = {
  558. { &r300_transform_vertex_alu, 0 },
  559. };
  560. radeonLocalTransform(&compiler->Base, 1, transformations);
  561. }
  562. if (compiler->Base.Debug) {
  563. fprintf(stderr, "Vertex program after native rewrite:\n");
  564. rc_print_program(&compiler->Base.Program);
  565. fflush(stdout);
  566. }
  567. {
  568. /* Note: This pass has to be done seperately from ALU rewrite,
  569. * otherwise non-native ALU instructions with source conflits
  570. * will not be treated properly.
  571. */
  572. struct radeon_program_transformation transformations[] = {
  573. { &transform_source_conflicts, 0 },
  574. };
  575. radeonLocalTransform(&compiler->Base, 1, transformations);
  576. }
  577. if (compiler->Base.Debug) {
  578. fprintf(stderr, "Vertex program after source conflict resolve:\n");
  579. rc_print_program(&compiler->Base.Program);
  580. fflush(stdout);
  581. }
  582. {
  583. struct radeon_nqssadce_descr nqssadce = {
  584. .Init = &nqssadceInit,
  585. .IsNativeSwizzle = &swizzleIsNative,
  586. .BuildSwizzle = NULL
  587. };
  588. radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
  589. /* We need this step for reusing temporary registers */
  590. allocate_temporary_registers(compiler);
  591. if (compiler->Base.Debug) {
  592. fprintf(stderr, "Vertex program after NQSSADCE:\n");
  593. rc_print_program(&compiler->Base.Program);
  594. fflush(stdout);
  595. }
  596. }
  597. translate_vertex_program(compiler);
  598. rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
  599. compiler->code->InputsRead = compiler->Base.Program.InputsRead;
  600. compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
  601. }