Clone of mesa.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

nv30_fragprog.c 22KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. #include "pipe/p_context.h"
  2. #include "pipe/p_defines.h"
  3. #include "pipe/p_state.h"
  4. #include "util/u_inlines.h"
  5. #include "pipe/p_shader_tokens.h"
  6. #include "tgsi/tgsi_dump.h"
  7. #include "tgsi/tgsi_parse.h"
  8. #include "tgsi/tgsi_util.h"
  9. #include "nv30_context.h"
  /* Component selectors; pasted into the swz() macro below as SWZ_##x. */
  10. #define SWZ_X 0
  11. #define SWZ_Y 1
  12. #define SWZ_Z 2
  13. #define SWZ_W 3
  /* One bit per destination component; combined to form the write mask
   * handed to the arith()/tex() emitters in this file. */
  14. #define MASK_X 1
  15. #define MASK_Y 2
  16. #define MASK_Z 4
  17. #define MASK_W 8
  18. #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
  /* Defined ahead of the nv30_shader.h include -- presumably consumed by that
   * header as register defaults; TODO confirm against nv30_shader.h. */
  19. #define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
  20. #define DEF_CTEST NV30_FP_OP_COND_TR
  21. #include "nv30_shader.h"
  /* Source-register modifier shorthands; each forwards to an nv30_sr_* helper. */
  22. #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
  23. #define neg(s) nv30_sr_neg((s))
  /* NOTE(review): this function-like macro shares its name with the C library abs(). */
  24. #define abs(s) nv30_sr_abs((s))
  25. #define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
  /* Capacities of the consts[] and imm[] arrays in struct nv30_fpc. */
  26. #define MAX_CONSTS 128
  27. #define MAX_IMM 32
  /* Scratch state kept while one TGSI fragment shader is converted into
   * NV30 fragment-program instruction words. */
  28. struct nv30_fpc {
  /* Program whose insn/consts arrays are being filled in. */
  29. struct nv30_fragment_program *fp;
  /* TGSI input register index -> hardware input source, set from input declarations. */
  30. uint attrib_map[PIPE_MAX_SHADER_INPUTS];
  /* Highest hardware temp index seen so far for a TGSI temporary (TGSI index + 1). */
  31. int high_temp;
  /* Number of scratch temps handed out by temp(); reset for every TGSI instruction. */
  32. int temp_temp_count;
  /* Starts at 2 and is raised to (highest TEMP destination index + 1). */
  33. int num_regs;
  /* TGSI output indices declared with POSITION and COLOR semantics respectively. */
  34. uint depth_id;
  35. uint colour_id;
  /* Word offset in fp->insn of the instruction currently being emitted. */
  36. unsigned inst_offset;
  /* Constant slots: pipe >= 0 names a constant-buffer entry, pipe == -1 means
   * vals[] holds an inline immediate value. */
  37. struct {
  38. int pipe;
  39. float vals[4];
  40. } consts[MAX_CONSTS];
  /* Number of consts[] slots in use. */
  41. int nr_consts;
  /* CONST registers created for TGSI immediates, indexed by immediate number. */
  42. struct nv30_sreg imm[MAX_IMM];
  43. unsigned nr_imm;
  44. };
  45. static INLINE struct nv30_sreg
  46. temp(struct nv30_fpc *fpc)
  47. {
  48. int idx;
  49. idx = fpc->temp_temp_count++;
  50. idx += fpc->high_temp + 1;
  51. return nv30_sr(NV30SR_TEMP, idx);
  52. }
  53. static INLINE struct nv30_sreg
  54. constant(struct nv30_fpc *fpc, int pipe, float vals[4])
  55. {
  56. int idx;
  57. if (fpc->nr_consts == MAX_CONSTS)
  58. assert(0);
  59. idx = fpc->nr_consts++;
  60. fpc->consts[idx].pipe = pipe;
  61. if (pipe == -1)
  62. memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
  63. return nv30_sr(NV30SR_CONST, idx);
  64. }
  /*
   * Emit an arithmetic instruction; `o` is the opcode suffix that gets pasted
   * onto NV30_FP_OP_OPCODE_.
   */
  #define arith(cc,s,o,d,m,s0,s1,s2) \
      nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
                    (d), (m), (s0), (s1), (s2))
  /*
   * Emit a texture instruction for unit `u`.  The s1/s2 arguments are now
   * forwarded instead of being replaced by a variable named `none` captured
   * from the calling scope; callers in this file pass `none` for both, so
   * the emitted code is unchanged.
   */
  #define tex(cc,s,o,u,d,m,s0,s1,s2) \
      nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
                  (d), (m), (s0), (s1), (s2))
  71. static void
  72. grow_insns(struct nv30_fpc *fpc, int size)
  73. {
  74. struct nv30_fragment_program *fp = fpc->fp;
  75. fp->insn_len += size;
  76. fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
  77. }
  78. static void
  79. emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
  80. {
  81. struct nv30_fragment_program *fp = fpc->fp;
  82. uint32_t *hw = &fp->insn[fpc->inst_offset];
  83. uint32_t sr = 0;
  84. switch (src.type) {
  85. case NV30SR_INPUT:
  86. sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
  87. hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
  88. break;
  89. case NV30SR_OUTPUT:
  90. sr |= NV30_FP_REG_SRC_HALF;
  91. /* fall-through */
  92. case NV30SR_TEMP:
  93. sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
  94. sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
  95. break;
  96. case NV30SR_CONST:
  97. grow_insns(fpc, 4);
  98. hw = &fp->insn[fpc->inst_offset];
  99. if (fpc->consts[src.index].pipe >= 0) {
  100. struct nv30_fragment_program_data *fpd;
  101. fp->consts = realloc(fp->consts, ++fp->nr_consts *
  102. sizeof(*fpd));
  103. fpd = &fp->consts[fp->nr_consts - 1];
  104. fpd->offset = fpc->inst_offset + 4;
  105. fpd->index = fpc->consts[src.index].pipe;
  106. memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
  107. } else {
  108. memcpy(&fp->insn[fpc->inst_offset + 4],
  109. fpc->consts[src.index].vals,
  110. sizeof(uint32_t) * 4);
  111. }
  112. sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
  113. break;
  114. case NV30SR_NONE:
  115. sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
  116. break;
  117. default:
  118. assert(0);
  119. }
  120. if (src.negate)
  121. sr |= NV30_FP_REG_NEGATE;
  122. if (src.abs)
  123. hw[1] |= (1 << (29 + pos));
  124. sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
  125. (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
  126. (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
  127. (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
  128. hw[pos + 1] |= sr;
  129. }
  130. static void
  131. emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
  132. {
  133. struct nv30_fragment_program *fp = fpc->fp;
  134. uint32_t *hw = &fp->insn[fpc->inst_offset];
  135. switch (dst.type) {
  136. case NV30SR_TEMP:
  137. if (fpc->num_regs < (dst.index + 1))
  138. fpc->num_regs = dst.index + 1;
  139. break;
  140. case NV30SR_OUTPUT:
  141. if (dst.index == 1) {
  142. fp->fp_control |= 0xe;
  143. } else {
  144. hw[0] |= NV30_FP_OP_OUT_REG_HALF;
  145. }
  146. break;
  147. case NV30SR_NONE:
  148. hw[0] |= (1 << 30);
  149. break;
  150. default:
  151. assert(0);
  152. }
  153. hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
  154. }
  155. static void
  156. nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
  157. struct nv30_sreg dst, int mask,
  158. struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
  159. {
  160. struct nv30_fragment_program *fp = fpc->fp;
  161. uint32_t *hw;
  162. fpc->inst_offset = fp->insn_len;
  163. grow_insns(fpc, 4);
  164. hw = &fp->insn[fpc->inst_offset];
  165. memset(hw, 0, sizeof(uint32_t) * 4);
  166. if (op == NV30_FP_OP_OPCODE_KIL)
  167. fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
  168. hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
  169. hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
  170. hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
  171. if (sat)
  172. hw[0] |= NV30_FP_OP_OUT_SAT;
  173. if (dst.cc_update)
  174. hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
  175. hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
  176. hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
  177. (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
  178. (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
  179. (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
  180. emit_dst(fpc, dst);
  181. emit_src(fpc, 0, s0);
  182. emit_src(fpc, 1, s1);
  183. emit_src(fpc, 2, s2);
  184. }
  185. static void
  186. nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
  187. struct nv30_sreg dst, int mask,
  188. struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
  189. {
  190. struct nv30_fragment_program *fp = fpc->fp;
  191. nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
  192. fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
  193. fp->samplers |= (1 << unit);
  194. }
  195. static INLINE struct nv30_sreg
  196. tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
  197. {
  198. struct nv30_sreg src;
  199. switch (fsrc->Register.File) {
  200. case TGSI_FILE_INPUT:
  201. src = nv30_sr(NV30SR_INPUT,
  202. fpc->attrib_map[fsrc->Register.Index]);
  203. break;
  204. case TGSI_FILE_CONSTANT:
  205. src = constant(fpc, fsrc->Register.Index, NULL);
  206. break;
  207. case TGSI_FILE_IMMEDIATE:
  208. assert(fsrc->Register.Index < fpc->nr_imm);
  209. src = fpc->imm[fsrc->Register.Index];
  210. break;
  211. case TGSI_FILE_TEMPORARY:
  212. src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
  213. if (fpc->high_temp < src.index)
  214. fpc->high_temp = src.index;
  215. break;
  216. /* This is clearly insane, but gallium hands us shaders like this.
  217. * Luckily fragprog results are just temp regs..
  218. */
  219. case TGSI_FILE_OUTPUT:
  220. if (fsrc->Register.Index == fpc->colour_id)
  221. return nv30_sr(NV30SR_OUTPUT, 0);
  222. else
  223. return nv30_sr(NV30SR_OUTPUT, 1);
  224. break;
  225. default:
  226. NOUVEAU_ERR("bad src file\n");
  227. break;
  228. }
  229. src.abs = fsrc->Register.Absolute;
  230. src.negate = fsrc->Register.Negate;
  231. src.swz[0] = fsrc->Register.SwizzleX;
  232. src.swz[1] = fsrc->Register.SwizzleY;
  233. src.swz[2] = fsrc->Register.SwizzleZ;
  234. src.swz[3] = fsrc->Register.SwizzleW;
  235. return src;
  236. }
  237. static INLINE struct nv30_sreg
  238. tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
  239. int idx;
  240. switch (fdst->Register.File) {
  241. case TGSI_FILE_OUTPUT:
  242. if (fdst->Register.Index == fpc->colour_id)
  243. return nv30_sr(NV30SR_OUTPUT, 0);
  244. else
  245. return nv30_sr(NV30SR_OUTPUT, 1);
  246. break;
  247. case TGSI_FILE_TEMPORARY:
  248. idx = fdst->Register.Index + 1;
  249. if (fpc->high_temp < idx)
  250. fpc->high_temp = idx;
  251. return nv30_sr(NV30SR_TEMP, idx);
  252. case TGSI_FILE_NULL:
  253. return nv30_sr(NV30SR_NONE, 0);
  254. default:
  255. NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
  256. return nv30_sr(NV30SR_NONE, 0);
  257. }
  258. }
  259. static INLINE int
  260. tgsi_mask(uint tgsi)
  261. {
  262. int mask = 0;
  263. if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
  264. if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
  265. if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
  266. if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
  267. return mask;
  268. }
  269. static boolean
  270. src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
  271. struct nv30_sreg *src)
  272. {
  273. const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
  274. struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
  275. uint mask = 0;
  276. uint c;
  277. for (c = 0; c < 4; c++) {
  278. switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
  279. case TGSI_SWIZZLE_X:
  280. case TGSI_SWIZZLE_Y:
  281. case TGSI_SWIZZLE_Z:
  282. case TGSI_SWIZZLE_W:
  283. mask |= (1 << c);
  284. break;
  285. default:
  286. assert(0);
  287. }
  288. }
  289. if (mask == MASK_ALL)
  290. return TRUE;
  291. *src = temp(fpc);
  292. if (mask)
  293. arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
  294. return FALSE;
  295. }
  296. static boolean
  297. nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
  298. const struct tgsi_full_instruction *finst)
  299. {
  300. const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
  301. struct nv30_sreg src[3], dst, tmp;
  302. int mask, sat, unit = 0;
  303. int ai = -1, ci = -1;
  304. int i;
  305. if (finst->Instruction.Opcode == TGSI_OPCODE_END)
  306. return TRUE;
  307. fpc->temp_temp_count = 0;
  308. for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
  309. const struct tgsi_full_src_register *fsrc;
  310. fsrc = &finst->Src[i];
  311. if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
  312. src[i] = tgsi_src(fpc, fsrc);
  313. }
  314. }
  315. for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
  316. const struct tgsi_full_src_register *fsrc;
  317. fsrc = &finst->Src[i];
  318. switch (fsrc->Register.File) {
  319. case TGSI_FILE_INPUT:
  320. case TGSI_FILE_CONSTANT:
  321. case TGSI_FILE_TEMPORARY:
  322. if (!src_native_swz(fpc, fsrc, &src[i]))
  323. continue;
  324. break;
  325. default:
  326. break;
  327. }
  328. switch (fsrc->Register.File) {
  329. case TGSI_FILE_INPUT:
  330. if (ai == -1 || ai == fsrc->Register.Index) {
  331. ai = fsrc->Register.Index;
  332. src[i] = tgsi_src(fpc, fsrc);
  333. } else {
  334. NOUVEAU_MSG("extra src attr %d\n",
  335. fsrc->Register.Index);
  336. src[i] = temp(fpc);
  337. arith(fpc, 0, MOV, src[i], MASK_ALL,
  338. tgsi_src(fpc, fsrc), none, none);
  339. }
  340. break;
  341. case TGSI_FILE_CONSTANT:
  342. case TGSI_FILE_IMMEDIATE:
  343. if (ci == -1 || ci == fsrc->Register.Index) {
  344. ci = fsrc->Register.Index;
  345. src[i] = tgsi_src(fpc, fsrc);
  346. } else {
  347. src[i] = temp(fpc);
  348. arith(fpc, 0, MOV, src[i], MASK_ALL,
  349. tgsi_src(fpc, fsrc), none, none);
  350. }
  351. break;
  352. case TGSI_FILE_TEMPORARY:
  353. /* handled above */
  354. break;
  355. case TGSI_FILE_SAMPLER:
  356. unit = fsrc->Register.Index;
  357. break;
  358. case TGSI_FILE_OUTPUT:
  359. break;
  360. default:
  361. NOUVEAU_ERR("bad src file\n");
  362. return FALSE;
  363. }
  364. }
  365. dst = tgsi_dst(fpc, &finst->Dst[0]);
  366. mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
  367. sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
  368. switch (finst->Instruction.Opcode) {
  369. case TGSI_OPCODE_ABS:
  370. arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
  371. break;
  372. case TGSI_OPCODE_ADD:
  373. arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
  374. break;
  375. case TGSI_OPCODE_CMP:
  376. tmp = nv30_sr(NV30SR_NONE, 0);
  377. tmp.cc_update = 1;
  378. arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
  379. dst.cc_test = NV30_VP_INST_COND_GE;
  380. arith(fpc, sat, MOV, dst, mask, src[2], none, none);
  381. dst.cc_test = NV30_VP_INST_COND_LT;
  382. arith(fpc, sat, MOV, dst, mask, src[1], none, none);
  383. break;
  384. case TGSI_OPCODE_COS:
  385. arith(fpc, sat, COS, dst, mask, src[0], none, none);
  386. break;
  387. case TGSI_OPCODE_DP3:
  388. arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
  389. break;
  390. case TGSI_OPCODE_DP4:
  391. arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
  392. break;
  393. case TGSI_OPCODE_DPH:
  394. tmp = temp(fpc);
  395. arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
  396. arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
  397. swz(src[1], W, W, W, W), none);
  398. break;
  399. case TGSI_OPCODE_DST:
  400. arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
  401. break;
  402. case TGSI_OPCODE_EX2:
  403. arith(fpc, sat, EX2, dst, mask, src[0], none, none);
  404. break;
  405. case TGSI_OPCODE_FLR:
  406. arith(fpc, sat, FLR, dst, mask, src[0], none, none);
  407. break;
  408. case TGSI_OPCODE_FRC:
  409. arith(fpc, sat, FRC, dst, mask, src[0], none, none);
  410. break;
  411. case TGSI_OPCODE_KILP:
  412. arith(fpc, 0, KIL, none, 0, none, none, none);
  413. break;
  414. case TGSI_OPCODE_KIL:
  415. dst = nv30_sr(NV30SR_NONE, 0);
  416. dst.cc_update = 1;
  417. arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
  418. dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
  419. arith(fpc, 0, KIL, dst, 0, none, none, none);
  420. break;
  421. case TGSI_OPCODE_LG2:
  422. arith(fpc, sat, LG2, dst, mask, src[0], none, none);
  423. break;
  424. // case TGSI_OPCODE_LIT:
  425. case TGSI_OPCODE_LRP:
  426. arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
  427. break;
  428. case TGSI_OPCODE_MAD:
  429. arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
  430. break;
  431. case TGSI_OPCODE_MAX:
  432. arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
  433. break;
  434. case TGSI_OPCODE_MIN:
  435. arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
  436. break;
  437. case TGSI_OPCODE_MOV:
  438. arith(fpc, sat, MOV, dst, mask, src[0], none, none);
  439. break;
  440. case TGSI_OPCODE_MUL:
  441. arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
  442. break;
  443. case TGSI_OPCODE_POW:
  444. arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
  445. break;
  446. case TGSI_OPCODE_RCP:
  447. arith(fpc, sat, RCP, dst, mask, src[0], none, none);
  448. break;
  449. case TGSI_OPCODE_RET:
  450. assert(0);
  451. break;
  452. case TGSI_OPCODE_RFL:
  453. arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
  454. break;
  455. case TGSI_OPCODE_RSQ:
  456. arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
  457. break;
  458. case TGSI_OPCODE_SCS:
  459. /* avoid overwriting the source */
  460. if(src[0].swz[SWZ_X] != SWZ_X)
  461. {
  462. if (mask & MASK_X) {
  463. arith(fpc, sat, COS, dst, MASK_X,
  464. swz(src[0], X, X, X, X), none, none);
  465. }
  466. if (mask & MASK_Y) {
  467. arith(fpc, sat, SIN, dst, MASK_Y,
  468. swz(src[0], X, X, X, X), none, none);
  469. }
  470. }
  471. else
  472. {
  473. if (mask & MASK_Y) {
  474. arith(fpc, sat, SIN, dst, MASK_Y,
  475. swz(src[0], X, X, X, X), none, none);
  476. }
  477. if (mask & MASK_X) {
  478. arith(fpc, sat, COS, dst, MASK_X,
  479. swz(src[0], X, X, X, X), none, none);
  480. }
  481. }
  482. break;
  483. case TGSI_OPCODE_SIN:
  484. arith(fpc, sat, SIN, dst, mask, src[0], none, none);
  485. break;
  486. case TGSI_OPCODE_SGE:
  487. arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
  488. break;
  489. case TGSI_OPCODE_SGT:
  490. arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
  491. break;
  492. case TGSI_OPCODE_SLT:
  493. arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
  494. break;
  495. case TGSI_OPCODE_SUB:
  496. arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
  497. break;
  498. case TGSI_OPCODE_TEX:
  499. tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
  500. break;
  501. case TGSI_OPCODE_TXB:
  502. tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
  503. break;
  504. case TGSI_OPCODE_TXP:
  505. tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
  506. break;
  507. case TGSI_OPCODE_XPD:
  508. tmp = temp(fpc);
  509. arith(fpc, 0, MUL, tmp, mask,
  510. swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
  511. arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
  512. swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
  513. neg(tmp));
  514. break;
  515. default:
  516. NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
  517. return FALSE;
  518. }
  519. return TRUE;
  520. }
  521. static boolean
  522. nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
  523. const struct tgsi_full_declaration *fdec)
  524. {
  525. int hw;
  526. switch (fdec->Semantic.Name) {
  527. case TGSI_SEMANTIC_POSITION:
  528. hw = NV30_FP_OP_INPUT_SRC_POSITION;
  529. break;
  530. case TGSI_SEMANTIC_COLOR:
  531. if (fdec->Semantic.Index == 0) {
  532. hw = NV30_FP_OP_INPUT_SRC_COL0;
  533. } else
  534. if (fdec->Semantic.Index == 1) {
  535. hw = NV30_FP_OP_INPUT_SRC_COL1;
  536. } else {
  537. NOUVEAU_ERR("bad colour semantic index\n");
  538. return FALSE;
  539. }
  540. break;
  541. case TGSI_SEMANTIC_FOG:
  542. hw = NV30_FP_OP_INPUT_SRC_FOGC;
  543. break;
  544. case TGSI_SEMANTIC_GENERIC:
  545. if (fdec->Semantic.Index <= 7) {
  546. hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
  547. Index);
  548. } else {
  549. NOUVEAU_ERR("bad generic semantic index\n");
  550. return FALSE;
  551. }
  552. break;
  553. default:
  554. NOUVEAU_ERR("bad input semantic\n");
  555. return FALSE;
  556. }
  557. fpc->attrib_map[fdec->Range.First] = hw;
  558. return TRUE;
  559. }
  560. static boolean
  561. nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
  562. const struct tgsi_full_declaration *fdec)
  563. {
  564. switch (fdec->Semantic.Name) {
  565. case TGSI_SEMANTIC_POSITION:
  566. fpc->depth_id = fdec->Range.First;
  567. break;
  568. case TGSI_SEMANTIC_COLOR:
  569. fpc->colour_id = fdec->Range.First;
  570. break;
  571. default:
  572. NOUVEAU_ERR("bad output semantic\n");
  573. return FALSE;
  574. }
  575. return TRUE;
  576. }
  577. static boolean
  578. nv30_fragprog_prepare(struct nv30_fpc *fpc)
  579. {
  580. struct tgsi_parse_context p;
  581. /*int high_temp = -1, i;*/
  582. tgsi_parse_init(&p, fpc->fp->pipe.tokens);
  583. while (!tgsi_parse_end_of_tokens(&p)) {
  584. const union tgsi_full_token *tok = &p.FullToken;
  585. tgsi_parse_token(&p);
  586. switch(tok->Token.Type) {
  587. case TGSI_TOKEN_TYPE_DECLARATION:
  588. {
  589. const struct tgsi_full_declaration *fdec;
  590. fdec = &p.FullToken.FullDeclaration;
  591. switch (fdec->Declaration.File) {
  592. case TGSI_FILE_INPUT:
  593. if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
  594. goto out_err;
  595. break;
  596. case TGSI_FILE_OUTPUT:
  597. if (!nv30_fragprog_parse_decl_output(fpc, fdec))
  598. goto out_err;
  599. break;
  600. /*case TGSI_FILE_TEMPORARY:
  601. if (fdec->Range.Last > high_temp) {
  602. high_temp =
  603. fdec->Range.Last;
  604. }
  605. break;*/
  606. default:
  607. break;
  608. }
  609. }
  610. break;
  611. case TGSI_TOKEN_TYPE_IMMEDIATE:
  612. {
  613. struct tgsi_full_immediate *imm;
  614. float vals[4];
  615. imm = &p.FullToken.FullImmediate;
  616. assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
  617. assert(fpc->nr_imm < MAX_IMM);
  618. vals[0] = imm->u[0].Float;
  619. vals[1] = imm->u[1].Float;
  620. vals[2] = imm->u[2].Float;
  621. vals[3] = imm->u[3].Float;
  622. fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
  623. }
  624. break;
  625. default:
  626. break;
  627. }
  628. }
  629. tgsi_parse_free(&p);
  630. /*if (++high_temp) {
  631. fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
  632. for (i = 0; i < high_temp; i++)
  633. fpc->r_temp[i] = temp(fpc);
  634. fpc->r_temps_discard = 0;
  635. }*/
  636. return TRUE;
  637. out_err:
  638. /*if (fpc->r_temp)
  639. FREE(fpc->r_temp);*/
  640. tgsi_parse_free(&p);
  641. return FALSE;
  642. }
  643. static void
  644. nv30_fragprog_translate(struct nv30_context *nv30,
  645. struct nv30_fragment_program *fp)
  646. {
  647. struct tgsi_parse_context parse;
  648. struct nv30_fpc *fpc = NULL;
  649. tgsi_dump(fp->pipe.tokens,0);
  650. fpc = CALLOC(1, sizeof(struct nv30_fpc));
  651. if (!fpc)
  652. return;
  653. fpc->fp = fp;
  654. fpc->high_temp = -1;
  655. fpc->num_regs = 2;
  656. if (!nv30_fragprog_prepare(fpc)) {
  657. FREE(fpc);
  658. return;
  659. }
  660. tgsi_parse_init(&parse, fp->pipe.tokens);
  661. while (!tgsi_parse_end_of_tokens(&parse)) {
  662. tgsi_parse_token(&parse);
  663. switch (parse.FullToken.Token.Type) {
  664. case TGSI_TOKEN_TYPE_INSTRUCTION:
  665. {
  666. const struct tgsi_full_instruction *finst;
  667. finst = &parse.FullToken.FullInstruction;
  668. if (!nv30_fragprog_parse_instruction(fpc, finst))
  669. goto out_err;
  670. }
  671. break;
  672. default:
  673. break;
  674. }
  675. }
  676. fp->fp_control |= (fpc->num_regs-1)/2;
  677. fp->fp_reg_control = (1<<16)|0x4;
  678. /* Terminate final instruction */
  679. fp->insn[fpc->inst_offset] |= 0x00000001;
  680. /* Append NOP + END instruction, may or may not be necessary. */
  681. fpc->inst_offset = fp->insn_len;
  682. grow_insns(fpc, 4);
  683. fp->insn[fpc->inst_offset + 0] = 0x00000001;
  684. fp->insn[fpc->inst_offset + 1] = 0x00000000;
  685. fp->insn[fpc->inst_offset + 2] = 0x00000000;
  686. fp->insn[fpc->inst_offset + 3] = 0x00000000;
  687. fp->translated = TRUE;
  688. fp->on_hw = FALSE;
  689. out_err:
  690. tgsi_parse_free(&parse);
  691. FREE(fpc);
  692. }
  693. static void
  694. nv30_fragprog_upload(struct nv30_context *nv30,
  695. struct nv30_fragment_program *fp)
  696. {
  697. struct pipe_screen *pscreen = nv30->pipe.screen;
  698. const uint32_t le = 1;
  699. uint32_t *map;
  700. int i;
  701. map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
  702. #if 0
  703. for (i = 0; i < fp->insn_len; i++) {
  704. fflush(stdout); fflush(stderr);
  705. NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
  706. fflush(stdout); fflush(stderr);
  707. }
  708. #endif
  709. if ((*(const uint8_t *)&le)) {
  710. for (i = 0; i < fp->insn_len; i++) {
  711. map[i] = fp->insn[i];
  712. }
  713. } else {
  714. /* Weird swapping for big-endian chips */
  715. for (i = 0; i < fp->insn_len; i++) {
  716. map[i] = ((fp->insn[i] & 0xffff) << 16) |
  717. ((fp->insn[i] >> 16) & 0xffff);
  718. }
  719. }
  720. pipe_buffer_unmap(pscreen, fp->buffer);
  721. }
  722. static boolean
  723. nv30_fragprog_validate(struct nv30_context *nv30)
  724. {
  725. struct nv30_fragment_program *fp = nv30->fragprog;
  726. struct pipe_buffer *constbuf =
  727. nv30->constbuf[PIPE_SHADER_FRAGMENT];
  728. struct pipe_screen *pscreen = nv30->pipe.screen;
  729. struct nouveau_stateobj *so;
  730. boolean new_consts = FALSE;
  731. int i;
  732. if (fp->translated)
  733. goto update_constants;
  734. /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
  735. nv30_fragprog_translate(nv30, fp);
  736. if (!fp->translated) {
  737. /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
  738. return FALSE;
  739. }
  740. fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
  741. nv30_fragprog_upload(nv30, fp);
  742. so = so_new(4, 4, 1);
  743. so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
  744. so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
  745. NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
  746. NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
  747. NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
  748. so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
  749. so_data (so, fp->fp_control);
  750. so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
  751. so_data (so, fp->fp_reg_control);
  752. so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
  753. so_data (so, fp->samplers);
  754. so_ref(so, &fp->so);
  755. so_ref(NULL, &so);
  756. update_constants:
  757. if (fp->nr_consts) {
  758. float *map;
  759. map = pipe_buffer_map(pscreen, constbuf,
  760. PIPE_BUFFER_USAGE_CPU_READ);
  761. for (i = 0; i < fp->nr_consts; i++) {
  762. struct nv30_fragment_program_data *fpd = &fp->consts[i];
  763. uint32_t *p = &fp->insn[fpd->offset];
  764. uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
  765. if (!memcmp(p, cb, 4 * sizeof(float)))
  766. continue;
  767. memcpy(p, cb, 4 * sizeof(float));
  768. new_consts = TRUE;
  769. }
  770. pipe_buffer_unmap(pscreen, constbuf);
  771. if (new_consts)
  772. nv30_fragprog_upload(nv30, fp);
  773. }
  774. if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
  775. so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
  776. return TRUE;
  777. }
  778. return FALSE;
  779. }
  780. void
  781. nv30_fragprog_destroy(struct nv30_context *nv30,
  782. struct nv30_fragment_program *fp)
  783. {
  784. if (fp->buffer)
  785. pipe_buffer_reference(&fp->buffer, NULL);
  786. if (fp->so)
  787. so_ref(NULL, &fp->so);
  788. if (fp->insn_len)
  789. FREE(fp->insn);
  790. }
  /* State table entry for the fragment program: names the validation hook
   * defined in this file together with the pipe dirty flag and hardware state
   * slot it is associated with. */
  791. struct nv30_state_entry nv30_state_fragprog = {
  792. .validate = nv30_fragprog_validate,
  793. .dirty = {
  794. .pipe = NV30_NEW_FRAGPROG,
  795. .hw = NV30_STATE_FRAGPROG
  796. }
  797. };