Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

draw_pipe_aapoint.c 30KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884
  1. /**************************************************************************
  2. *
  3. * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  4. * All Rights Reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sub license, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice (including the
  15. * next paragraph) shall be included in all copies or substantial portions
  16. * of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25. *
  26. **************************************************************************/
  27. /**
  28. * AA point stage: AA points are converted to quads and rendered with a
  29. * special fragment shader. Another approach would be to use a texture
  30. * map image of a point, but experiments indicate the quality isn't nearly
  31. * as good as this approach.
  32. *
  33. * Note: this looks a lot like draw_aaline.c but there's actually little
  34. * if any code that can be shared.
  35. *
  36. * Authors: Brian Paul
  37. */
  38. #include "pipe/p_context.h"
  39. #include "pipe/p_defines.h"
  40. #include "pipe/p_shader_tokens.h"
  41. #include "tgsi/tgsi_transform.h"
  42. #include "tgsi/tgsi_dump.h"
  43. #include "util/u_math.h"
  44. #include "util/u_memory.h"
  45. #include "draw_context.h"
  46. #include "draw_vs.h"
  47. #include "draw_pipe.h"
  48. /*
  49. * Enabling NORMALIZE might give _slightly_ better results.
  50. * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
  51. * d=x*x+y*y. Since we're working with a unit circle, the latter seems
  52. * close enough and saves some costly instructions.
  53. */
  54. #define NORMALIZE 0
  55. /**
  56. * Subclass of pipe_shader_state to carry extra fragment shader info.
  57. */
  58. struct aapoint_fragment_shader
  59. {
  60. struct pipe_shader_state state;
  61. void *driver_fs; /**< the regular shader */
  62. void *aapoint_fs; /**< the aa point-augmented shader */
  63. int generic_attrib; /**< The generic input attrib/texcoord we'll use */
  64. };
  65. /**
  66. * Subclass of draw_stage
  67. */
  68. struct aapoint_stage
  69. {
  70. struct draw_stage stage;
  71. int psize_slot;
  72. float radius;
  73. /** this is the vertex attrib slot for the new texcoords */
  74. uint tex_slot;
  75. uint pos_slot;
  76. /*
  77. * Currently bound state
  78. */
  79. struct aapoint_fragment_shader *fs;
  80. /*
  81. * Driver interface/override functions
  82. */
  83. void * (*driver_create_fs_state)(struct pipe_context *,
  84. const struct pipe_shader_state *);
  85. void (*driver_bind_fs_state)(struct pipe_context *, void *);
  86. void (*driver_delete_fs_state)(struct pipe_context *, void *);
  87. struct pipe_context *pipe;
  88. };
  89. /**
  90. * Subclass of tgsi_transform_context, used for transforming the
  91. * user's fragment shader to add the special AA instructions.
  92. */
  93. struct aa_transform_context {
  94. struct tgsi_transform_context base;
  95. uint tempsUsed; /**< bitmask */
  96. int colorOutput; /**< which output is the primary color */
  97. int maxInput, maxGeneric; /**< max input index found */
  98. int tmp0, colorTemp; /**< temp registers */
  99. boolean firstInstruction;
  100. };
  101. /**
  102. * TGSI declaration transform callback.
  103. * Look for two free temp regs and available input reg for new texcoords.
  104. */
  105. static void
  106. aa_transform_decl(struct tgsi_transform_context *ctx,
  107. struct tgsi_full_declaration *decl)
  108. {
  109. struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
  110. if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
  111. decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
  112. decl->Semantic.Index == 0) {
  113. aactx->colorOutput = decl->DeclarationRange.First;
  114. }
  115. else if (decl->Declaration.File == TGSI_FILE_INPUT) {
  116. if ((int) decl->DeclarationRange.Last > aactx->maxInput)
  117. aactx->maxInput = decl->DeclarationRange.Last;
  118. if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
  119. (int) decl->Semantic.Index > aactx->maxGeneric) {
  120. aactx->maxGeneric = decl->Semantic.Index;
  121. }
  122. }
  123. else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
  124. uint i;
  125. for (i = decl->DeclarationRange.First;
  126. i <= decl->DeclarationRange.Last; i++) {
  127. aactx->tempsUsed |= (1 << i);
  128. }
  129. }
  130. ctx->emit_declaration(ctx, decl);
  131. }
  132. /**
  133. * TGSI instruction transform callback.
  134. * Replace writes to result.color w/ a temp reg.
  135. * Upon END instruction, insert texture sampling code for antialiasing.
  136. */
  137. static void
  138. aa_transform_inst(struct tgsi_transform_context *ctx,
  139. struct tgsi_full_instruction *inst)
  140. {
  141. struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
  142. struct tgsi_full_instruction newInst;
  143. if (aactx->firstInstruction) {
  144. /* emit our new declarations before the first instruction */
  145. struct tgsi_full_declaration decl;
  146. const int texInput = aactx->maxInput + 1;
  147. int tmp0;
  148. uint i;
  149. /* find two free temp regs */
  150. for (i = 0; i < 32; i++) {
  151. if ((aactx->tempsUsed & (1 << i)) == 0) {
  152. /* found a free temp */
  153. if (aactx->tmp0 < 0)
  154. aactx->tmp0 = i;
  155. else if (aactx->colorTemp < 0)
  156. aactx->colorTemp = i;
  157. else
  158. break;
  159. }
  160. }
  161. assert(aactx->colorTemp != aactx->tmp0);
  162. tmp0 = aactx->tmp0;
  163. /* declare new generic input/texcoord */
  164. decl = tgsi_default_full_declaration();
  165. decl.Declaration.File = TGSI_FILE_INPUT;
  166. /* XXX this could be linear... */
  167. decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
  168. decl.Declaration.Semantic = 1;
  169. decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
  170. decl.Semantic.Index = aactx->maxGeneric + 1;
  171. decl.DeclarationRange.First =
  172. decl.DeclarationRange.Last = texInput;
  173. ctx->emit_declaration(ctx, &decl);
  174. /* declare new temp regs */
  175. decl = tgsi_default_full_declaration();
  176. decl.Declaration.File = TGSI_FILE_TEMPORARY;
  177. decl.DeclarationRange.First =
  178. decl.DeclarationRange.Last = tmp0;
  179. ctx->emit_declaration(ctx, &decl);
  180. decl = tgsi_default_full_declaration();
  181. decl.Declaration.File = TGSI_FILE_TEMPORARY;
  182. decl.DeclarationRange.First =
  183. decl.DeclarationRange.Last = aactx->colorTemp;
  184. ctx->emit_declaration(ctx, &decl);
  185. aactx->firstInstruction = FALSE;
  186. /*
  187. * Emit code to compute fragment coverage, kill if outside point radius
  188. *
  189. * Temp reg0 usage:
  190. * t0.x = distance of fragment from center point
  191. * t0.y = boolean, is t0.x > 1.0, also misc temp usage
  192. * t0.z = temporary for computing 1/(1-k) value
  193. * t0.w = final coverage value
  194. */
  195. /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
  196. newInst = tgsi_default_full_instruction();
  197. newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
  198. newInst.Instruction.NumDstRegs = 1;
  199. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  200. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  201. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
  202. newInst.Instruction.NumSrcRegs = 2;
  203. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
  204. newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
  205. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
  206. newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
  207. ctx->emit_instruction(ctx, &newInst);
  208. /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
  209. newInst = tgsi_default_full_instruction();
  210. newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
  211. newInst.Instruction.NumDstRegs = 1;
  212. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  213. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  214. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
  215. newInst.Instruction.NumSrcRegs = 2;
  216. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  217. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  218. newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
  219. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
  220. newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
  221. newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
  222. ctx->emit_instruction(ctx, &newInst);
  223. #if NORMALIZE /* OPTIONAL normalization of length */
  224. /* RSQ t0.x, t0.x; */
  225. newInst = tgsi_default_full_instruction();
  226. newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
  227. newInst.Instruction.NumDstRegs = 1;
  228. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  229. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  230. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
  231. newInst.Instruction.NumSrcRegs = 1;
  232. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  233. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  234. ctx->emit_instruction(ctx, &newInst);
  235. /* RCP t0.x, t0.x; */
  236. newInst = tgsi_default_full_instruction();
  237. newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
  238. newInst.Instruction.NumDstRegs = 1;
  239. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  240. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  241. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
  242. newInst.Instruction.NumSrcRegs = 1;
  243. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  244. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  245. ctx->emit_instruction(ctx, &newInst);
  246. #endif
  247. /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */
  248. newInst = tgsi_default_full_instruction();
  249. newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
  250. newInst.Instruction.NumDstRegs = 1;
  251. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  252. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  253. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
  254. newInst.Instruction.NumSrcRegs = 2;
  255. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  256. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  257. newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
  258. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
  259. newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
  260. newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
  261. ctx->emit_instruction(ctx, &newInst);
  262. /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */
  263. newInst = tgsi_default_full_instruction();
  264. newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
  265. newInst.Instruction.NumDstRegs = 0;
  266. newInst.Instruction.NumSrcRegs = 1;
  267. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  268. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  269. newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
  270. newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
  271. newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
  272. newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
  273. newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
  274. ctx->emit_instruction(ctx, &newInst);
  275. /* compute coverage factor = (1-d)/(1-k) */
  276. /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
  277. newInst = tgsi_default_full_instruction();
  278. newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
  279. newInst.Instruction.NumDstRegs = 1;
  280. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  281. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  282. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
  283. newInst.Instruction.NumSrcRegs = 2;
  284. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
  285. newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
  286. newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
  287. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
  288. newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
  289. newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
  290. ctx->emit_instruction(ctx, &newInst);
  291. /* RCP t0.z, t0.z; # t0.z = 1 / m */
  292. newInst = tgsi_default_full_instruction();
  293. newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
  294. newInst.Instruction.NumDstRegs = 1;
  295. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  296. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  297. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
  298. newInst.Instruction.NumSrcRegs = 1;
  299. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  300. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  301. newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
  302. ctx->emit_instruction(ctx, &newInst);
  303. /* SUB t0.y, 1, t0.x; # d = 1 - d */
  304. newInst = tgsi_default_full_instruction();
  305. newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
  306. newInst.Instruction.NumDstRegs = 1;
  307. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  308. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  309. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
  310. newInst.Instruction.NumSrcRegs = 2;
  311. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
  312. newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
  313. newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
  314. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
  315. newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
  316. newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
  317. ctx->emit_instruction(ctx, &newInst);
  318. /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
  319. newInst = tgsi_default_full_instruction();
  320. newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
  321. newInst.Instruction.NumDstRegs = 1;
  322. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  323. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  324. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
  325. newInst.Instruction.NumSrcRegs = 2;
  326. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  327. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  328. newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
  329. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
  330. newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
  331. newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
  332. ctx->emit_instruction(ctx, &newInst);
  333. /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
  334. newInst = tgsi_default_full_instruction();
  335. newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
  336. newInst.Instruction.NumDstRegs = 1;
  337. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  338. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  339. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
  340. newInst.Instruction.NumSrcRegs = 2;
  341. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  342. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  343. newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
  344. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
  345. newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
  346. newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
  347. ctx->emit_instruction(ctx, &newInst);
  348. /* CMP t0.w, -t0.y, tex.w, t0.w;
  349. * # if -t0.y < 0 then
  350. * t0.w = 1
  351. * else
  352. * t0.w = t0.w
  353. */
  354. newInst = tgsi_default_full_instruction();
  355. newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
  356. newInst.Instruction.NumDstRegs = 1;
  357. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
  358. newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
  359. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
  360. newInst.Instruction.NumSrcRegs = 3;
  361. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  362. newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
  363. newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
  364. newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
  365. newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
  366. newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
  367. newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
  368. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
  369. newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
  370. newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
  371. newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
  372. newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
  373. newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
  374. newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
  375. newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
  376. newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
  377. newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
  378. newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
  379. newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
  380. ctx->emit_instruction(ctx, &newInst);
  381. }
  382. if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
  383. /* add alpha modulation code at tail of program */
  384. /* MOV result.color.xyz, colorTemp; */
  385. newInst = tgsi_default_full_instruction();
  386. newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
  387. newInst.Instruction.NumDstRegs = 1;
  388. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
  389. newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
  390. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
  391. newInst.Instruction.NumSrcRegs = 1;
  392. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  393. newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
  394. ctx->emit_instruction(ctx, &newInst);
  395. /* MUL result.color.w, colorTemp, tmp0.w; */
  396. newInst = tgsi_default_full_instruction();
  397. newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
  398. newInst.Instruction.NumDstRegs = 1;
  399. newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
  400. newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
  401. newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
  402. newInst.Instruction.NumSrcRegs = 2;
  403. newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
  404. newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
  405. newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
  406. newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
  407. ctx->emit_instruction(ctx, &newInst);
  408. }
  409. else {
  410. /* Not an END instruction.
  411. * Look for writes to result.color and replace with colorTemp reg.
  412. */
  413. uint i;
  414. for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
  415. struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
  416. if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
  417. dst->DstRegister.Index == aactx->colorOutput) {
  418. dst->DstRegister.File = TGSI_FILE_TEMPORARY;
  419. dst->DstRegister.Index = aactx->colorTemp;
  420. }
  421. }
  422. }
  423. ctx->emit_instruction(ctx, inst);
  424. }
  425. /**
  426. * Generate the frag shader we'll use for drawing AA points.
  427. * This will be the user's shader plus some texture/modulate instructions.
  428. */
  429. static boolean
  430. generate_aapoint_fs(struct aapoint_stage *aapoint)
  431. {
  432. const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
  433. struct pipe_shader_state aapoint_fs;
  434. struct aa_transform_context transform;
  435. #define MAX 1000
  436. aapoint_fs = *orig_fs; /* copy to init */
  437. aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
  438. if (aapoint_fs.tokens == NULL)
  439. return FALSE;
  440. memset(&transform, 0, sizeof(transform));
  441. transform.colorOutput = -1;
  442. transform.maxInput = -1;
  443. transform.maxGeneric = -1;
  444. transform.colorTemp = -1;
  445. transform.tmp0 = -1;
  446. transform.firstInstruction = TRUE;
  447. transform.base.transform_instruction = aa_transform_inst;
  448. transform.base.transform_declaration = aa_transform_decl;
  449. tgsi_transform_shader(orig_fs->tokens,
  450. (struct tgsi_token *) aapoint_fs.tokens,
  451. MAX, &transform.base);
  452. #if 0 /* DEBUG */
  453. printf("draw_aapoint, orig shader:\n");
  454. tgsi_dump(orig_fs->tokens, 0);
  455. printf("draw_aapoint, new shader:\n");
  456. tgsi_dump(aapoint_fs.tokens, 0);
  457. #endif
  458. aapoint->fs->aapoint_fs
  459. = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
  460. if (aapoint->fs->aapoint_fs == NULL)
  461. goto fail;
  462. aapoint->fs->generic_attrib = transform.maxGeneric + 1;
  463. FREE((void *)aapoint_fs.tokens);
  464. return TRUE;
  465. fail:
  466. FREE((void *)aapoint_fs.tokens);
  467. return FALSE;
  468. }
  469. /**
  470. * When we're about to draw our first AA point in a batch, this function is
  471. * called to tell the driver to bind our modified fragment shader.
  472. */
  473. static boolean
  474. bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
  475. {
  476. struct draw_context *draw = aapoint->stage.draw;
  477. if (!aapoint->fs->aapoint_fs &&
  478. !generate_aapoint_fs(aapoint))
  479. return FALSE;
  480. draw->suspend_flushing = TRUE;
  481. aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
  482. draw->suspend_flushing = FALSE;
  483. return TRUE;
  484. }
  485. static INLINE struct aapoint_stage *
  486. aapoint_stage( struct draw_stage *stage )
  487. {
  488. return (struct aapoint_stage *) stage;
  489. }
  490. /**
  491. * Draw an AA point by drawing a quad.
  492. */
  493. static void
  494. aapoint_point(struct draw_stage *stage, struct prim_header *header)
  495. {
  496. const struct aapoint_stage *aapoint = aapoint_stage(stage);
  497. struct prim_header tri;
  498. struct vertex_header *v[4];
  499. uint texPos = aapoint->tex_slot;
  500. uint pos_slot = aapoint->pos_slot;
  501. float radius, *pos, *tex;
  502. uint i;
  503. float k;
  504. if (aapoint->psize_slot >= 0) {
  505. radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
  506. }
  507. else {
  508. radius = aapoint->radius;
  509. }
  510. /*
  511. * Note: the texcoords (generic attrib, really) we use are special:
  512. * The S and T components simply vary from -1 to +1.
  513. * The R component is k, below.
  514. * The Q component is 1.0 and will used as a handy constant in the
  515. * fragment shader.
  516. */
  517. /*
  518. * k is the threshold distance from the point's center at which
  519. * we begin alpha attenuation (the coverage value).
  520. * Operating within a unit circle, we'll compute the fragment's
  521. * distance 'd' from the center point using the texcoords.
  522. * IF d > 1.0 THEN
  523. * KILL fragment
  524. * ELSE IF d > k THEN
  525. * compute coverage in [0,1] proportional to d in [k, 1].
  526. * ELSE
  527. * coverage = 1.0; // full coverage
  528. * ENDIF
  529. *
  530. * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
  531. * avoid using IF/ELSE/ENDIF TGSI opcodes.
  532. */
  533. #if !NORMALIZE
  534. k = 1.0f / radius;
  535. k = 1.0f - 2.0f * k + k * k;
  536. #else
  537. k = 1.0f - 1.0f / radius;
  538. #endif
  539. /* allocate/dup new verts */
  540. for (i = 0; i < 4; i++) {
  541. v[i] = dup_vert(stage, header->v[0], i);
  542. }
  543. /* new verts */
  544. pos = v[0]->data[pos_slot];
  545. pos[0] -= radius;
  546. pos[1] -= radius;
  547. pos = v[1]->data[pos_slot];
  548. pos[0] += radius;
  549. pos[1] -= radius;
  550. pos = v[2]->data[pos_slot];
  551. pos[0] += radius;
  552. pos[1] += radius;
  553. pos = v[3]->data[pos_slot];
  554. pos[0] -= radius;
  555. pos[1] += radius;
  556. /* new texcoords */
  557. tex = v[0]->data[texPos];
  558. ASSIGN_4V(tex, -1, -1, k, 1);
  559. tex = v[1]->data[texPos];
  560. ASSIGN_4V(tex, 1, -1, k, 1);
  561. tex = v[2]->data[texPos];
  562. ASSIGN_4V(tex, 1, 1, k, 1);
  563. tex = v[3]->data[texPos];
  564. ASSIGN_4V(tex, -1, 1, k, 1);
  565. /* emit 2 tris for the quad strip */
  566. tri.v[0] = v[0];
  567. tri.v[1] = v[1];
  568. tri.v[2] = v[2];
  569. stage->next->tri( stage->next, &tri );
  570. tri.v[0] = v[0];
  571. tri.v[1] = v[2];
  572. tri.v[2] = v[3];
  573. stage->next->tri( stage->next, &tri );
  574. }
  575. static void
  576. aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
  577. {
  578. auto struct aapoint_stage *aapoint = aapoint_stage(stage);
  579. struct draw_context *draw = stage->draw;
  580. assert(draw->rasterizer->point_smooth);
  581. if (draw->rasterizer->point_size <= 2.0)
  582. aapoint->radius = 1.0;
  583. else
  584. aapoint->radius = 0.5f * draw->rasterizer->point_size;
  585. /*
  586. * Bind (generate) our fragprog.
  587. */
  588. bind_aapoint_fragment_shader(aapoint);
  589. /* update vertex attrib info */
  590. aapoint->tex_slot = draw->vs.num_vs_outputs;
  591. assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
  592. aapoint->pos_slot = draw->vs.position_output;
  593. draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
  594. draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
  595. draw->extra_vp_outputs.slot = aapoint->tex_slot;
  596. /* find psize slot in post-transform vertex */
  597. aapoint->psize_slot = -1;
  598. if (draw->rasterizer->point_size_per_vertex) {
  599. /* find PSIZ vertex output */
  600. const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
  601. uint i;
  602. for (i = 0; i < vs->info.num_outputs; i++) {
  603. if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
  604. aapoint->psize_slot = i;
  605. break;
  606. }
  607. }
  608. }
  609. /* now really draw first point */
  610. stage->point = aapoint_point;
  611. stage->point(stage, header);
  612. }
  613. static void
  614. aapoint_flush(struct draw_stage *stage, unsigned flags)
  615. {
  616. struct draw_context *draw = stage->draw;
  617. struct aapoint_stage *aapoint = aapoint_stage(stage);
  618. struct pipe_context *pipe = aapoint->pipe;
  619. stage->point = aapoint_first_point;
  620. stage->next->flush( stage->next, flags );
  621. /* restore original frag shader */
  622. draw->suspend_flushing = TRUE;
  623. aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
  624. draw->suspend_flushing = FALSE;
  625. draw->extra_vp_outputs.slot = 0;
  626. }
  627. static void
  628. aapoint_reset_stipple_counter(struct draw_stage *stage)
  629. {
  630. stage->next->reset_stipple_counter( stage->next );
  631. }
  /**
   * Destroy the stage: release its temporary vertices, then the stage
   * object itself.
   */
  static void
  aapoint_destroy(struct draw_stage *stage)
  {
     /* temp verts first -- they are reached through the stage */
     draw_free_temp_verts( stage );

     FREE( stage );
  }
  638. static struct aapoint_stage *
  639. draw_aapoint_stage(struct draw_context *draw)
  640. {
  641. struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
  642. if (aapoint == NULL)
  643. goto fail;
  644. if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
  645. goto fail;
  646. aapoint->stage.draw = draw;
  647. aapoint->stage.name = "aapoint";
  648. aapoint->stage.next = NULL;
  649. aapoint->stage.point = aapoint_first_point;
  650. aapoint->stage.line = draw_pipe_passthrough_line;
  651. aapoint->stage.tri = draw_pipe_passthrough_tri;
  652. aapoint->stage.flush = aapoint_flush;
  653. aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
  654. aapoint->stage.destroy = aapoint_destroy;
  655. return aapoint;
  656. fail:
  657. if (aapoint)
  658. aapoint_destroy(&aapoint->stage);
  659. return NULL;
  660. }
  661. static struct aapoint_stage *
  662. aapoint_stage_from_pipe(struct pipe_context *pipe)
  663. {
  664. struct draw_context *draw = (struct draw_context *) pipe->draw;
  665. return aapoint_stage(draw->pipeline.aapoint);
  666. }
  667. /**
  668. * This function overrides the driver's create_fs_state() function and
  669. * will typically be called by the state tracker.
  670. */
  671. static void *
  672. aapoint_create_fs_state(struct pipe_context *pipe,
  673. const struct pipe_shader_state *fs)
  674. {
  675. struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
  676. struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
  677. if (aafs == NULL)
  678. return NULL;
  679. aafs->state = *fs;
  680. /* pass-through */
  681. aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
  682. return aafs;
  683. }
  684. static void
  685. aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
  686. {
  687. struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
  688. struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
  689. /* save current */
  690. aapoint->fs = aafs;
  691. /* pass-through */
  692. aapoint->driver_bind_fs_state(aapoint->pipe,
  693. (aafs ? aafs->driver_fs : NULL));
  694. }
  695. static void
  696. aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
  697. {
  698. struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
  699. struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
  700. /* pass-through */
  701. aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
  702. if (aafs->aapoint_fs)
  703. aapoint->driver_delete_fs_state(aapoint->pipe, aafs->aapoint_fs);
  704. FREE(aafs);
  705. }
  706. /**
  707. * Called by drivers that want to install this AA point prim stage
  708. * into the draw module's pipeline. This will not be used if the
  709. * hardware has native support for AA points.
  710. */
  711. boolean
  712. draw_install_aapoint_stage(struct draw_context *draw,
  713. struct pipe_context *pipe)
  714. {
  715. struct aapoint_stage *aapoint;
  716. pipe->draw = (void *) draw;
  717. /*
  718. * Create / install AA point drawing / prim stage
  719. */
  720. aapoint = draw_aapoint_stage( draw );
  721. if (aapoint == NULL)
  722. goto fail;
  723. aapoint->pipe = pipe;
  724. /* save original driver functions */
  725. aapoint->driver_create_fs_state = pipe->create_fs_state;
  726. aapoint->driver_bind_fs_state = pipe->bind_fs_state;
  727. aapoint->driver_delete_fs_state = pipe->delete_fs_state;
  728. /* override the driver's functions */
  729. pipe->create_fs_state = aapoint_create_fs_state;
  730. pipe->bind_fs_state = aapoint_bind_fs_state;
  731. pipe->delete_fs_state = aapoint_delete_fs_state;
  732. draw->pipeline.aapoint = &aapoint->stage;
  733. return TRUE;
  734. fail:
  735. if (aapoint)
  736. aapoint->stage.destroy( &aapoint->stage );
  737. return FALSE;
  738. }