Clone of mesa.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lp_bld_tgsi_soa.c 45KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496
  1. /**************************************************************************
  2. *
  3. * Copyright 2009 VMware, Inc.
  4. * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  5. * All Rights Reserved.
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a
  8. * copy of this software and associated documentation files (the
  9. * "Software"), to deal in the Software without restriction, including
  10. * without limitation the rights to use, copy, modify, merge, publish,
  11. * distribute, sub license, and/or sell copies of the Software, and to
  12. * permit persons to whom the Software is furnished to do so, subject to
  13. * the following conditions:
  14. *
  15. * The above copyright notice and this permission notice (including the
  16. * next paragraph) shall be included in all copies or substantial portions
  17. * of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26. *
  27. **************************************************************************/
  28. #include "pipe/p_config.h"
  29. #include "pipe/p_shader_tokens.h"
  30. #include "util/u_debug.h"
  31. #include "util/u_math.h"
  32. #include "util/u_memory.h"
  33. #include "tgsi/tgsi_info.h"
  34. #include "tgsi/tgsi_parse.h"
  35. #include "tgsi/tgsi_util.h"
  36. #include "tgsi/tgsi_exec.h"
  37. #include "lp_bld_type.h"
  38. #include "lp_bld_const.h"
  39. #include "lp_bld_intr.h"
  40. #include "lp_bld_arit.h"
  41. #include "lp_bld_logic.h"
  42. #include "lp_bld_swizzle.h"
  43. #include "lp_bld_tgsi.h"
  44. #include "lp_bld_debug.h"
  /** Capacity of the temps[] register array in lp_build_tgsi_soa_context. */
  #define LP_MAX_TEMPS 256

  /** Capacity of the immediates[] register array in lp_build_tgsi_soa_context. */
  #define LP_MAX_IMMEDIATES 256

  /**
   * Loop header that steps CHAN through 0 .. NUM_CHANNELS-1.
   *
   * CHAN is expanded three times and must be a modifiable integer lvalue.
   * It is parenthesized so that an argument such as *ptr increments the
   * pointed-to counter rather than the pointer itself.
   */
  #define FOR_EACH_CHANNEL( CHAN )\
     for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

  /**
   * Non-zero when bit CHAN is set in the write mask of the first destination
   * register of INST (a pointer to an instruction).
   */
  #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
     ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

  /**
   * `if` header guarding the statement that follows on channel CHAN being
   * write-enabled. It expands to a bare `if`, so a following `else` binds
   * to it.
   */
  #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
     if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

  /**
   * Loop header that runs the following statement once for every channel
   * that is write-enabled in the first destination register of INST.
   */
  #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
     FOR_EACH_CHANNEL( CHAN )\
        IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

  /* Channel indices; also the bit positions tested in the write mask. */
  #define CHAN_X 0
  #define CHAN_Y 1
  #define CHAN_Z 2
  #define CHAN_W 3
  60. struct lp_build_tgsi_soa_context
  61. {
  62. struct lp_build_context base;
  63. LLVMValueRef x, y, w;
  64. LLVMValueRef a0_ptr;
  65. LLVMValueRef dadx_ptr;
  66. LLVMValueRef dady_ptr;
  67. LLVMValueRef consts_ptr;
  68. LLVMValueRef (*outputs)[NUM_CHANNELS];
  69. LLVMValueRef samplers_ptr;
  70. LLVMValueRef oow;
  71. LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
  72. LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
  73. LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
  74. LLVMValueRef mask;
  75. /** Coords/texels store */
  76. LLVMValueRef store_ptr;
  77. };
  78. /**
  79. * Register fetch.
  80. */
  81. static LLVMValueRef
  82. emit_fetch(
  83. struct lp_build_tgsi_soa_context *bld,
  84. const struct tgsi_full_instruction *inst,
  85. unsigned index,
  86. const unsigned chan_index )
  87. {
  88. const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
  89. unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
  90. LLVMValueRef res;
  91. switch (swizzle) {
  92. case TGSI_EXTSWIZZLE_X:
  93. case TGSI_EXTSWIZZLE_Y:
  94. case TGSI_EXTSWIZZLE_Z:
  95. case TGSI_EXTSWIZZLE_W:
  96. switch (reg->SrcRegister.File) {
  97. case TGSI_FILE_CONSTANT: {
  98. LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
  99. LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
  100. LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
  101. res = lp_build_broadcast_scalar(&bld->base, scalar);
  102. break;
  103. }
  104. case TGSI_FILE_IMMEDIATE:
  105. res = bld->immediates[reg->SrcRegister.Index][swizzle];
  106. assert(res);
  107. break;
  108. case TGSI_FILE_INPUT:
  109. res = bld->inputs[reg->SrcRegister.Index][swizzle];
  110. assert(res);
  111. break;
  112. case TGSI_FILE_TEMPORARY:
  113. res = bld->temps[reg->SrcRegister.Index][swizzle];
  114. if(!res)
  115. return bld->base.undef;
  116. break;
  117. default:
  118. assert( 0 );
  119. return bld->base.undef;
  120. }
  121. break;
  122. case TGSI_EXTSWIZZLE_ZERO:
  123. res = bld->base.zero;
  124. break;
  125. case TGSI_EXTSWIZZLE_ONE:
  126. res = bld->base.one;
  127. break;
  128. default:
  129. assert( 0 );
  130. return bld->base.undef;
  131. }
  132. switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
  133. case TGSI_UTIL_SIGN_CLEAR:
  134. res = lp_build_abs( &bld->base, res );
  135. break;
  136. case TGSI_UTIL_SIGN_SET:
  137. res = lp_build_abs( &bld->base, res );
  138. res = LLVMBuildNeg( bld->base.builder, res, "" );
  139. break;
  140. case TGSI_UTIL_SIGN_TOGGLE:
  141. res = LLVMBuildNeg( bld->base.builder, res, "" );
  142. break;
  143. case TGSI_UTIL_SIGN_KEEP:
  144. break;
  145. }
  146. return res;
  147. }
  148. /**
  149. * Register store.
  150. */
  151. static void
  152. emit_store(
  153. struct lp_build_tgsi_soa_context *bld,
  154. const struct tgsi_full_instruction *inst,
  155. unsigned index,
  156. unsigned chan_index,
  157. LLVMValueRef value)
  158. {
  159. const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
  160. switch( inst->Instruction.Saturate ) {
  161. case TGSI_SAT_NONE:
  162. break;
  163. case TGSI_SAT_ZERO_ONE:
  164. value = lp_build_max(&bld->base, value, bld->base.zero);
  165. value = lp_build_min(&bld->base, value, bld->base.one);
  166. break;
  167. case TGSI_SAT_MINUS_PLUS_ONE:
  168. value = lp_build_max(&bld->base, value, lp_build_const_uni(bld->base.type, -1.0));
  169. value = lp_build_min(&bld->base, value, bld->base.one);
  170. break;
  171. default:
  172. assert(0);
  173. }
  174. switch( reg->DstRegister.File ) {
  175. case TGSI_FILE_OUTPUT:
  176. bld->outputs[reg->DstRegister.Index][chan_index] = value;
  177. break;
  178. case TGSI_FILE_TEMPORARY:
  179. bld->temps[reg->DstRegister.Index][chan_index] = value;
  180. break;
  181. case TGSI_FILE_ADDRESS:
  182. /* FIXME */
  183. assert(0);
  184. break;
  185. default:
  186. assert( 0 );
  187. }
  188. }
  189. void PIPE_CDECL
  190. lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
  191. uint32_t unit,
  192. float *store )
  193. {
  194. struct tgsi_sampler *sampler = samplers[unit];
  195. #if 0
  196. uint j;
  197. debug_printf("%s sampler: %p (%p) store: %p\n",
  198. __FUNCTION__,
  199. sampler, *sampler,
  200. store );
  201. debug_printf("lodbias %f\n", store[12]);
  202. for (j = 0; j < 4; j++)
  203. debug_printf("sample %d texcoord %f %f\n",
  204. j,
  205. store[0+j],
  206. store[4+j]);
  207. #endif
  208. {
  209. float rgba[NUM_CHANNELS][QUAD_SIZE];
  210. sampler->get_samples(sampler,
  211. &store[0],
  212. &store[4],
  213. &store[8],
  214. 0.0f, /*store[12], lodbias */
  215. rgba);
  216. memcpy(store, rgba, sizeof rgba);
  217. }
  218. #if 0
  219. for (j = 0; j < 4; j++)
  220. debug_printf("sample %d result %f %f %f %f\n",
  221. j,
  222. store[0+j],
  223. store[4+j],
  224. store[8+j],
  225. store[12+j]);
  226. #endif
  227. }
  228. /**
  229. * High-level instruction translators.
  230. */
  231. static void
  232. emit_tex( struct lp_build_tgsi_soa_context *bld,
  233. const struct tgsi_full_instruction *inst,
  234. boolean apply_lodbias,
  235. boolean projected)
  236. {
  237. LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
  238. const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
  239. LLVMValueRef lodbias;
  240. LLVMValueRef oow;
  241. LLVMValueRef args[3];
  242. unsigned count;
  243. unsigned i;
  244. switch (inst->InstructionExtTexture.Texture) {
  245. case TGSI_TEXTURE_1D:
  246. case TGSI_TEXTURE_SHADOW1D:
  247. count = 1;
  248. break;
  249. case TGSI_TEXTURE_2D:
  250. case TGSI_TEXTURE_RECT:
  251. case TGSI_TEXTURE_SHADOW2D:
  252. case TGSI_TEXTURE_SHADOWRECT:
  253. count = 2;
  254. break;
  255. case TGSI_TEXTURE_3D:
  256. case TGSI_TEXTURE_CUBE:
  257. count = 3;
  258. break;
  259. default:
  260. assert(0);
  261. return;
  262. }
  263. if(apply_lodbias)
  264. lodbias = emit_fetch( bld, inst, 0, 3 );
  265. else
  266. lodbias = bld->base.zero;
  267. if(!bld->store_ptr)
  268. bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
  269. vec_type,
  270. LLVMConstInt(LLVMInt32Type(), 4, 0),
  271. "store");
  272. if (projected) {
  273. oow = emit_fetch( bld, inst, 0, 3 );
  274. oow = lp_build_rcp(&bld->base, oow);
  275. }
  276. for (i = 0; i < count; i++) {
  277. LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
  278. LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
  279. LLVMValueRef coord;
  280. coord = emit_fetch( bld, inst, 0, i );
  281. if (projected)
  282. coord = lp_build_mul(&bld->base, coord, oow);
  283. LLVMBuildStore(bld->base.builder, coord, coord_ptr);
  284. }
  285. args[0] = bld->samplers_ptr;
  286. args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
  287. args[2] = bld->store_ptr;
  288. lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
  289. FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
  290. LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
  291. LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
  292. LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
  293. emit_store( bld, inst, 0, i, res );
  294. }
  295. }
  296. static void
  297. emit_kil(
  298. struct lp_build_tgsi_soa_context *bld,
  299. const struct tgsi_full_instruction *inst )
  300. {
  301. const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
  302. LLVMValueRef terms[NUM_CHANNELS];
  303. unsigned chan_index;
  304. memset(&terms, 0, sizeof terms);
  305. FOR_EACH_CHANNEL( chan_index ) {
  306. unsigned swizzle;
  307. /* Unswizzle channel */
  308. swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
  309. /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
  310. * not to be tested. */
  311. if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
  312. continue;
  313. /* Check if the component has not been already tested. */
  314. assert(swizzle < NUM_CHANNELS);
  315. if( !terms[swizzle] )
  316. /* TODO: change the comparison operator instead of setting the sign */
  317. terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
  318. }
  319. FOR_EACH_CHANNEL( chan_index ) {
  320. if(terms[chan_index]) {
  321. LLVMValueRef mask;
  322. mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
  323. if(bld->mask)
  324. bld->mask = LLVMBuildAnd(bld->base.builder, bld->mask, mask, "");
  325. else
  326. bld->mask = mask;
  327. }
  328. }
  329. }
  /**
   * Placeholder translator: currently emits nothing and leaves bld untouched.
   *
   * @param bld  translation context (unused for now)
   */
  static void
  emit_kilp(
     struct lp_build_tgsi_soa_context *bld )
  {
     /* XXX todo / fix me */
     (void) bld;
  }
  336. /**
  337. * Check if inst src/dest regs use indirect addressing into temporary
  338. * register file.
  339. */
  340. static boolean
  341. indirect_temp_reference(const struct tgsi_full_instruction *inst)
  342. {
  343. uint i;
  344. for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
  345. const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
  346. if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
  347. reg->SrcRegister.Indirect)
  348. return TRUE;
  349. }
  350. for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
  351. const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
  352. if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
  353. reg->DstRegister.Indirect)
  354. return TRUE;
  355. }
  356. return FALSE;
  357. }
  358. static int
  359. emit_instruction(
  360. struct lp_build_tgsi_soa_context *bld,
  361. struct tgsi_full_instruction *inst )
  362. {
  363. unsigned chan_index;
  364. LLVMValueRef src0, src1, src2;
  365. LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  366. LLVMValueRef dst0;
  367. /* we can't handle indirect addressing into temp register file yet */
  368. if (indirect_temp_reference(inst))
  369. return FALSE;
  370. switch (inst->Instruction.Opcode) {
  371. #if 0
  372. case TGSI_OPCODE_ARL:
  373. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  374. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  375. emit_flr(bld, 0, 0);
  376. emit_f2it( bld, 0 );
  377. emit_store( bld, inst, 0, chan_index, tmp0);
  378. }
  379. break;
  380. #endif
  381. case TGSI_OPCODE_MOV:
  382. case TGSI_OPCODE_SWZ:
  383. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  384. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  385. emit_store( bld, inst, 0, chan_index, tmp0);
  386. }
  387. break;
  388. case TGSI_OPCODE_LIT:
  389. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
  390. emit_store( bld, inst, 0, CHAN_X, bld->base.one);
  391. }
  392. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
  393. src0 = emit_fetch( bld, inst, 0, CHAN_X );
  394. dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
  395. emit_store( bld, inst, 0, CHAN_Y, dst0);
  396. }
  397. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
  398. /* XMM[1] = SrcReg[0].yyyy */
  399. tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
  400. /* XMM[1] = max(XMM[1], 0) */
  401. tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
  402. /* XMM[2] = SrcReg[0].wwww */
  403. tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
  404. tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
  405. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  406. tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
  407. dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
  408. emit_store( bld, inst, 0, CHAN_Z, dst0);
  409. }
  410. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
  411. emit_store( bld, inst, 0, CHAN_W, bld->base.one);
  412. }
  413. break;
  414. case TGSI_OPCODE_RCP:
  415. /* TGSI_OPCODE_RECIP */
  416. src0 = emit_fetch( bld, inst, 0, CHAN_X );
  417. dst0 = lp_build_rcp(&bld->base, src0);
  418. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  419. emit_store( bld, inst, 0, chan_index, dst0 );
  420. }
  421. break;
  422. case TGSI_OPCODE_RSQ:
  423. /* TGSI_OPCODE_RECIPSQRT */
  424. src0 = emit_fetch( bld, inst, 0, CHAN_X );
  425. src0 = lp_build_abs(&bld->base, src0);
  426. dst0 = lp_build_rsqrt(&bld->base, src0);
  427. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  428. emit_store( bld, inst, 0, chan_index, dst0 );
  429. }
  430. break;
  431. case TGSI_OPCODE_EXP:
  432. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
  433. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
  434. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
  435. LLVMValueRef *p_exp2_int_part = NULL;
  436. LLVMValueRef *p_frac_part = NULL;
  437. LLVMValueRef *p_exp2 = NULL;
  438. src0 = emit_fetch( bld, inst, 0, CHAN_X );
  439. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
  440. p_exp2_int_part = &tmp0;
  441. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
  442. p_frac_part = &tmp1;
  443. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
  444. p_exp2 = &tmp2;
  445. lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
  446. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
  447. emit_store( bld, inst, 0, CHAN_X, tmp0);
  448. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
  449. emit_store( bld, inst, 0, CHAN_Y, tmp1);
  450. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
  451. emit_store( bld, inst, 0, CHAN_Z, tmp2);
  452. }
  453. /* dst.w = 1.0 */
  454. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
  455. tmp0 = bld->base.one;
  456. emit_store( bld, inst, 0, CHAN_W, tmp0);
  457. }
  458. break;
  459. case TGSI_OPCODE_LOG:
  460. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
  461. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
  462. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
  463. LLVMValueRef *p_floor_log2;
  464. LLVMValueRef *p_exp;
  465. LLVMValueRef *p_log2;
  466. src0 = emit_fetch( bld, inst, 0, CHAN_X );
  467. src0 = lp_build_abs( &bld->base, src0 );
  468. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
  469. p_floor_log2 = &tmp0;
  470. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
  471. p_exp = &tmp1;
  472. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
  473. p_log2 = &tmp2;
  474. lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
  475. /* dst.x = floor(lg2(abs(src.x))) */
  476. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
  477. emit_store( bld, inst, 0, CHAN_X, tmp0);
  478. /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
  479. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
  480. tmp1 = lp_build_div( &bld->base, src0, tmp1);
  481. emit_store( bld, inst, 0, CHAN_Y, tmp1);
  482. }
  483. /* dst.z = lg2(abs(src.x)) */
  484. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
  485. emit_store( bld, inst, 0, CHAN_Z, tmp2);
  486. }
  487. /* dst.w = 1.0 */
  488. if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
  489. tmp0 = bld->base.one;
  490. emit_store( bld, inst, 0, CHAN_W, tmp0);
  491. }
  492. break;
  493. case TGSI_OPCODE_MUL:
  494. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  495. src0 = emit_fetch( bld, inst, 0, chan_index );
  496. src1 = emit_fetch( bld, inst, 1, chan_index );
  497. dst0 = lp_build_mul(&bld->base, src0, src1);
  498. emit_store( bld, inst, 0, chan_index, dst0);
  499. }
  500. break;
  501. case TGSI_OPCODE_ADD:
  502. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  503. src0 = emit_fetch( bld, inst, 0, chan_index );
  504. src1 = emit_fetch( bld, inst, 1, chan_index );
  505. dst0 = lp_build_add(&bld->base, src0, src1);
  506. emit_store( bld, inst, 0, chan_index, dst0);
  507. }
  508. break;
  509. case TGSI_OPCODE_DP3:
  510. /* TGSI_OPCODE_DOT3 */
  511. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  512. tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
  513. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
  514. tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
  515. tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
  516. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  517. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  518. tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
  519. tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
  520. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  521. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  522. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  523. emit_store( bld, inst, 0, chan_index, tmp0);
  524. }
  525. break;
  526. case TGSI_OPCODE_DP4:
  527. /* TGSI_OPCODE_DOT4 */
  528. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  529. tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
  530. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
  531. tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
  532. tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
  533. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  534. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  535. tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
  536. tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
  537. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  538. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  539. tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
  540. tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
  541. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  542. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  543. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  544. emit_store( bld, inst, 0, chan_index, tmp0);
  545. }
  546. break;
  547. case TGSI_OPCODE_DST:
  548. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
  549. tmp0 = bld->base.one;
  550. emit_store( bld, inst, 0, CHAN_X, tmp0);
  551. }
  552. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
  553. tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
  554. tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
  555. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
  556. emit_store( bld, inst, 0, CHAN_Y, tmp0);
  557. }
  558. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
  559. tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
  560. emit_store( bld, inst, 0, CHAN_Z, tmp0);
  561. }
  562. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
  563. tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
  564. emit_store( bld, inst, 0, CHAN_W, tmp0);
  565. }
  566. break;
  567. case TGSI_OPCODE_MIN:
  568. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  569. src0 = emit_fetch( bld, inst, 0, chan_index );
  570. src1 = emit_fetch( bld, inst, 1, chan_index );
  571. dst0 = lp_build_min( &bld->base, src0, src1 );
  572. emit_store( bld, inst, 0, chan_index, dst0);
  573. }
  574. break;
  575. case TGSI_OPCODE_MAX:
  576. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  577. src0 = emit_fetch( bld, inst, 0, chan_index );
  578. src1 = emit_fetch( bld, inst, 1, chan_index );
  579. dst0 = lp_build_max( &bld->base, src0, src1 );
  580. emit_store( bld, inst, 0, chan_index, dst0);
  581. }
  582. break;
  583. case TGSI_OPCODE_SLT:
  584. /* TGSI_OPCODE_SETLT */
  585. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  586. src0 = emit_fetch( bld, inst, 0, chan_index );
  587. src1 = emit_fetch( bld, inst, 1, chan_index );
  588. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
  589. dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
  590. emit_store( bld, inst, 0, chan_index, dst0);
  591. }
  592. break;
  593. case TGSI_OPCODE_SGE:
  594. /* TGSI_OPCODE_SETGE */
  595. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  596. src0 = emit_fetch( bld, inst, 0, chan_index );
  597. src1 = emit_fetch( bld, inst, 1, chan_index );
  598. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
  599. dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
  600. emit_store( bld, inst, 0, chan_index, dst0);
  601. }
  602. break;
  603. case TGSI_OPCODE_MAD:
  604. /* TGSI_OPCODE_MADD */
  605. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  606. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  607. tmp1 = emit_fetch( bld, inst, 1, chan_index );
  608. tmp2 = emit_fetch( bld, inst, 2, chan_index );
  609. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
  610. tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
  611. emit_store( bld, inst, 0, chan_index, tmp0);
  612. }
  613. break;
  614. case TGSI_OPCODE_SUB:
  615. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  616. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  617. tmp1 = emit_fetch( bld, inst, 1, chan_index );
  618. tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
  619. emit_store( bld, inst, 0, chan_index, tmp0);
  620. }
  621. break;
  622. case TGSI_OPCODE_LRP:
  623. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  624. src0 = emit_fetch( bld, inst, 0, chan_index );
  625. src1 = emit_fetch( bld, inst, 1, chan_index );
  626. src2 = emit_fetch( bld, inst, 2, chan_index );
  627. tmp0 = lp_build_sub( &bld->base, src1, src2 );
  628. tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
  629. dst0 = lp_build_add( &bld->base, tmp0, src2 );
  630. emit_store( bld, inst, 0, chan_index, dst0 );
  631. }
  632. break;
  633. case TGSI_OPCODE_CND:
  634. return 0;
  635. break;
  636. case TGSI_OPCODE_CND0:
  637. return 0;
  638. break;
  639. case TGSI_OPCODE_DP2A:
  640. tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
  641. tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
  642. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
  643. tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
  644. tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
  645. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
  646. tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
  647. tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
  648. tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
  649. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  650. emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
  651. }
  652. break;
  653. #if 0
  654. case TGSI_OPCODE_FRC:
  655. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  656. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  657. emit_frc( bld, 0, 0 );
  658. emit_store( bld, inst, 0, chan_index, tmp0);
  659. }
  660. break;
  661. case TGSI_OPCODE_CLAMP:
  662. return 0;
  663. break;
  664. case TGSI_OPCODE_FLR:
  665. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  666. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  667. emit_flr( bld, 0, 0 );
  668. emit_store( bld, inst, 0, chan_index, tmp0);
  669. }
  670. break;
  671. case TGSI_OPCODE_ROUND:
  672. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  673. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  674. emit_rnd( bld, 0, 0 );
  675. emit_store( bld, inst, 0, chan_index, tmp0);
  676. }
  677. break;
  678. #endif
  679. case TGSI_OPCODE_EX2: {
  680. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  681. tmp0 = lp_build_exp2( &bld->base, tmp0);
  682. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  683. emit_store( bld, inst, 0, chan_index, tmp0);
  684. }
  685. break;
  686. }
  687. case TGSI_OPCODE_LG2:
  688. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  689. tmp0 = lp_build_log2( &bld->base, tmp0);
  690. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  691. emit_store( bld, inst, 0, chan_index, tmp0);
  692. }
  693. break;
  694. case TGSI_OPCODE_POW:
  695. src0 = emit_fetch( bld, inst, 0, CHAN_X );
  696. src1 = emit_fetch( bld, inst, 1, CHAN_X );
  697. dst0 = lp_build_pow( &bld->base, src0, src1 );
  698. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  699. emit_store( bld, inst, 0, chan_index, dst0 );
  700. }
  701. break;
  702. case TGSI_OPCODE_XPD:
  703. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
  704. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
  705. tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
  706. tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
  707. }
  708. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
  709. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
  710. tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
  711. tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
  712. }
  713. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
  714. tmp2 = tmp0;
  715. tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
  716. tmp5 = tmp3;
  717. tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
  718. tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
  719. emit_store( bld, inst, 0, CHAN_X, tmp2);
  720. }
  721. if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
  722. IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
  723. tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
  724. tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
  725. }
  726. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
  727. tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
  728. tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
  729. tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
  730. emit_store( bld, inst, 0, CHAN_Y, tmp3);
  731. }
  732. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
  733. tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
  734. tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
  735. tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
  736. emit_store( bld, inst, 0, CHAN_Z, tmp5);
  737. }
  738. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
  739. tmp0 = bld->base.one;
  740. emit_store( bld, inst, 0, CHAN_W, tmp0);
  741. }
  742. break;
  743. case TGSI_OPCODE_ABS:
  744. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  745. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  746. tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
  747. emit_store( bld, inst, 0, chan_index, tmp0);
  748. }
  749. break;
  750. case TGSI_OPCODE_RCC:
  751. return 0;
  752. break;
  753. case TGSI_OPCODE_DPH:
  754. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  755. tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
  756. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
  757. tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
  758. tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
  759. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  760. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  761. tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
  762. tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
  763. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
  764. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  765. tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
  766. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  767. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  768. emit_store( bld, inst, 0, chan_index, tmp0);
  769. }
  770. break;
  771. case TGSI_OPCODE_COS:
  772. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  773. tmp0 = lp_build_cos( &bld->base, tmp0 );
  774. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  775. emit_store( bld, inst, 0, chan_index, tmp0);
  776. }
  777. break;
  778. case TGSI_OPCODE_DDX:
  779. return 0;
  780. break;
  781. case TGSI_OPCODE_DDY:
  782. return 0;
  783. break;
  784. #if 0
  785. case TGSI_OPCODE_KILP:
  786. /* predicated kill */
  787. emit_kilp( bld );
  788. return 0; /* XXX fix me */
  789. break;
  790. #endif
  791. case TGSI_OPCODE_KIL:
  792. /* conditional kill */
  793. emit_kil( bld, inst );
  794. break;
  795. case TGSI_OPCODE_PK2H:
  796. return 0;
  797. break;
  798. case TGSI_OPCODE_PK2US:
  799. return 0;
  800. break;
  801. case TGSI_OPCODE_PK4B:
  802. return 0;
  803. break;
  804. case TGSI_OPCODE_PK4UB:
  805. return 0;
  806. break;
  807. case TGSI_OPCODE_RFL:
  808. return 0;
  809. break;
  810. case TGSI_OPCODE_SEQ:
  811. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  812. src0 = emit_fetch( bld, inst, 0, chan_index );
  813. src1 = emit_fetch( bld, inst, 1, chan_index );
  814. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
  815. dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
  816. emit_store( bld, inst, 0, chan_index, dst0);
  817. }
  818. break;
  819. case TGSI_OPCODE_SFL:
  820. return 0;
  821. break;
  822. case TGSI_OPCODE_SGT:
  823. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  824. src0 = emit_fetch( bld, inst, 0, chan_index );
  825. src1 = emit_fetch( bld, inst, 1, chan_index );
  826. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
  827. dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
  828. emit_store( bld, inst, 0, chan_index, dst0);
  829. }
  830. break;
  831. case TGSI_OPCODE_SIN:
  832. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  833. tmp0 = lp_build_sin( &bld->base, tmp0 );
  834. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  835. emit_store( bld, inst, 0, chan_index, tmp0);
  836. }
  837. break;
  838. case TGSI_OPCODE_SLE:
  839. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  840. src0 = emit_fetch( bld, inst, 0, chan_index );
  841. src1 = emit_fetch( bld, inst, 1, chan_index );
  842. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
  843. dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
  844. emit_store( bld, inst, 0, chan_index, dst0);
  845. }
  846. break;
  847. case TGSI_OPCODE_SNE:
  848. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  849. src0 = emit_fetch( bld, inst, 0, chan_index );
  850. src1 = emit_fetch( bld, inst, 1, chan_index );
  851. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
  852. dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
  853. emit_store( bld, inst, 0, chan_index, dst0);
  854. }
  855. break;
  856. case TGSI_OPCODE_STR:
  857. return 0;
  858. break;
  859. case TGSI_OPCODE_TEX:
  860. emit_tex( bld, inst, FALSE, FALSE );
  861. break;
  862. case TGSI_OPCODE_TXD:
  863. return 0;
  864. break;
  865. case TGSI_OPCODE_UP2H:
  866. return 0;
  867. break;
  868. case TGSI_OPCODE_UP2US:
  869. return 0;
  870. break;
  871. case TGSI_OPCODE_UP4B:
  872. return 0;
  873. break;
  874. case TGSI_OPCODE_UP4UB:
  875. return 0;
  876. break;
  877. case TGSI_OPCODE_X2D:
  878. return 0;
  879. break;
  880. case TGSI_OPCODE_ARA:
  881. return 0;
  882. break;
  883. #if 0
  884. case TGSI_OPCODE_ARR:
  885. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  886. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  887. emit_rnd( bld, 0, 0 );
  888. emit_f2it( bld, 0 );
  889. emit_store( bld, inst, 0, chan_index, tmp0);
  890. }
  891. break;
  892. #endif
  893. case TGSI_OPCODE_BRA:
  894. return 0;
  895. break;
  896. case TGSI_OPCODE_CAL:
  897. return 0;
  898. break;
  899. #if 0
  900. case TGSI_OPCODE_RET:
  901. emit_ret( bld );
  902. break;
  903. #endif
  904. case TGSI_OPCODE_END:
  905. break;
  906. #if 0
  907. case TGSI_OPCODE_SSG:
  908. /* TGSI_OPCODE_SGN */
  909. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  910. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  911. emit_sgn( bld, 0, 0 );
  912. emit_store( bld, inst, 0, chan_index, tmp0);
  913. }
  914. break;
  915. #endif
  916. case TGSI_OPCODE_CMP:
  917. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  918. src0 = emit_fetch( bld, inst, 0, chan_index );
  919. src1 = emit_fetch( bld, inst, 1, chan_index );
  920. src2 = emit_fetch( bld, inst, 2, chan_index );
  921. tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
  922. dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
  923. emit_store( bld, inst, 0, chan_index, dst0);
  924. }
  925. break;
  926. case TGSI_OPCODE_SCS:
  927. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
  928. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  929. tmp0 = lp_build_cos( &bld->base, tmp0 );
  930. emit_store( bld, inst, 0, CHAN_X, tmp0);
  931. }
  932. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
  933. tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
  934. tmp0 = lp_build_sin( &bld->base, tmp0 );
  935. emit_store( bld, inst, 0, CHAN_Y, tmp0);
  936. }
  937. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
  938. tmp0 = bld->base.zero;
  939. emit_store( bld, inst, 0, CHAN_Z, tmp0);
  940. }
  941. IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
  942. tmp0 = bld->base.one;
  943. emit_store( bld, inst, 0, CHAN_W, tmp0);
  944. }
  945. break;
  946. case TGSI_OPCODE_TXB:
  947. emit_tex( bld, inst, TRUE, FALSE );
  948. break;
  949. case TGSI_OPCODE_NRM:
  950. /* fall-through */
  951. case TGSI_OPCODE_NRM4:
  952. /* 3 or 4-component normalization */
  953. {
  954. uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
  955. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
  956. IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
  957. IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
  958. (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
  959. /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
  960. /* xmm4 = src.x */
  961. /* xmm0 = src.x * src.x */
  962. tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
  963. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
  964. tmp4 = tmp0;
  965. }
  966. tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
  967. /* xmm5 = src.y */
  968. /* xmm0 = xmm0 + src.y * src.y */
  969. tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
  970. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
  971. tmp5 = tmp1;
  972. }
  973. tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
  974. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  975. /* xmm6 = src.z */
  976. /* xmm0 = xmm0 + src.z * src.z */
  977. tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
  978. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
  979. tmp6 = tmp1;
  980. }
  981. tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
  982. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  983. if (dims == 4) {
  984. /* xmm7 = src.w */
  985. /* xmm0 = xmm0 + src.w * src.w */
  986. tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
  987. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
  988. tmp7 = tmp1;
  989. }
  990. tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
  991. tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
  992. }
  993. /* xmm1 = 1 / sqrt(xmm0) */
  994. tmp1 = lp_build_rsqrt( &bld->base, tmp0);
  995. /* dst.x = xmm1 * src.x */
  996. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
  997. tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
  998. emit_store(bld, inst, 0, CHAN_X, tmp4);
  999. }
  1000. /* dst.y = xmm1 * src.y */
  1001. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
  1002. tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
  1003. emit_store(bld, inst, 0, CHAN_Y, tmp5);
  1004. }
  1005. /* dst.z = xmm1 * src.z */
  1006. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
  1007. tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
  1008. emit_store(bld, inst, 0, CHAN_Z, tmp6);
  1009. }
  1010. /* dst.w = xmm1 * src.w */
  1011. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
  1012. tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
  1013. emit_store(bld, inst, 0, CHAN_W, tmp7);
  1014. }
  1015. }
  1016. /* dst0.w = 1.0 */
  1017. if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
  1018. tmp0 = bld->base.one;
  1019. emit_store(bld, inst, 0, CHAN_W, tmp0);
  1020. }
  1021. }
  1022. break;
  1023. case TGSI_OPCODE_DIV:
  1024. return 0;
  1025. break;
  1026. case TGSI_OPCODE_DP2:
  1027. tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
  1028. tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
  1029. tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
  1030. tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
  1031. tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
  1032. tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
  1033. tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
  1034. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  1035. emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
  1036. }
  1037. break;
  1038. case TGSI_OPCODE_TXL:
  1039. emit_tex( bld, inst, TRUE, FALSE );
  1040. break;
  1041. case TGSI_OPCODE_TXP:
  1042. emit_tex( bld, inst, FALSE, TRUE );
  1043. break;
  1044. case TGSI_OPCODE_BRK:
  1045. return 0;
  1046. break;
  1047. case TGSI_OPCODE_IF:
  1048. return 0;
  1049. break;
  1050. case TGSI_OPCODE_LOOP:
  1051. return 0;
  1052. break;
  1053. case TGSI_OPCODE_REP:
  1054. return 0;
  1055. break;
  1056. case TGSI_OPCODE_ELSE:
  1057. return 0;
  1058. break;
  1059. case TGSI_OPCODE_ENDIF:
  1060. return 0;
  1061. break;
  1062. case TGSI_OPCODE_ENDLOOP:
  1063. return 0;
  1064. break;
  1065. case TGSI_OPCODE_ENDREP:
  1066. return 0;
  1067. break;
  1068. case TGSI_OPCODE_PUSHA:
  1069. return 0;
  1070. break;
  1071. case TGSI_OPCODE_POPA:
  1072. return 0;
  1073. break;
  1074. case TGSI_OPCODE_CEIL:
  1075. return 0;
  1076. break;
  1077. case TGSI_OPCODE_I2F:
  1078. return 0;
  1079. break;
  1080. case TGSI_OPCODE_NOT:
  1081. return 0;
  1082. break;
  1083. #if 0
  1084. case TGSI_OPCODE_TRUNC:
  1085. FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  1086. tmp0 = emit_fetch( bld, inst, 0, chan_index );
  1087. emit_f2it( bld, 0 );
  1088. emit_i2f( bld, 0 );
  1089. emit_store( bld, inst, 0, chan_index, tmp0);
  1090. }
  1091. break;
  1092. #endif
  1093. case TGSI_OPCODE_SHL:
  1094. return 0;
  1095. break;
  1096. case TGSI_OPCODE_SHR:
  1097. return 0;
  1098. break;
  1099. case TGSI_OPCODE_AND:
  1100. return 0;
  1101. break;
  1102. case TGSI_OPCODE_OR:
  1103. return 0;
  1104. break;
  1105. case TGSI_OPCODE_MOD:
  1106. return 0;
  1107. break;
  1108. case TGSI_OPCODE_XOR:
  1109. return 0;
  1110. break;
  1111. case TGSI_OPCODE_SAD:
  1112. return 0;
  1113. break;
  1114. case TGSI_OPCODE_TXF:
  1115. return 0;
  1116. break;
  1117. case TGSI_OPCODE_TXQ:
  1118. return 0;
  1119. break;
  1120. case TGSI_OPCODE_CONT:
  1121. return 0;
  1122. break;
  1123. case TGSI_OPCODE_EMIT:
  1124. return 0;
  1125. break;
  1126. case TGSI_OPCODE_ENDPRIM:
  1127. return 0;
  1128. break;
  1129. default:
  1130. return 0;
  1131. }
  1132. return 1;
  1133. }
  1134. static void
  1135. emit_declaration(
  1136. struct lp_build_tgsi_soa_context *bld,
  1137. struct tgsi_full_declaration *decl )
  1138. {
  1139. if( decl->Declaration.File == TGSI_FILE_INPUT ) {
  1140. LLVMBuilderRef builder = bld->base.builder;
  1141. unsigned first, last, mask;
  1142. unsigned attrib, chan;
  1143. first = decl->DeclarationRange.First;
  1144. last = decl->DeclarationRange.Last;
  1145. mask = decl->Declaration.UsageMask;
  1146. for( attrib = first; attrib <= last; attrib++ ) {
  1147. for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
  1148. LLVMValueRef input = bld->base.undef;
  1149. if( mask & (1 << chan) ) {
  1150. LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), (1 + attrib)*NUM_CHANNELS + chan, 0);
  1151. LLVMValueRef a0;
  1152. LLVMValueRef dadx;
  1153. LLVMValueRef dady;
  1154. switch( decl->Declaration.Interpolate ) {
  1155. case TGSI_INTERPOLATE_PERSPECTIVE:
  1156. /* fall-through */
  1157. case TGSI_INTERPOLATE_LINEAR: {
  1158. LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
  1159. LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
  1160. dadx = LLVMBuildLoad(builder, dadx_ptr, "");
  1161. dady = LLVMBuildLoad(builder, dady_ptr, "");
  1162. dadx = lp_build_broadcast_scalar(&bld->base, dadx);
  1163. dady = lp_build_broadcast_scalar(&bld->base, dady);
  1164. lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
  1165. lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
  1166. /* fall-through */
  1167. }
  1168. case TGSI_INTERPOLATE_CONSTANT: {
  1169. LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
  1170. a0 = LLVMBuildLoad(builder, a0_ptr, "");
  1171. a0 = lp_build_broadcast_scalar(&bld->base, a0);
  1172. lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
  1173. break;
  1174. }
  1175. default:
  1176. assert(0);
  1177. break;
  1178. }
  1179. input = a0;
  1180. if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
  1181. input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
  1182. input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
  1183. }
  1184. if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
  1185. if(!bld->oow)
  1186. bld->oow = lp_build_rcp(&bld->base, bld->w);
  1187. input = lp_build_mul(&bld->base, input, bld->oow);
  1188. }
  1189. lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
  1190. }
  1191. bld->inputs[attrib][chan] = input;
  1192. }
  1193. }
  1194. }
  1195. }
  1196. /**
  1197. * Translate a TGSI vertex/fragment shader to SSE2 code.
  1198. * Slightly different things are done for vertex vs. fragment shaders.
  1199. *
  1200. * \param tokens the TGSI input shader
  1201. * \param bld the output SSE code/function
  1202. * \param immediates buffer to place immediates, later passed to SSE bld
  1203. * \param return 1 for success, 0 if translation failed
  1204. */
  1205. LLVMValueRef
  1206. lp_build_tgsi_soa(LLVMBuilderRef builder,
  1207. const struct tgsi_token *tokens,
  1208. union lp_type type,
  1209. LLVMValueRef *pos,
  1210. LLVMValueRef a0_ptr,
  1211. LLVMValueRef dadx_ptr,
  1212. LLVMValueRef dady_ptr,
  1213. LLVMValueRef consts_ptr,
  1214. LLVMValueRef (*outputs)[4],
  1215. LLVMValueRef samplers_ptr)
  1216. {
  1217. struct lp_build_tgsi_soa_context bld;
  1218. struct tgsi_parse_context parse;
  1219. uint num_immediates = 0;
  1220. unsigned i;
  1221. /* Setup build context */
  1222. memset(&bld, 0, sizeof bld);
  1223. lp_build_context_init(&bld.base, builder, type);
  1224. bld.x = pos[0];
  1225. bld.y = pos[1];
  1226. bld.w = pos[3];
  1227. bld.a0_ptr = a0_ptr;
  1228. bld.dadx_ptr = dadx_ptr;
  1229. bld.dady_ptr = dady_ptr;
  1230. bld.outputs = outputs;
  1231. bld.consts_ptr = consts_ptr;
  1232. bld.samplers_ptr = samplers_ptr;
  1233. tgsi_parse_init( &parse, tokens );
  1234. while( !tgsi_parse_end_of_tokens( &parse ) ) {
  1235. tgsi_parse_token( &parse );
  1236. switch( parse.FullToken.Token.Type ) {
  1237. case TGSI_TOKEN_TYPE_DECLARATION:
  1238. if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
  1239. emit_declaration( &bld, &parse.FullToken.FullDeclaration );
  1240. }
  1241. break;
  1242. case TGSI_TOKEN_TYPE_INSTRUCTION:
  1243. if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
  1244. unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
  1245. const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
  1246. _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
  1247. info ? info->mnemonic : "<invalid>");
  1248. }
  1249. break;
  1250. case TGSI_TOKEN_TYPE_IMMEDIATE:
  1251. /* simply copy the immediate values into the next immediates[] slot */
  1252. {
  1253. const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
  1254. assert(size <= 4);
  1255. assert(num_immediates < LP_MAX_IMMEDIATES);
  1256. for( i = 0; i < size; ++i )
  1257. bld.immediates[num_immediates][i] =
  1258. lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
  1259. for( i = size; i < 4; ++i )
  1260. bld.immediates[num_immediates][i] = bld.base.undef;
  1261. num_immediates++;
  1262. }
  1263. break;
  1264. default:
  1265. assert( 0 );
  1266. }
  1267. }
  1268. tgsi_parse_free( &parse );
  1269. return bld.mask;
  1270. }