Clone of mesa.
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

lp_bld_sample.c 28KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782
  1. /**************************************************************************
  2. *
  3. * Copyright 2009 VMware, Inc.
  4. * All Rights Reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sub license, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice (including the
  15. * next paragraph) shall be included in all copies or substantial portions
  16. * of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21. * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25. *
  26. **************************************************************************/
  27. /**
  28. * @file
  29. * Texture sampling -- common code.
  30. *
  31. * @author Jose Fonseca <jfonseca@vmware.com>
  32. */
  33. #include "pipe/p_defines.h"
  34. #include "pipe/p_state.h"
  35. #include "util/u_format.h"
  36. #include "util/u_math.h"
  37. #include "lp_bld_arit.h"
  38. #include "lp_bld_const.h"
  39. #include "lp_bld_debug.h"
  40. #include "lp_bld_flow.h"
  41. #include "lp_bld_sample.h"
  42. #include "lp_bld_swizzle.h"
  43. #include "lp_bld_type.h"
  44. /**
  45. * Does the given texture wrap mode allow sampling the texture border color?
  46. * XXX maybe move this into gallium util code.
  47. */
  48. boolean
  49. lp_sampler_wrap_mode_uses_border_color(unsigned mode,
  50. unsigned min_img_filter,
  51. unsigned mag_img_filter)
  52. {
  53. switch (mode) {
  54. case PIPE_TEX_WRAP_REPEAT:
  55. case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
  56. case PIPE_TEX_WRAP_MIRROR_REPEAT:
  57. case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
  58. return FALSE;
  59. case PIPE_TEX_WRAP_CLAMP:
  60. case PIPE_TEX_WRAP_MIRROR_CLAMP:
  61. if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
  62. mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
  63. return FALSE;
  64. } else {
  65. return TRUE;
  66. }
  67. case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
  68. case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
  69. return TRUE;
  70. default:
  71. assert(0 && "unexpected wrap mode");
  72. return FALSE;
  73. }
  74. }
  75. /**
  76. * Initialize lp_sampler_static_state object with the gallium sampler
  77. * and texture state.
  78. * The former is considered to be static and the later dynamic.
  79. */
  80. void
  81. lp_sampler_static_state(struct lp_sampler_static_state *state,
  82. const struct pipe_sampler_view *view,
  83. const struct pipe_sampler_state *sampler)
  84. {
  85. const struct pipe_resource *texture = view->texture;
  86. memset(state, 0, sizeof *state);
  87. if(!texture)
  88. return;
  89. if(!sampler)
  90. return;
  91. /*
  92. * We don't copy sampler state over unless it is actually enabled, to avoid
  93. * spurious recompiles, as the sampler static state is part of the shader
  94. * key.
  95. *
  96. * Ideally the state tracker or cso_cache module would make all state
  97. * canonical, but until that happens it's better to be safe than sorry here.
  98. *
  99. * XXX: Actually there's much more than can be done here, especially
  100. * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
  101. */
  102. state->format = view->format;
  103. state->swizzle_r = view->swizzle_r;
  104. state->swizzle_g = view->swizzle_g;
  105. state->swizzle_b = view->swizzle_b;
  106. state->swizzle_a = view->swizzle_a;
  107. state->target = texture->target;
  108. state->pot_width = util_is_power_of_two(texture->width0);
  109. state->pot_height = util_is_power_of_two(texture->height0);
  110. state->pot_depth = util_is_power_of_two(texture->depth0);
  111. state->wrap_s = sampler->wrap_s;
  112. state->wrap_t = sampler->wrap_t;
  113. state->wrap_r = sampler->wrap_r;
  114. state->min_img_filter = sampler->min_img_filter;
  115. state->mag_img_filter = sampler->mag_img_filter;
  116. if (view->last_level) {
  117. state->min_mip_filter = sampler->min_mip_filter;
  118. } else {
  119. state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
  120. }
  121. /* If min_lod == max_lod we can greatly simplify mipmap selection.
  122. * This is a case that occurs during automatic mipmap generation.
  123. */
  124. if (sampler->min_lod == sampler->max_lod) {
  125. state->min_max_lod_equal = 1;
  126. }
  127. state->compare_mode = sampler->compare_mode;
  128. if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
  129. state->compare_func = sampler->compare_func;
  130. }
  131. state->normalized_coords = sampler->normalized_coords;
  132. /*
  133. * FIXME: Handle the remainder of pipe_sampler_view.
  134. */
  135. }
  136. /**
  137. * Generate code to compute texture level of detail (lambda).
  138. * \param ddx partial derivatives of (s, t, r, q) with respect to X
  139. * \param ddy partial derivatives of (s, t, r, q) with respect to Y
  140. * \param lod_bias optional float vector with the shader lod bias
  141. * \param explicit_lod optional float vector with the explicit lod
  142. * \param width scalar int texture width
  143. * \param height scalar int texture height
  144. * \param depth scalar int texture depth
  145. *
  146. * XXX: The resulting lod is scalar, so ignore all but the first element of
  147. * derivatives, lod_bias, etc that are passed by the shader.
  148. */
  149. LLVMValueRef
  150. lp_build_lod_selector(struct lp_build_sample_context *bld,
  151. unsigned unit,
  152. const LLVMValueRef ddx[4],
  153. const LLVMValueRef ddy[4],
  154. LLVMValueRef lod_bias, /* optional */
  155. LLVMValueRef explicit_lod, /* optional */
  156. LLVMValueRef width,
  157. LLVMValueRef height,
  158. LLVMValueRef depth)
  159. {
  160. LLVMValueRef min_lod =
  161. bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
  162. if (bld->static_state->min_max_lod_equal) {
  163. /* User is forcing sampling from a particular mipmap level.
  164. * This is hit during mipmap generation.
  165. */
  166. return min_lod;
  167. }
  168. else {
  169. struct lp_build_context *float_bld = &bld->float_bld;
  170. LLVMValueRef sampler_lod_bias =
  171. bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
  172. LLVMValueRef max_lod =
  173. bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);
  174. LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
  175. LLVMValueRef lod;
  176. if (explicit_lod) {
  177. lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
  178. index0, "");
  179. }
  180. else {
  181. const int dims = texture_dims(bld->static_state->target);
  182. LLVMValueRef dsdx, dsdy;
  183. LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
  184. LLVMValueRef rho;
  185. dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
  186. dsdx = lp_build_abs(float_bld, dsdx);
  187. dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
  188. dsdy = lp_build_abs(float_bld, dsdy);
  189. if (dims > 1) {
  190. dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
  191. dtdx = lp_build_abs(float_bld, dtdx);
  192. dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
  193. dtdy = lp_build_abs(float_bld, dtdy);
  194. if (dims > 2) {
  195. drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
  196. drdx = lp_build_abs(float_bld, drdx);
  197. drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
  198. drdy = lp_build_abs(float_bld, drdy);
  199. }
  200. }
  201. /* Compute rho = max of all partial derivatives scaled by texture size.
  202. * XXX this could be vectorized somewhat
  203. */
  204. rho = LLVMBuildFMul(bld->builder,
  205. lp_build_max(float_bld, dsdx, dsdy),
  206. lp_build_int_to_float(float_bld, width), "");
  207. if (dims > 1) {
  208. LLVMValueRef max;
  209. max = LLVMBuildFMul(bld->builder,
  210. lp_build_max(float_bld, dtdx, dtdy),
  211. lp_build_int_to_float(float_bld, height), "");
  212. rho = lp_build_max(float_bld, rho, max);
  213. if (dims > 2) {
  214. max = LLVMBuildFMul(bld->builder,
  215. lp_build_max(float_bld, drdx, drdy),
  216. lp_build_int_to_float(float_bld, depth), "");
  217. rho = lp_build_max(float_bld, rho, max);
  218. }
  219. }
  220. /* compute lod = log2(rho) */
  221. lod = lp_build_log2(float_bld, rho);
  222. /* add shader lod bias */
  223. if (lod_bias) {
  224. lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
  225. index0, "");
  226. lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
  227. }
  228. }
  229. /* add sampler lod bias */
  230. lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
  231. /* clamp lod */
  232. lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
  233. return lod;
  234. }
  235. }
  236. /**
  237. * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
  238. * mipmap level index.
  239. * Note: this is all scalar code.
  240. * \param lod scalar float texture level of detail
  241. * \param level_out returns integer
  242. */
  243. void
  244. lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
  245. unsigned unit,
  246. LLVMValueRef lod,
  247. LLVMValueRef *level_out)
  248. {
  249. struct lp_build_context *float_bld = &bld->float_bld;
  250. struct lp_build_context *int_bld = &bld->int_bld;
  251. LLVMValueRef last_level, level;
  252. LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
  253. last_level = bld->dynamic_state->last_level(bld->dynamic_state,
  254. bld->builder, unit);
  255. /* convert float lod to integer */
  256. level = lp_build_iround(float_bld, lod);
  257. /* clamp level to legal range of levels */
  258. *level_out = lp_build_clamp(int_bld, level, zero, last_level);
  259. }
  260. /**
  261. * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
  262. * two (adjacent) mipmap level indexes. Later, we'll sample from those
  263. * two mipmap levels and interpolate between them.
  264. */
  265. void
  266. lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
  267. unsigned unit,
  268. LLVMValueRef lod,
  269. LLVMValueRef *level0_out,
  270. LLVMValueRef *level1_out,
  271. LLVMValueRef *weight_out)
  272. {
  273. struct lp_build_context *float_bld = &bld->float_bld;
  274. struct lp_build_context *int_bld = &bld->int_bld;
  275. LLVMValueRef last_level, level;
  276. last_level = bld->dynamic_state->last_level(bld->dynamic_state,
  277. bld->builder, unit);
  278. /* convert float lod to integer */
  279. level = lp_build_ifloor(float_bld, lod);
  280. /* compute level 0 and clamp to legal range of levels */
  281. *level0_out = lp_build_clamp(int_bld, level,
  282. int_bld->zero,
  283. last_level);
  284. /* compute level 1 and clamp to legal range of levels */
  285. level = lp_build_add(int_bld, level, int_bld->one);
  286. *level1_out = lp_build_clamp(int_bld, level,
  287. int_bld->zero,
  288. last_level);
  289. *weight_out = lp_build_fract(float_bld, lod);
  290. }
  291. /**
  292. * Return pointer to a single mipmap level.
  293. * \param data_array array of pointers to mipmap levels
  294. * \param level integer mipmap level
  295. */
  296. LLVMValueRef
  297. lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
  298. LLVMValueRef data_array, LLVMValueRef level)
  299. {
  300. LLVMValueRef indexes[2], data_ptr;
  301. indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
  302. indexes[1] = level;
  303. data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
  304. data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
  305. return data_ptr;
  306. }
  307. LLVMValueRef
  308. lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
  309. LLVMValueRef data_array, int level)
  310. {
  311. LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
  312. return lp_build_get_mipmap_level(bld, data_array, lvl);
  313. }
  314. /**
  315. * Codegen equivalent for u_minify().
  316. * Return max(1, base_size >> level);
  317. */
  318. static LLVMValueRef
  319. lp_build_minify(struct lp_build_sample_context *bld,
  320. LLVMValueRef base_size,
  321. LLVMValueRef level)
  322. {
  323. if (level == bld->int_coord_bld.zero) {
  324. /* if we're using mipmap level zero, no minification is needed */
  325. return base_size;
  326. }
  327. else {
  328. LLVMValueRef size =
  329. LLVMBuildLShr(bld->builder, base_size, level, "minify");
  330. size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
  331. return size;
  332. }
  333. }
  334. /**
  335. * Dereference stride_array[mipmap_level] array to get a stride.
  336. * Return stride as a vector.
  337. */
  338. static LLVMValueRef
  339. lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
  340. LLVMValueRef stride_array, LLVMValueRef level)
  341. {
  342. LLVMValueRef indexes[2], stride;
  343. indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
  344. indexes[1] = level;
  345. stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
  346. stride = LLVMBuildLoad(bld->builder, stride, "");
  347. stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
  348. return stride;
  349. }
  350. /**
  351. * When sampling a mipmap, we need to compute the width, height, depth
  352. * of the source levels from the level indexes. This helper function
  353. * does that.
  354. */
  355. void
  356. lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
  357. unsigned dims,
  358. LLVMValueRef width_vec,
  359. LLVMValueRef height_vec,
  360. LLVMValueRef depth_vec,
  361. LLVMValueRef ilevel0,
  362. LLVMValueRef ilevel1,
  363. LLVMValueRef row_stride_array,
  364. LLVMValueRef img_stride_array,
  365. LLVMValueRef *width0_vec,
  366. LLVMValueRef *width1_vec,
  367. LLVMValueRef *height0_vec,
  368. LLVMValueRef *height1_vec,
  369. LLVMValueRef *depth0_vec,
  370. LLVMValueRef *depth1_vec,
  371. LLVMValueRef *row_stride0_vec,
  372. LLVMValueRef *row_stride1_vec,
  373. LLVMValueRef *img_stride0_vec,
  374. LLVMValueRef *img_stride1_vec)
  375. {
  376. const unsigned mip_filter = bld->static_state->min_mip_filter;
  377. LLVMValueRef ilevel0_vec, ilevel1_vec;
  378. ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
  379. if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
  380. ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
  381. /*
  382. * Compute width, height, depth at mipmap level 'ilevel0'
  383. */
  384. *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
  385. if (dims >= 2) {
  386. *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
  387. *row_stride0_vec = lp_build_get_level_stride_vec(bld,
  388. row_stride_array,
  389. ilevel0);
  390. if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
  391. *img_stride0_vec = lp_build_get_level_stride_vec(bld,
  392. img_stride_array,
  393. ilevel0);
  394. if (dims == 3) {
  395. *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
  396. }
  397. }
  398. }
  399. if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
  400. /* compute width, height, depth for second mipmap level at 'ilevel1' */
  401. *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
  402. if (dims >= 2) {
  403. *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
  404. *row_stride1_vec = lp_build_get_level_stride_vec(bld,
  405. row_stride_array,
  406. ilevel1);
  407. if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
  408. *img_stride1_vec = lp_build_get_level_stride_vec(bld,
  409. img_stride_array,
  410. ilevel1);
  411. if (dims == 3) {
  412. *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
  413. }
  414. }
  415. }
  416. }
  417. }
  418. /** Helper used by lp_build_cube_lookup() */
  419. static LLVMValueRef
  420. lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
  421. {
  422. /* ima = -0.5 / abs(coord); */
  423. LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
  424. LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
  425. LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
  426. return ima;
  427. }
  428. /**
  429. * Helper used by lp_build_cube_lookup()
  430. * \param sign scalar +1 or -1
  431. * \param coord float vector
  432. * \param ima float vector
  433. */
  434. static LLVMValueRef
  435. lp_build_cube_coord(struct lp_build_context *coord_bld,
  436. LLVMValueRef sign, int negate_coord,
  437. LLVMValueRef coord, LLVMValueRef ima)
  438. {
  439. /* return negate(coord) * ima * sign + 0.5; */
  440. LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
  441. LLVMValueRef res;
  442. assert(negate_coord == +1 || negate_coord == -1);
  443. if (negate_coord == -1) {
  444. coord = lp_build_negate(coord_bld, coord);
  445. }
  446. res = lp_build_mul(coord_bld, coord, ima);
  447. if (sign) {
  448. sign = lp_build_broadcast_scalar(coord_bld, sign);
  449. res = lp_build_mul(coord_bld, res, sign);
  450. }
  451. res = lp_build_add(coord_bld, res, half);
  452. return res;
  453. }
  454. /** Helper used by lp_build_cube_lookup()
  455. * Return (major_coord >= 0) ? pos_face : neg_face;
  456. */
  457. static LLVMValueRef
  458. lp_build_cube_face(struct lp_build_sample_context *bld,
  459. LLVMValueRef major_coord,
  460. unsigned pos_face, unsigned neg_face)
  461. {
  462. LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
  463. major_coord,
  464. bld->float_bld.zero, "");
  465. LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
  466. LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
  467. LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
  468. return res;
  469. }
  470. /**
  471. * Generate code to do cube face selection and compute per-face texcoords.
  472. */
  473. void
  474. lp_build_cube_lookup(struct lp_build_sample_context *bld,
  475. LLVMValueRef s,
  476. LLVMValueRef t,
  477. LLVMValueRef r,
  478. LLVMValueRef *face,
  479. LLVMValueRef *face_s,
  480. LLVMValueRef *face_t)
  481. {
  482. struct lp_build_context *float_bld = &bld->float_bld;
  483. struct lp_build_context *coord_bld = &bld->coord_bld;
  484. LLVMValueRef rx, ry, rz;
  485. LLVMValueRef arx, ary, arz;
  486. LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
  487. LLVMValueRef arx_ge_ary, arx_ge_arz;
  488. LLVMValueRef ary_ge_arx, ary_ge_arz;
  489. LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
  490. LLVMValueRef rx_pos, ry_pos, rz_pos;
  491. assert(bld->coord_bld.type.length == 4);
  492. /*
  493. * Use the average of the four pixel's texcoords to choose the face.
  494. */
  495. rx = lp_build_mul(float_bld, c25,
  496. lp_build_sum_vector(&bld->coord_bld, s));
  497. ry = lp_build_mul(float_bld, c25,
  498. lp_build_sum_vector(&bld->coord_bld, t));
  499. rz = lp_build_mul(float_bld, c25,
  500. lp_build_sum_vector(&bld->coord_bld, r));
  501. arx = lp_build_abs(float_bld, rx);
  502. ary = lp_build_abs(float_bld, ry);
  503. arz = lp_build_abs(float_bld, rz);
  504. /*
  505. * Compare sign/magnitude of rx,ry,rz to determine face
  506. */
  507. arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
  508. arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
  509. ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
  510. ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
  511. arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
  512. ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
  513. rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
  514. ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
  515. rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
  516. {
  517. struct lp_build_flow_context *flow_ctx;
  518. struct lp_build_if_state if_ctx;
  519. flow_ctx = lp_build_flow_create(bld->builder);
  520. lp_build_flow_scope_begin(flow_ctx);
  521. *face_s = bld->coord_bld.undef;
  522. *face_t = bld->coord_bld.undef;
  523. *face = bld->int_bld.undef;
  524. lp_build_name(*face_s, "face_s");
  525. lp_build_name(*face_t, "face_t");
  526. lp_build_name(*face, "face");
  527. lp_build_flow_scope_declare(flow_ctx, face_s);
  528. lp_build_flow_scope_declare(flow_ctx, face_t);
  529. lp_build_flow_scope_declare(flow_ctx, face);
  530. lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
  531. {
  532. /* +/- X face */
  533. LLVMValueRef sign = lp_build_sgn(float_bld, rx);
  534. LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
  535. *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
  536. *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
  537. *face = lp_build_cube_face(bld, rx,
  538. PIPE_TEX_FACE_POS_X,
  539. PIPE_TEX_FACE_NEG_X);
  540. }
  541. lp_build_else(&if_ctx);
  542. {
  543. struct lp_build_flow_context *flow_ctx2;
  544. struct lp_build_if_state if_ctx2;
  545. LLVMValueRef face_s2 = bld->coord_bld.undef;
  546. LLVMValueRef face_t2 = bld->coord_bld.undef;
  547. LLVMValueRef face2 = bld->int_bld.undef;
  548. flow_ctx2 = lp_build_flow_create(bld->builder);
  549. lp_build_flow_scope_begin(flow_ctx2);
  550. lp_build_flow_scope_declare(flow_ctx2, &face_s2);
  551. lp_build_flow_scope_declare(flow_ctx2, &face_t2);
  552. lp_build_flow_scope_declare(flow_ctx2, &face2);
  553. ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
  554. lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
  555. {
  556. /* +/- Y face */
  557. LLVMValueRef sign = lp_build_sgn(float_bld, ry);
  558. LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
  559. face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
  560. face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
  561. face2 = lp_build_cube_face(bld, ry,
  562. PIPE_TEX_FACE_POS_Y,
  563. PIPE_TEX_FACE_NEG_Y);
  564. }
  565. lp_build_else(&if_ctx2);
  566. {
  567. /* +/- Z face */
  568. LLVMValueRef sign = lp_build_sgn(float_bld, rz);
  569. LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
  570. face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
  571. face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
  572. face2 = lp_build_cube_face(bld, rz,
  573. PIPE_TEX_FACE_POS_Z,
  574. PIPE_TEX_FACE_NEG_Z);
  575. }
  576. lp_build_endif(&if_ctx2);
  577. lp_build_flow_scope_end(flow_ctx2);
  578. lp_build_flow_destroy(flow_ctx2);
  579. *face_s = face_s2;
  580. *face_t = face_t2;
  581. *face = face2;
  582. }
  583. lp_build_endif(&if_ctx);
  584. lp_build_flow_scope_end(flow_ctx);
  585. lp_build_flow_destroy(flow_ctx);
  586. }
  587. }
  588. /**
  589. * Compute the partial offset of a pixel block along an arbitrary axis.
  590. *
  591. * @param coord coordinate in pixels
  592. * @param stride number of bytes between rows of successive pixel blocks
  593. * @param block_length number of pixels in a pixels block along the coordinate
  594. * axis
  595. * @param out_offset resulting relative offset of the pixel block in bytes
  596. * @param out_subcoord resulting sub-block pixel coordinate
  597. */
  598. void
  599. lp_build_sample_partial_offset(struct lp_build_context *bld,
  600. unsigned block_length,
  601. LLVMValueRef coord,
  602. LLVMValueRef stride,
  603. LLVMValueRef *out_offset,
  604. LLVMValueRef *out_subcoord)
  605. {
  606. LLVMValueRef offset;
  607. LLVMValueRef subcoord;
  608. if (block_length == 1) {
  609. subcoord = bld->zero;
  610. }
  611. else {
  612. /*
  613. * Pixel blocks have power of two dimensions. LLVM should convert the
  614. * rem/div to bit arithmetic.
  615. * TODO: Verify this.
  616. * It does indeed BUT it does transform it to scalar (and back) when doing so
  617. * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
  618. * The generated code looks seriously unfunny and is quite expensive.
  619. */
  620. #if 0
  621. LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
  622. subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
  623. coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
  624. #else
  625. unsigned logbase2 = util_unsigned_logbase2(block_length);
  626. LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2);
  627. LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1);
  628. subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, "");
  629. coord = LLVMBuildLShr(bld->builder, coord, block_shift, "");
  630. #endif
  631. }
  632. offset = lp_build_mul(bld, coord, stride);
  633. assert(out_offset);
  634. assert(out_subcoord);
  635. *out_offset = offset;
  636. *out_subcoord = subcoord;
  637. }
  638. /**
  639. * Compute the offset of a pixel block.
  640. *
  641. * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
  642. *
  643. * Returns the relative offset and i,j sub-block coordinates
  644. */
  645. void
  646. lp_build_sample_offset(struct lp_build_context *bld,
  647. const struct util_format_description *format_desc,
  648. LLVMValueRef x,
  649. LLVMValueRef y,
  650. LLVMValueRef z,
  651. LLVMValueRef y_stride,
  652. LLVMValueRef z_stride,
  653. LLVMValueRef *out_offset,
  654. LLVMValueRef *out_i,
  655. LLVMValueRef *out_j)
  656. {
  657. LLVMValueRef x_stride;
  658. LLVMValueRef offset;
  659. x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
  660. lp_build_sample_partial_offset(bld,
  661. format_desc->block.width,
  662. x, x_stride,
  663. &offset, out_i);
  664. if (y && y_stride) {
  665. LLVMValueRef y_offset;
  666. lp_build_sample_partial_offset(bld,
  667. format_desc->block.height,
  668. y, y_stride,
  669. &y_offset, out_j);
  670. offset = lp_build_add(bld, offset, y_offset);
  671. }
  672. else {
  673. *out_j = bld->zero;
  674. }
  675. if (z && z_stride) {
  676. LLVMValueRef z_offset;
  677. LLVMValueRef k;
  678. lp_build_sample_partial_offset(bld,
  679. 1, /* pixel blocks are always 2D */
  680. z, z_stride,
  681. &z_offset, &k);
  682. offset = lp_build_add(bld, offset, z_offset);
  683. }
  684. *out_offset = offset;
  685. }