Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lp_bld_format_aos.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. /**************************************************************************
  2. *
  3. * Copyright 2009 VMware, Inc.
  4. * All Rights Reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sub license, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice (including the
  15. * next paragraph) shall be included in all copies or substantial portions
  16. * of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21. * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25. *
  26. **************************************************************************/
  27. /**
  28. * @file
  29. * AoS pixel format manipulation.
  30. *
  31. * @author Jose Fonseca <jfonseca@vmware.com>
  32. */
  33. #include "util/u_cpu_detect.h"
  34. #include "util/u_format.h"
  35. #include "lp_bld_type.h"
  36. #include "lp_bld_const.h"
  37. #include "lp_bld_logic.h"
  38. #include "lp_bld_swizzle.h"
  39. #include "lp_bld_format.h"
  40. /**
  41. * Unpack a single pixel into its RGBA components.
  42. *
  43. * @param packed integer.
  44. *
  45. * @return RGBA in a 4 floats vector.
  46. *
  47. * XXX: This is mostly for reference and testing -- operating a single pixel at
  48. * a time is rarely if ever needed.
  49. */
  50. LLVMValueRef
  51. lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
  52. const struct util_format_description *desc,
  53. LLVMValueRef packed)
  54. {
  55. LLVMTypeRef type;
  56. LLVMValueRef shifted, casted, scaled, masked;
  57. LLVMValueRef shifts[4];
  58. LLVMValueRef masks[4];
  59. LLVMValueRef scales[4];
  60. LLVMValueRef swizzles[4];
  61. LLVMValueRef aux[4];
  62. bool normalized;
  63. int empty_channel;
  64. unsigned shift;
  65. unsigned i;
  66. /* FIXME: Support more formats */
  67. assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
  68. assert(desc->block.width == 1);
  69. assert(desc->block.height == 1);
  70. assert(desc->block.bits <= 32);
  71. type = LLVMIntType(desc->block.bits);
  72. /* Do the intermediate integer computations with 32bit integers since it
  73. * matches floating point size */
  74. if (desc->block.bits < 32)
  75. packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
  76. /* Broadcast the packed value to all four channels */
  77. packed = LLVMBuildInsertElement(builder,
  78. LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
  79. packed,
  80. LLVMConstNull(LLVMInt32Type()),
  81. "");
  82. packed = LLVMBuildShuffleVector(builder,
  83. packed,
  84. LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
  85. LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
  86. "");
  87. /* Initialize vector constants */
  88. normalized = FALSE;
  89. empty_channel = -1;
  90. shift = 0;
  91. for (i = 0; i < 4; ++i) {
  92. unsigned bits = desc->channel[i].size;
  93. if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  94. shifts[i] = LLVMGetUndef(LLVMInt32Type());
  95. masks[i] = LLVMConstNull(LLVMInt32Type());
  96. scales[i] = LLVMConstNull(LLVMFloatType());
  97. empty_channel = i;
  98. }
  99. else {
  100. unsigned mask = (1 << bits) - 1;
  101. assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  102. assert(bits < 32);
  103. shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
  104. masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
  105. if (desc->channel[i].normalized) {
  106. scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
  107. normalized = TRUE;
  108. }
  109. else
  110. scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
  111. }
  112. shift += bits;
  113. }
  114. shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
  115. masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
  116. /* UIToFP can't be expressed in SSE2 */
  117. casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
  118. if (normalized)
  119. scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
  120. else
  121. scaled = casted;
  122. for (i = 0; i < 4; ++i)
  123. aux[i] = LLVMGetUndef(LLVMFloatType());
  124. for (i = 0; i < 4; ++i) {
  125. enum util_format_swizzle swizzle = desc->swizzle[i];
  126. switch (swizzle) {
  127. case UTIL_FORMAT_SWIZZLE_X:
  128. case UTIL_FORMAT_SWIZZLE_Y:
  129. case UTIL_FORMAT_SWIZZLE_Z:
  130. case UTIL_FORMAT_SWIZZLE_W:
  131. swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
  132. break;
  133. case UTIL_FORMAT_SWIZZLE_0:
  134. assert(empty_channel >= 0);
  135. swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
  136. break;
  137. case UTIL_FORMAT_SWIZZLE_1:
  138. swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
  139. aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
  140. break;
  141. case UTIL_FORMAT_SWIZZLE_NONE:
  142. swizzles[i] = LLVMGetUndef(LLVMFloatType());
  143. assert(0);
  144. break;
  145. }
  146. }
  147. return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
  148. }
  149. /**
  150. * Take a vector with packed pixels and unpack into a rgba8 vector.
  151. *
  152. * Formats with bit depth smaller than 32bits are accepted, but they must be
  153. * padded to 32bits.
  154. */
  155. LLVMValueRef
  156. lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
  157. const struct util_format_description *desc,
  158. struct lp_type type,
  159. LLVMValueRef packed)
  160. {
  161. struct lp_build_context bld;
  162. bool rgba8;
  163. LLVMValueRef res;
  164. unsigned i;
  165. lp_build_context_init(&bld, builder, type);
  166. /* FIXME: Support more formats */
  167. assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
  168. assert(desc->block.width == 1);
  169. assert(desc->block.height == 1);
  170. assert(desc->block.bits <= 32);
  171. assert(!type.floating);
  172. assert(!type.fixed);
  173. assert(type.norm);
  174. assert(type.width == 8);
  175. assert(type.length % 4 == 0);
  176. rgba8 = TRUE;
  177. for(i = 0; i < 4; ++i) {
  178. assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
  179. desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
  180. if(desc->channel[0].size != 8)
  181. rgba8 = FALSE;
  182. }
  183. if(rgba8) {
  184. /*
  185. * The pixel is already in a rgba8 format variant. All it is necessary
  186. * is to swizzle the channels.
  187. */
  188. unsigned char swizzles[4];
  189. boolean zeros[4]; /* bitwise AND mask */
  190. boolean ones[4]; /* bitwise OR mask */
  191. boolean swizzles_needed = FALSE;
  192. boolean zeros_needed = FALSE;
  193. boolean ones_needed = FALSE;
  194. for(i = 0; i < 4; ++i) {
  195. enum util_format_swizzle swizzle = desc->swizzle[i];
  196. /* Initialize with the no-op case */
  197. swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
  198. zeros[i] = TRUE;
  199. ones[i] = FALSE;
  200. switch (swizzle) {
  201. case UTIL_FORMAT_SWIZZLE_X:
  202. case UTIL_FORMAT_SWIZZLE_Y:
  203. case UTIL_FORMAT_SWIZZLE_Z:
  204. case UTIL_FORMAT_SWIZZLE_W:
  205. if(swizzle != swizzles[i]) {
  206. swizzles[i] = swizzle;
  207. swizzles_needed = TRUE;
  208. }
  209. break;
  210. case UTIL_FORMAT_SWIZZLE_0:
  211. zeros[i] = FALSE;
  212. zeros_needed = TRUE;
  213. break;
  214. case UTIL_FORMAT_SWIZZLE_1:
  215. ones[i] = TRUE;
  216. ones_needed = TRUE;
  217. break;
  218. case UTIL_FORMAT_SWIZZLE_NONE:
  219. assert(0);
  220. break;
  221. }
  222. }
  223. res = packed;
  224. if(swizzles_needed)
  225. res = lp_build_swizzle1_aos(&bld, res, swizzles);
  226. if(zeros_needed) {
  227. /* Mask out zero channels */
  228. LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
  229. res = LLVMBuildAnd(builder, res, mask, "");
  230. }
  231. if(ones_needed) {
  232. /* Or one channels */
  233. LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
  234. res = LLVMBuildOr(builder, res, mask, "");
  235. }
  236. }
  237. else {
  238. /* FIXME */
  239. assert(0);
  240. res = lp_build_undef(type);
  241. }
  242. return res;
  243. }
  244. /**
  245. * Pack a single pixel.
  246. *
  247. * @param rgba 4 float vector with the unpacked components.
  248. *
  249. * XXX: This is mostly for reference and testing -- operating a single pixel at
  250. * a time is rarely if ever needed.
  251. */
  252. LLVMValueRef
  253. lp_build_pack_rgba_aos(LLVMBuilderRef builder,
  254. const struct util_format_description *desc,
  255. LLVMValueRef rgba)
  256. {
  257. LLVMTypeRef type;
  258. LLVMValueRef packed = NULL;
  259. LLVMValueRef swizzles[4];
  260. LLVMValueRef shifted, casted, scaled, unswizzled;
  261. LLVMValueRef shifts[4];
  262. LLVMValueRef scales[4];
  263. bool normalized;
  264. unsigned shift;
  265. unsigned i, j;
  266. assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
  267. assert(desc->block.width == 1);
  268. assert(desc->block.height == 1);
  269. type = LLVMIntType(desc->block.bits);
  270. /* Unswizzle the color components into the source vector. */
  271. for (i = 0; i < 4; ++i) {
  272. for (j = 0; j < 4; ++j) {
  273. if (desc->swizzle[j] == i)
  274. break;
  275. }
  276. if (j < 4)
  277. swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
  278. else
  279. swizzles[i] = LLVMGetUndef(LLVMInt32Type());
  280. }
  281. unswizzled = LLVMBuildShuffleVector(builder, rgba,
  282. LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
  283. LLVMConstVector(swizzles, 4), "");
  284. normalized = FALSE;
  285. shift = 0;
  286. for (i = 0; i < 4; ++i) {
  287. unsigned bits = desc->channel[i].size;
  288. if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  289. shifts[i] = LLVMGetUndef(LLVMInt32Type());
  290. scales[i] = LLVMGetUndef(LLVMFloatType());
  291. }
  292. else {
  293. unsigned mask = (1 << bits) - 1;
  294. assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  295. assert(bits < 32);
  296. shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
  297. if (desc->channel[i].normalized) {
  298. scales[i] = LLVMConstReal(LLVMFloatType(), mask);
  299. normalized = TRUE;
  300. }
  301. else
  302. scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
  303. }
  304. shift += bits;
  305. }
  306. if (normalized)
  307. scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
  308. else
  309. scaled = unswizzled;
  310. casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
  311. shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
  312. /* Bitwise or all components */
  313. for (i = 0; i < 4; ++i) {
  314. if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
  315. LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
  316. if (packed)
  317. packed = LLVMBuildOr(builder, packed, component, "");
  318. else
  319. packed = component;
  320. }
  321. }
  322. if (!packed)
  323. packed = LLVMGetUndef(LLVMInt32Type());
  324. if (desc->block.bits < 32)
  325. packed = LLVMBuildTrunc(builder, packed, type, "");
  326. return packed;
  327. }