Clone of mesa.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lp_bld_swizzle.c 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. /**************************************************************************
  2. *
  3. * Copyright 2009 VMware, Inc.
  4. * All Rights Reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sub license, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice (including the
  15. * next paragraph) shall be included in all copies or substantial portions
  16. * of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21. * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25. *
  26. **************************************************************************/
  27. /**
  28. * @file
  29. * Helper functions for swizzling/shuffling.
  30. *
  31. * @author Jose Fonseca <jfonseca@vmware.com>
  32. */
  33. #include "util/u_debug.h"
  34. #include "lp_bld_type.h"
  35. #include "lp_bld_const.h"
  36. #include "lp_bld_logic.h"
  37. #include "lp_bld_swizzle.h"
  38. LLVMValueRef
  39. lp_build_broadcast(LLVMBuilderRef builder,
  40. LLVMTypeRef vec_type,
  41. LLVMValueRef scalar)
  42. {
  43. const unsigned n = LLVMGetVectorSize(vec_type);
  44. LLVMValueRef res;
  45. unsigned i;
  46. res = LLVMGetUndef(vec_type);
  47. for(i = 0; i < n; ++i) {
  48. LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
  49. res = LLVMBuildInsertElement(builder, res, scalar, index, "");
  50. }
  51. return res;
  52. }
  53. LLVMValueRef
  54. lp_build_broadcast_scalar(struct lp_build_context *bld,
  55. LLVMValueRef scalar)
  56. {
  57. const struct lp_type type = bld->type;
  58. LLVMValueRef res;
  59. unsigned i;
  60. res = bld->undef;
  61. for(i = 0; i < type.length; ++i) {
  62. LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
  63. res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
  64. }
  65. return res;
  66. }
  67. LLVMValueRef
  68. lp_build_broadcast_aos(struct lp_build_context *bld,
  69. LLVMValueRef a,
  70. unsigned channel)
  71. {
  72. const struct lp_type type = bld->type;
  73. const unsigned n = type.length;
  74. unsigned i, j;
  75. if(a == bld->undef || a == bld->zero || a == bld->one)
  76. return a;
  77. /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
  78. * using shuffles here actually causes worst results. More investigation is
  79. * needed. */
  80. if (n <= 4) {
  81. /*
  82. * Shuffle.
  83. */
  84. LLVMTypeRef elem_type = LLVMInt32Type();
  85. LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  86. for(j = 0; j < n; j += 4)
  87. for(i = 0; i < 4; ++i)
  88. shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
  89. return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
  90. }
  91. else {
  92. /*
  93. * Bit mask and recursive shifts
  94. *
  95. * XYZW XYZW .... XYZW <= input
  96. * 0Y00 0Y00 .... 0Y00
  97. * YY00 YY00 .... YY00
  98. * YYYY YYYY .... YYYY <= output
  99. */
  100. struct lp_type type4 = type;
  101. const char shifts[4][2] = {
  102. { 1, 2},
  103. {-1, 2},
  104. { 1, -2},
  105. {-1, -2}
  106. };
  107. boolean cond[4];
  108. unsigned i;
  109. memset(cond, 0, sizeof cond);
  110. cond[channel] = 1;
  111. a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
  112. type4.width *= 4;
  113. type4.length /= 4;
  114. a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
  115. for(i = 0; i < 2; ++i) {
  116. LLVMValueRef tmp = NULL;
  117. int shift = shifts[channel][i];
  118. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  119. shift = -shift;
  120. #endif
  121. if(shift > 0)
  122. tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), "");
  123. if(shift < 0)
  124. tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), "");
  125. assert(tmp);
  126. if(tmp)
  127. a = LLVMBuildOr(bld->builder, a, tmp, "");
  128. }
  129. return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
  130. }
  131. }
  132. LLVMValueRef
  133. lp_build_swizzle1_aos(struct lp_build_context *bld,
  134. LLVMValueRef a,
  135. const unsigned char swizzle[4])
  136. {
  137. const unsigned n = bld->type.length;
  138. unsigned i, j;
  139. if(a == bld->undef || a == bld->zero || a == bld->one)
  140. return a;
  141. if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
  142. return lp_build_broadcast_aos(bld, a, swizzle[0]);
  143. {
  144. /*
  145. * Shuffle.
  146. */
  147. LLVMTypeRef elem_type = LLVMInt32Type();
  148. LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  149. for(j = 0; j < n; j += 4)
  150. for(i = 0; i < 4; ++i)
  151. shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
  152. return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
  153. }
  154. }
  155. LLVMValueRef
  156. lp_build_swizzle2_aos(struct lp_build_context *bld,
  157. LLVMValueRef a,
  158. LLVMValueRef b,
  159. const unsigned char swizzle[4])
  160. {
  161. const unsigned n = bld->type.length;
  162. unsigned i, j;
  163. if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
  164. return lp_build_swizzle1_aos(bld, a, swizzle);
  165. if(a == b) {
  166. unsigned char swizzle1[4];
  167. swizzle1[0] = swizzle[0] % 4;
  168. swizzle1[1] = swizzle[1] % 4;
  169. swizzle1[2] = swizzle[2] % 4;
  170. swizzle1[3] = swizzle[3] % 4;
  171. return lp_build_swizzle1_aos(bld, a, swizzle1);
  172. }
  173. if(swizzle[0] % 4 == 0 &&
  174. swizzle[1] % 4 == 1 &&
  175. swizzle[2] % 4 == 2 &&
  176. swizzle[3] % 4 == 3) {
  177. boolean cond[4];
  178. cond[0] = swizzle[0] / 4;
  179. cond[1] = swizzle[1] / 4;
  180. cond[2] = swizzle[2] / 4;
  181. cond[3] = swizzle[3] / 4;
  182. return lp_build_select_aos(bld, a, b, cond);
  183. }
  184. {
  185. /*
  186. * Shuffle.
  187. */
  188. LLVMTypeRef elem_type = LLVMInt32Type();
  189. LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  190. for(j = 0; j < n; j += 4)
  191. for(i = 0; i < 4; ++i)
  192. shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
  193. return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
  194. }
  195. }