Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

brw_fs_channel_expressions.cpp 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * Copyright © 2010 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21. * DEALINGS IN THE SOFTWARE.
  22. */
  23. /**
  24. * \file brw_fs_channel_expressions.cpp
  25. *
  26. * Breaks vector operations down into operations on each component.
  27. *
  28. * The 965 fragment shader receives 8 or 16 pixels at a time, so each
  29. * channel of a vector is laid out as 1 or 2 8-float registers. Each
  30. * ALU operation operates on one of those channel registers. As a
  31. * result, there is no value to the 965 fragment shader in tracking
  32. * "vector" expressions in the sense of GLSL fragment shaders, when
  33. * doing a channel at a time may help in constant folding, algebraic
  34. * simplification, and reducing the liveness of channel registers.
  35. *
  36. * The exception to the desire to break everything down to floats is
  37. * texturing. The texture sampler returns a writemasked
  38. * 4/8-register sequence containing the texture values. We don't want
  39. * to dispatch to the sampler separately for each channel we need, so
  40. * we do retain the vector types in that case.
  41. */
  42. extern "C" {
  43. #include "main/core.h"
  44. #include "brw_wm.h"
  45. }
  46. #include "glsl/ir.h"
  47. #include "glsl/ir_expression_flattening.h"
  48. #include "glsl/glsl_types.h"
  49. class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
  50. public:
  51. ir_channel_expressions_visitor()
  52. {
  53. this->progress = false;
  54. this->mem_ctx = NULL;
  55. }
  56. ir_visitor_status visit_leave(ir_assignment *);
  57. ir_rvalue *get_element(ir_variable *var, unsigned int element);
  58. void assign(ir_assignment *ir, int elem, ir_rvalue *val);
  59. bool progress;
  60. void *mem_ctx;
  61. };
  62. static bool
  63. channel_expressions_predicate(ir_instruction *ir)
  64. {
  65. ir_expression *expr = ir->as_expression();
  66. unsigned int i;
  67. if (!expr)
  68. return false;
  69. for (i = 0; i < expr->get_num_operands(); i++) {
  70. if (expr->operands[i]->type->is_vector())
  71. return true;
  72. }
  73. return false;
  74. }
  75. bool
  76. brw_do_channel_expressions(exec_list *instructions)
  77. {
  78. ir_channel_expressions_visitor v;
  79. /* Pull out any matrix expression to a separate assignment to a
  80. * temp. This will make our handling of the breakdown to
  81. * operations on the matrix's vector components much easier.
  82. */
  83. do_expression_flattening(instructions, channel_expressions_predicate);
  84. visit_list_elements(&v, instructions);
  85. return v.progress;
  86. }
  87. ir_rvalue *
  88. ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
  89. {
  90. ir_dereference *deref;
  91. if (var->type->is_scalar())
  92. return new(mem_ctx) ir_dereference_variable(var);
  93. assert(elem < var->type->components());
  94. deref = new(mem_ctx) ir_dereference_variable(var);
  95. return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
  96. }
  97. void
  98. ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
  99. {
  100. ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
  101. ir_assignment *assign;
  102. /* This assign-of-expression should have been generated by the
  103. * expression flattening visitor (since we never short circit to
  104. * not flatten, even for plain assignments of variables), so the
  105. * writemask is always full.
  106. */
  107. assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
  108. assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
  109. ir->insert_before(assign);
  110. }
  111. ir_visitor_status
  112. ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
  113. {
  114. ir_expression *expr = ir->rhs->as_expression();
  115. bool found_vector = false;
  116. unsigned int i, vector_elements = 1;
  117. ir_variable *op_var[3];
  118. if (!expr)
  119. return visit_continue;
  120. if (!this->mem_ctx)
  121. this->mem_ctx = ralloc_parent(ir);
  122. for (i = 0; i < expr->get_num_operands(); i++) {
  123. if (expr->operands[i]->type->is_vector()) {
  124. found_vector = true;
  125. vector_elements = expr->operands[i]->type->vector_elements;
  126. break;
  127. }
  128. }
  129. if (!found_vector)
  130. return visit_continue;
  131. /* Store the expression operands in temps so we can use them
  132. * multiple times.
  133. */
  134. for (i = 0; i < expr->get_num_operands(); i++) {
  135. ir_assignment *assign;
  136. ir_dereference *deref;
  137. assert(!expr->operands[i]->type->is_matrix());
  138. op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
  139. "channel_expressions",
  140. ir_var_temporary);
  141. ir->insert_before(op_var[i]);
  142. deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
  143. assign = new(mem_ctx) ir_assignment(deref,
  144. expr->operands[i],
  145. NULL);
  146. ir->insert_before(assign);
  147. }
  148. const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
  149. 1, 1);
  150. /* OK, time to break down this vector operation. */
  151. switch (expr->operation) {
  152. case ir_unop_bit_not:
  153. case ir_unop_logic_not:
  154. case ir_unop_neg:
  155. case ir_unop_abs:
  156. case ir_unop_sign:
  157. case ir_unop_rcp:
  158. case ir_unop_rsq:
  159. case ir_unop_sqrt:
  160. case ir_unop_exp:
  161. case ir_unop_log:
  162. case ir_unop_exp2:
  163. case ir_unop_log2:
  164. case ir_unop_bitcast_i2f:
  165. case ir_unop_bitcast_f2i:
  166. case ir_unop_bitcast_f2u:
  167. case ir_unop_bitcast_u2f:
  168. case ir_unop_i2u:
  169. case ir_unop_u2i:
  170. case ir_unop_f2i:
  171. case ir_unop_f2u:
  172. case ir_unop_i2f:
  173. case ir_unop_f2b:
  174. case ir_unop_b2f:
  175. case ir_unop_i2b:
  176. case ir_unop_b2i:
  177. case ir_unop_u2f:
  178. case ir_unop_trunc:
  179. case ir_unop_ceil:
  180. case ir_unop_floor:
  181. case ir_unop_fract:
  182. case ir_unop_round_even:
  183. case ir_unop_sin:
  184. case ir_unop_cos:
  185. case ir_unop_sin_reduced:
  186. case ir_unop_cos_reduced:
  187. case ir_unop_dFdx:
  188. case ir_unop_dFdy:
  189. case ir_unop_bitfield_reverse:
  190. case ir_unop_bit_count:
  191. case ir_unop_find_msb:
  192. case ir_unop_find_lsb:
  193. for (i = 0; i < vector_elements; i++) {
  194. ir_rvalue *op0 = get_element(op_var[0], i);
  195. assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  196. element_type,
  197. op0,
  198. NULL));
  199. }
  200. break;
  201. case ir_binop_add:
  202. case ir_binop_sub:
  203. case ir_binop_mul:
  204. case ir_binop_imul_high:
  205. case ir_binop_div:
  206. case ir_binop_carry:
  207. case ir_binop_borrow:
  208. case ir_binop_mod:
  209. case ir_binop_min:
  210. case ir_binop_max:
  211. case ir_binop_pow:
  212. case ir_binop_lshift:
  213. case ir_binop_rshift:
  214. case ir_binop_bit_and:
  215. case ir_binop_bit_xor:
  216. case ir_binop_bit_or:
  217. case ir_binop_less:
  218. case ir_binop_greater:
  219. case ir_binop_lequal:
  220. case ir_binop_gequal:
  221. case ir_binop_equal:
  222. case ir_binop_nequal:
  223. for (i = 0; i < vector_elements; i++) {
  224. ir_rvalue *op0 = get_element(op_var[0], i);
  225. ir_rvalue *op1 = get_element(op_var[1], i);
  226. assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  227. element_type,
  228. op0,
  229. op1));
  230. }
  231. break;
  232. case ir_unop_any: {
  233. ir_expression *temp;
  234. temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
  235. element_type,
  236. get_element(op_var[0], 0),
  237. get_element(op_var[0], 1));
  238. for (i = 2; i < vector_elements; i++) {
  239. temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
  240. element_type,
  241. get_element(op_var[0], i),
  242. temp);
  243. }
  244. assign(ir, 0, temp);
  245. break;
  246. }
  247. case ir_binop_dot: {
  248. ir_expression *last = NULL;
  249. for (i = 0; i < vector_elements; i++) {
  250. ir_rvalue *op0 = get_element(op_var[0], i);
  251. ir_rvalue *op1 = get_element(op_var[1], i);
  252. ir_expression *temp;
  253. temp = new(mem_ctx) ir_expression(ir_binop_mul,
  254. element_type,
  255. op0,
  256. op1);
  257. if (last) {
  258. last = new(mem_ctx) ir_expression(ir_binop_add,
  259. element_type,
  260. temp,
  261. last);
  262. } else {
  263. last = temp;
  264. }
  265. }
  266. assign(ir, 0, last);
  267. break;
  268. }
  269. case ir_binop_logic_and:
  270. case ir_binop_logic_xor:
  271. case ir_binop_logic_or:
  272. ir->print();
  273. printf("\n");
  274. assert(!"not reached: expression operates on scalars only");
  275. break;
  276. case ir_binop_all_equal:
  277. case ir_binop_any_nequal: {
  278. ir_expression *last = NULL;
  279. for (i = 0; i < vector_elements; i++) {
  280. ir_rvalue *op0 = get_element(op_var[0], i);
  281. ir_rvalue *op1 = get_element(op_var[1], i);
  282. ir_expression *temp;
  283. ir_expression_operation join;
  284. if (expr->operation == ir_binop_all_equal)
  285. join = ir_binop_logic_and;
  286. else
  287. join = ir_binop_logic_or;
  288. temp = new(mem_ctx) ir_expression(expr->operation,
  289. element_type,
  290. op0,
  291. op1);
  292. if (last) {
  293. last = new(mem_ctx) ir_expression(join,
  294. element_type,
  295. temp,
  296. last);
  297. } else {
  298. last = temp;
  299. }
  300. }
  301. assign(ir, 0, last);
  302. break;
  303. }
  304. case ir_unop_noise:
  305. assert(!"noise should have been broken down to function call");
  306. break;
  307. case ir_binop_bfm: {
  308. /* Does not need to be scalarized, since its result will be identical
  309. * for all channels.
  310. */
  311. ir_rvalue *op0 = get_element(op_var[0], 0);
  312. ir_rvalue *op1 = get_element(op_var[1], 0);
  313. assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
  314. element_type,
  315. op0,
  316. op1));
  317. break;
  318. }
  319. case ir_binop_ubo_load:
  320. assert(!"not yet supported");
  321. break;
  322. case ir_triop_fma:
  323. case ir_triop_lrp:
  324. case ir_triop_csel:
  325. case ir_triop_bitfield_extract:
  326. for (i = 0; i < vector_elements; i++) {
  327. ir_rvalue *op0 = get_element(op_var[0], i);
  328. ir_rvalue *op1 = get_element(op_var[1], i);
  329. ir_rvalue *op2 = get_element(op_var[2], i);
  330. assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  331. element_type,
  332. op0,
  333. op1,
  334. op2));
  335. }
  336. break;
  337. case ir_triop_bfi: {
  338. /* Only a single BFM is needed for multiple BFIs. */
  339. ir_rvalue *op0 = get_element(op_var[0], 0);
  340. for (i = 0; i < vector_elements; i++) {
  341. ir_rvalue *op1 = get_element(op_var[1], i);
  342. ir_rvalue *op2 = get_element(op_var[2], i);
  343. assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  344. element_type,
  345. op0->clone(mem_ctx, NULL),
  346. op1,
  347. op2));
  348. }
  349. break;
  350. }
  351. case ir_unop_pack_snorm_2x16:
  352. case ir_unop_pack_snorm_4x8:
  353. case ir_unop_pack_unorm_2x16:
  354. case ir_unop_pack_unorm_4x8:
  355. case ir_unop_pack_half_2x16:
  356. case ir_unop_unpack_snorm_2x16:
  357. case ir_unop_unpack_snorm_4x8:
  358. case ir_unop_unpack_unorm_2x16:
  359. case ir_unop_unpack_unorm_4x8:
  360. case ir_unop_unpack_half_2x16:
  361. case ir_binop_ldexp:
  362. case ir_binop_vector_extract:
  363. case ir_triop_vector_insert:
  364. case ir_quadop_bitfield_insert:
  365. case ir_quadop_vector:
  366. assert(!"should have been lowered");
  367. break;
  368. case ir_unop_unpack_half_2x16_split_x:
  369. case ir_unop_unpack_half_2x16_split_y:
  370. case ir_binop_pack_half_2x16_split:
  371. assert(!"not reached: expression operates on scalars only");
  372. break;
  373. }
  374. ir->remove();
  375. this->progress = true;
  376. return visit_continue;
  377. }