Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ir3_legalize.c 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
  2. /*
  3. * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice (including the next
  13. * paragraph) shall be included in all copies or substantial portions of the
  14. * Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22. * SOFTWARE.
  23. *
  24. * Authors:
  25. * Rob Clark <robclark@freedesktop.org>
  26. */
  27. #include "pipe/p_shader_tokens.h"
  28. #include "util/u_math.h"
  29. #include "freedreno_util.h"
  30. #include "ir3.h"
/*
 * Legalize:
 *
 * We currently require that scheduling ensures that we have enough nop's
 * in all the right places.  The legalize step mostly handles fixing up
 * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's
 * into fewer nop's w/ rpt flag.
 */
  39. struct ir3_legalize_ctx {
  40. struct ir3_block *block;
  41. bool has_samp;
  42. int max_bary;
  43. };
  44. static void legalize(struct ir3_legalize_ctx *ctx)
  45. {
  46. struct ir3_block *block = ctx->block;
  47. struct ir3_instruction *n;
  48. struct ir3 *shader = block->shader;
  49. struct ir3_instruction *end =
  50. ir3_instr_create(block, 0, OPC_END);
  51. struct ir3_instruction *last_input = NULL;
  52. struct ir3_instruction *last_rel = NULL;
  53. regmask_t needs_ss_war; /* write after read */
  54. regmask_t needs_ss;
  55. regmask_t needs_sy;
  56. regmask_init(&needs_ss_war);
  57. regmask_init(&needs_ss);
  58. regmask_init(&needs_sy);
  59. shader->instrs_count = 0;
  60. for (n = block->head; n; n = n->next) {
  61. struct ir3_register *reg;
  62. unsigned i;
  63. if (is_meta(n))
  64. continue;
  65. if (is_input(n)) {
  66. struct ir3_register *inloc = n->regs[1];
  67. assert(inloc->flags & IR3_REG_IMMED);
  68. ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val);
  69. }
  70. for (i = 1; i < n->regs_count; i++) {
  71. reg = n->regs[i];
  72. if (reg_gpr(reg)) {
  73. /* TODO: we probably only need (ss) for alu
  74. * instr consuming sfu result.. need to make
  75. * some tests for both this and (sy)..
  76. */
  77. if (regmask_get(&needs_ss, reg)) {
  78. n->flags |= IR3_INSTR_SS;
  79. regmask_init(&needs_ss);
  80. }
  81. if (regmask_get(&needs_sy, reg)) {
  82. n->flags |= IR3_INSTR_SY;
  83. regmask_init(&needs_sy);
  84. }
  85. }
  86. /* TODO: is it valid to have address reg loaded from a
  87. * relative src (ie. mova a0, c<a0.x+4>)? If so, the
  88. * last_rel check below should be moved ahead of this:
  89. */
  90. if (reg->flags & IR3_REG_RELATIV)
  91. last_rel = n;
  92. }
  93. if (n->regs_count > 0) {
  94. reg = n->regs[0];
  95. if (regmask_get(&needs_ss_war, reg)) {
  96. n->flags |= IR3_INSTR_SS;
  97. regmask_init(&needs_ss_war); // ??? I assume?
  98. }
  99. if (last_rel && (reg->num == regid(REG_A0, 0))) {
  100. last_rel->flags |= IR3_INSTR_UL;
  101. last_rel = NULL;
  102. }
  103. }
  104. /* cat5+ does not have an (ss) bit, if needed we need to
  105. * insert a nop to carry the sync flag. Would be kinda
  106. * clever if we were aware of this during scheduling, but
  107. * this should be a pretty rare case:
  108. */
  109. if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
  110. struct ir3_instruction *nop;
  111. nop = ir3_instr_create(block, 0, OPC_NOP);
  112. nop->flags |= IR3_INSTR_SS;
  113. n->flags &= ~IR3_INSTR_SS;
  114. }
  115. /* need to be able to set (ss) on first instruction: */
  116. if ((shader->instrs_count == 0) && (n->category >= 5))
  117. ir3_instr_create(block, 0, OPC_NOP);
  118. if (is_nop(n) && shader->instrs_count) {
  119. struct ir3_instruction *last =
  120. shader->instrs[shader->instrs_count-1];
  121. if (is_nop(last) && (last->repeat < 5)) {
  122. last->repeat++;
  123. last->flags |= n->flags;
  124. continue;
  125. }
  126. }
  127. shader->instrs[shader->instrs_count++] = n;
  128. if (is_sfu(n))
  129. regmask_set(&needs_ss, n->regs[0]);
  130. if (is_tex(n)) {
  131. /* this ends up being the # of samp instructions.. but that
  132. * is ok, everything else only cares whether it is zero or
  133. * not. We do this here, rather than when we encounter a
  134. * SAMP decl, because (especially in binning pass shader)
  135. * the samp instruction(s) could get eliminated if the
  136. * result is not used.
  137. */
  138. ctx->has_samp = true;
  139. regmask_set(&needs_sy, n->regs[0]);
  140. }
  141. /* both tex/sfu appear to not always immediately consume
  142. * their src register(s):
  143. */
  144. if (is_tex(n) || is_sfu(n)) {
  145. for (i = 1; i < n->regs_count; i++) {
  146. reg = n->regs[i];
  147. if (reg_gpr(reg))
  148. regmask_set(&needs_ss_war, reg);
  149. }
  150. }
  151. if (is_input(n))
  152. last_input = n;
  153. }
  154. if (last_input)
  155. last_input->regs[0]->flags |= IR3_REG_EI;
  156. if (last_rel)
  157. last_rel->flags |= IR3_INSTR_UL;
  158. shader->instrs[shader->instrs_count++] = end;
  159. shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
  160. }
  161. void ir3_block_legalize(struct ir3_block *block,
  162. bool *has_samp, int *max_bary)
  163. {
  164. struct ir3_legalize_ctx ctx = {
  165. .block = block,
  166. .max_bary = -1,
  167. };
  168. legalize(&ctx);
  169. *has_samp = ctx.has_samp;
  170. *max_bary = ctx.max_bary;
  171. }