Besides separating out a logical step of the giant register allocator function, this now passes the allocator state (the register set, the class array, the ra-reg-to-GRF mapping, and the aligned-pairs class) through fields in brw_context, which will make this code partially reusable for caching the expensive allocator setup.

14 years ago · b1f0bffd39
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -748,6 +748,29 @@ struct brw_context
       * Pre-gen6, push constants live in the CURBE.
       */
      uint32_t push_const_offset;

      /** @{ register allocator */

      struct ra_regs *regs;

      /** Array of the ra classes for the unaligned contiguous
       * register block sizes used.
       */
      int *classes;

      /**
       * Mapping for register-allocated objects in *regs to the first
       * GRF for that object.
       */
      uint8_t *ra_reg_to_grf;

      /**
       * ra class for the aligned pairs we use for PLN, which doesn't
       * appear in *classes.
       */
      int aligned_pairs_class;

      /** @} */
   } wm;


--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -87,55 +87,14 @@ fs_visitor::assign_regs_trivial()

 }

 bool
 fs_visitor::assign_regs()
 static void
 brw_alloc_reg_set_for_classes(struct brw_context *brw,
 			      int *class_sizes,
 			      int class_count,
 			      int reg_width,
 			      int base_reg_count)
 {
   /* Most of this allocation was written for a reg_width of 1
    * (dispatch_width == 8).  In extending to 16-wide, the code was
    * left in place and it was converted to have the hardware
    * registers it's allocating be contiguous physical pairs of regs
    * for reg_width == 2.
    */
   int reg_width = c->dispatch_width / 8;
   int hw_reg_mapping[this->virtual_grf_next];
   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
   int class_sizes[base_reg_count];
   int class_count = 0;
   int aligned_pairs_class = -1;

   calculate_live_intervals();

   /* Set up the register classes.
    *
    * The base registers store a scalar value.  For texture samples,
    * we get virtual GRFs composed of 4 contiguous hw registers.  For
    * structures and arrays, we store them as contiguous larger things
    * than that, though we should be able to do better most of the
    * time.
    */
   class_sizes[class_count++] = 1;
   if (brw->has_pln && intel->gen < 6) {
      /* Always set up the (unaligned) pairs for gen5, so we can find
       * them for making the aligned pair class.
       */
      class_sizes[class_count++] = 2;
   }
   for (int r = 0; r < this->virtual_grf_next; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
 	 if (class_sizes[i] == this->virtual_grf_sizes[r])
 	    break;
      }
      if (i == class_count) {
 	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
 	    fail("Object too large to register allocate.\n");
 	 }

 	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }
   struct intel_context *intel = &brw->intel;

   /* Compute the total number of registers across all classes. */
   int ra_reg_count = 0;
@@ -143,9 +102,14 @@ fs_visitor::assign_regs()
      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
   }

   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
   uint8_t ra_reg_to_grf[ra_reg_count];
   int classes[class_count + 1];
   ralloc_free(brw->wm.ra_reg_to_grf);
   brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
   ralloc_free(brw->wm.regs);
   brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
   ralloc_free(brw->wm.classes);
   brw->wm.classes = ralloc_array(brw, int, class_count + 1);

   brw->wm.aligned_pairs_class = -1;

   /* Now, add the registers to their classes, and add the conflicts
    * between them and the base GRF registers (and also each other).
@@ -155,7 +119,7 @@ fs_visitor::assign_regs()
   int pairs_reg_count = 0;
   for (int i = 0; i < class_count; i++) {
      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
      classes[i] = ra_alloc_reg_class(regs);
      brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);

      /* Save this off for the aligned pair class at the end. */
      if (class_sizes[i] == 2) {
@@ -164,14 +128,14 @@ fs_visitor::assign_regs()
      }

      for (int j = 0; j < class_reg_count; j++) {
 	 ra_class_add_reg(regs, classes[i], reg);
 	 ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);

 	 ra_reg_to_grf[reg] = j;
 	 brw->wm.ra_reg_to_grf[reg] = j;

 	 for (int base_reg = j;
 	      base_reg < j + class_sizes[i];
 	      base_reg++) {
 	    ra_add_transitive_reg_conflict(regs, base_reg, reg);
 	    ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
 	 }

 	 reg++;
@@ -183,30 +147,83 @@ fs_visitor::assign_regs()
    * in on gen5 so that we can do PLN.
    */
   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
      aligned_pairs_class = ra_alloc_reg_class(regs);
      brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);

      for (int i = 0; i < pairs_reg_count; i++) {
 	 if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
 	    ra_class_add_reg(regs, aligned_pairs_class,
 	 if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
 	    ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
 			     pairs_base_reg + i);
 	 }
      }
      class_count++;
   }

   ra_set_finalize(regs);
   ra_set_finalize(brw->wm.regs);
 }

 bool
 fs_visitor::assign_regs()
 {
   /* Most of this allocation was written for a reg_width of 1
    * (dispatch_width == 8).  In extending to 16-wide, the code was
    * left in place and it was converted to have the hardware
    * registers it's allocating be contiguous physical pairs of regs
    * for reg_width == 2.
    */
   int reg_width = c->dispatch_width / 8;
   int hw_reg_mapping[this->virtual_grf_next];
   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
   int class_sizes[base_reg_count];
   int class_count = 0;

   calculate_live_intervals();

   /* Set up the register classes.
    *
    * The base registers store a scalar value.  For texture samples,
    * we get virtual GRFs composed of 4 contiguous hw registers.  For
    * structures and arrays, we store them as contiguous larger things
    * than that, though we should be able to do better most of the
    * time.
    */
   class_sizes[class_count++] = 1;
   if (brw->has_pln && intel->gen < 6) {
      /* Always set up the (unaligned) pairs for gen5, so we can find
       * them for making the aligned pair class.
       */
      class_sizes[class_count++] = 2;
   }
   for (int r = 0; r < this->virtual_grf_next; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
 	 if (class_sizes[i] == this->virtual_grf_sizes[r])
 	    break;
      }
      if (i == class_count) {
 	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
 	    fail("Object too large to register allocate.\n");
 	 }

 	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }

   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
 				 reg_width, base_reg_count);

   struct ra_graph *g = ra_alloc_interference_graph(regs,
   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
 						    this->virtual_grf_next);

   for (int i = 0; i < this->virtual_grf_next; i++) {
      for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
 	    if (aligned_pairs_class >= 0 &&
 	    if (brw->wm.aligned_pairs_class >= 0 &&
 		this->delta_x.reg == i) {
 	       ra_set_node_class(g, i, aligned_pairs_class);
 	       ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
 	    } else {
 	       ra_set_node_class(g, i, classes[c]);
 	       ra_set_node_class(g, i, brw->wm.classes[c]);
 	    }
 	    break;
 	 }
@@ -237,7 +254,6 @@ fs_visitor::assign_regs()


      ralloc_free(g);
      ralloc_free(regs);

      return false;
   }
@@ -250,7 +266,8 @@ fs_visitor::assign_regs()
   for (int i = 0; i < this->virtual_grf_next; i++) {
      int reg = ra_get_node_reg(g, i);

      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
      hw_reg_mapping[i] = (first_assigned_grf +
 			   brw->wm.ra_reg_to_grf[reg] * reg_width);
      this->grf_used = MAX2(this->grf_used,
 			    hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
 			    reg_width);
@@ -265,7 +282,6 @@ fs_visitor::assign_regs()
   }

   ralloc_free(g);
   ralloc_free(regs);

   return true;
 }