@@ -87,55 +87,14 @@ fs_visitor::assign_regs_trivial()
 }
 
-bool
-fs_visitor::assign_regs()
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+                              int *class_sizes,
+                              int class_count,
+                              int reg_width,
+                              int base_reg_count)
 {
-   /* Most of this allocation was written for a reg_width of 1
-    * (dispatch_width == 8).  In extending to 16-wide, the code was
-    * left in place and it was converted to have the hardware
-    * registers it's allocating be contiguous physical pairs of regs
-    * for reg_width == 2.
-    */
-   int reg_width = c->dispatch_width / 8;
-   int hw_reg_mapping[this->virtual_grf_next];
-   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
-   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
-   int class_sizes[base_reg_count];
-   int class_count = 0;
-   int aligned_pairs_class = -1;
-
-   calculate_live_intervals();
-
-   /* Set up the register classes.
-    *
-    * The base registers store a scalar value.  For texture samples,
-    * we get virtual GRFs composed of 4 contiguous hw register.  For
-    * structures and arrays, we store them as contiguous larger things
-    * than that, though we should be able to do better most of the
-    * time.
-    */
-   class_sizes[class_count++] = 1;
-   if (brw->has_pln && intel->gen < 6) {
-      /* Always set up the (unaligned) pairs for gen5, so we can find
-       * them for making the aligned pair class.
-       */
-      class_sizes[class_count++] = 2;
-   }
-   for (int r = 0; r < this->virtual_grf_next; r++) {
-      int i;
-
-      for (i = 0; i < class_count; i++) {
-         if (class_sizes[i] == this->virtual_grf_sizes[r])
-            break;
-      }
-      if (i == class_count) {
-         if (this->virtual_grf_sizes[r] >= base_reg_count) {
-            fail("Object too large to register allocate.\n");
-         }
-
-         class_sizes[class_count++] = this->virtual_grf_sizes[r];
-      }
-   }
+   struct intel_context *intel = &brw->intel;
 
    /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
@@ -143,9 +102,14 @@ fs_visitor::assign_regs()
       ra_reg_count += base_reg_count - (class_sizes[i] - 1);
    }
 
-   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
-   uint8_t ra_reg_to_grf[ra_reg_count];
-   int classes[class_count + 1];
+   ralloc_free(brw->wm.ra_reg_to_grf);
+   brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->wm.regs);
+   brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->wm.classes);
+   brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+   brw->wm.aligned_pairs_class = -1;
 
    /* Now, add the registers to their classes, and add the conflicts
     * between them and the base GRF registers (and also each other).
@@ -155,7 +119,7 @@ fs_visitor::assign_regs()
    int pairs_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
       int class_reg_count = base_reg_count - (class_sizes[i] - 1);
-      classes[i] = ra_alloc_reg_class(regs);
+      brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
 
       /* Save this off for the aligned pair class at the end. */
       if (class_sizes[i] == 2) {
@@ -164,14 +128,14 @@ fs_visitor::assign_regs()
       }
 
       for (int j = 0; j < class_reg_count; j++) {
-         ra_class_add_reg(regs, classes[i], reg);
+         ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
 
-         ra_reg_to_grf[reg] = j;
+         brw->wm.ra_reg_to_grf[reg] = j;
 
          for (int base_reg = j;
              base_reg < j + class_sizes[i];
              base_reg++) {
-            ra_add_transitive_reg_conflict(regs, base_reg, reg);
+            ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
         }
 
         reg++;
@@ -183,30 +147,83 @@ fs_visitor::assign_regs()
    * in on gen5 so that we can do PLN.
    */
   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      aligned_pairs_class = ra_alloc_reg_class(regs);
+      brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
 
      for (int i = 0; i < pairs_reg_count; i++) {
-         if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
-            ra_class_add_reg(regs, aligned_pairs_class,
+         if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+            ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
                              pairs_base_reg + i);
         }
      }
      class_count++;
   }
 
-   ra_set_finalize(regs);
+   ra_set_finalize(brw->wm.regs);
+}
+
+bool
+fs_visitor::assign_regs()
+{
+   /* Most of this allocation was written for a reg_width of 1
+    * (dispatch_width == 8).  In extending to 16-wide, the code was
+    * left in place and it was converted to have the hardware
+    * registers it's allocating be contiguous physical pairs of regs
+    * for reg_width == 2.
+    */
+   int reg_width = c->dispatch_width / 8;
+   int hw_reg_mapping[this->virtual_grf_next];
+   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
+   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a scalar value.  For texture samples,
+    * we get virtual GRFs composed of 4 contiguous hw register.  For
+    * structures and arrays, we store them as contiguous larger things
+    * than that, though we should be able to do better most of the
+    * time.
+    */
+   class_sizes[class_count++] = 1;
+   if (brw->has_pln && intel->gen < 6) {
+      /* Always set up the (unaligned) pairs for gen5, so we can find
+       * them for making the aligned pair class.
+       */
+      class_sizes[class_count++] = 2;
+   }
+   for (int r = 0; r < this->virtual_grf_next; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+         if (class_sizes[i] == this->virtual_grf_sizes[r])
+            break;
+      }
+      if (i == class_count) {
+         if (this->virtual_grf_sizes[r] >= base_reg_count) {
+            fail("Object too large to register allocate.\n");
+         }
+
+         class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+                                 reg_width, base_reg_count);
 
-   struct ra_graph *g = ra_alloc_interference_graph(regs,
+   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
                                                     this->virtual_grf_next);
 
    for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-           if (aligned_pairs_class >= 0 &&
+           if (brw->wm.aligned_pairs_class >= 0 &&
                this->delta_x.reg == i) {
-              ra_set_node_class(g, i, aligned_pairs_class);
+              ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
            } else {
-              ra_set_node_class(g, i, classes[c]);
+              ra_set_node_class(g, i, brw->wm.classes[c]);
            }
            break;
        }
@@ -237,7 +254,6 @@ fs_visitor::assign_regs()
 
 
       ralloc_free(g);
-      ralloc_free(regs);
 
       return false;
    }
@@ -250,7 +266,8 @@ fs_visitor::assign_regs()
    for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
 
-      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
+      hw_reg_mapping[i] = (first_assigned_grf +
+                           brw->wm.ra_reg_to_grf[reg] * reg_width);
       this->grf_used = MAX2(this->grf_used,
                             hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
                             reg_width);
@@ -265,7 +282,6 @@ fs_visitor::assign_regs()
    }
 
    ralloc_free(g);
-   ralloc_free(regs);
 
    return true;
 }