Procházet zdrojové kódy

i965/fs: Factor out the register allocator setup to a separate function.

Besides separating out a logical step of the giant register allocator
function, this now communicates a bunch of the allocator information
through entries in brw_context, which will make this code partially
reusable for caching the expensive allocator setup.
tags/mesa-8.0-rc1
Eric Anholt před 14 lety
rodič
revize
b1f0bffd39

+ 23
- 0
src/mesa/drivers/dri/i965/brw_context.h Zobrazit soubor

@@ -748,6 +748,29 @@ struct brw_context
* Pre-gen6, push constants live in the CURBE.
*/
uint32_t push_const_offset;

/** @{ register allocator */

struct ra_regs *regs;

/** Array of the ra classes for the unaligned contiguous
* register block sizes used.
*/
int *classes;

/**
* Mapping for register-allocated objects in *regs to the first
* GRF for that object.
*/
uint8_t *ra_reg_to_grf;

/**
* ra class for the aligned pairs we use for PLN, which doesn't
* appear in *classes.
*/
int aligned_pairs_class;

/** @} */
} wm;



+ 82
- 66
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp Zobrazit soubor

@@ -87,55 +87,14 @@ fs_visitor::assign_regs_trivial()

}

bool
fs_visitor::assign_regs()
static void
brw_alloc_reg_set_for_classes(struct brw_context *brw,
int *class_sizes,
int class_count,
int reg_width,
int base_reg_count)
{
/* Most of this allocation was written for a reg_width of 1
* (dispatch_width == 8). In extending to 16-wide, the code was
* left in place and it was converted to have the hardware
* registers it's allocating be contiguous physical pairs of regs
* for reg_width == 2.
*/
int reg_width = c->dispatch_width / 8;
int hw_reg_mapping[this->virtual_grf_next];
int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
int class_sizes[base_reg_count];
int class_count = 0;
int aligned_pairs_class = -1;

calculate_live_intervals();

/* Set up the register classes.
*
* The base registers store a scalar value. For texture samples,
* we get virtual GRFs composed of 4 contiguous hw registers. For
* structures and arrays, we store them as contiguous larger things
* than that, though we should be able to do better most of the
* time.
*/
class_sizes[class_count++] = 1;
if (brw->has_pln && intel->gen < 6) {
/* Always set up the (unaligned) pairs for gen5, so we can find
* them for making the aligned pair class.
*/
class_sizes[class_count++] = 2;
}
for (int r = 0; r < this->virtual_grf_next; r++) {
int i;

for (i = 0; i < class_count; i++) {
if (class_sizes[i] == this->virtual_grf_sizes[r])
break;
}
if (i == class_count) {
if (this->virtual_grf_sizes[r] >= base_reg_count) {
fail("Object too large to register allocate.\n");
}

class_sizes[class_count++] = this->virtual_grf_sizes[r];
}
}
struct intel_context *intel = &brw->intel;

/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
@@ -143,9 +102,14 @@ fs_visitor::assign_regs()
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}

struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
uint8_t ra_reg_to_grf[ra_reg_count];
int classes[class_count + 1];
ralloc_free(brw->wm.ra_reg_to_grf);
brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
ralloc_free(brw->wm.regs);
brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
ralloc_free(brw->wm.classes);
brw->wm.classes = ralloc_array(brw, int, class_count + 1);

brw->wm.aligned_pairs_class = -1;

/* Now, add the registers to their classes, and add the conflicts
* between them and the base GRF registers (and also each other).
@@ -155,7 +119,7 @@ fs_visitor::assign_regs()
int pairs_reg_count = 0;
for (int i = 0; i < class_count; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
classes[i] = ra_alloc_reg_class(regs);
brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);

/* Save this off for the aligned pair class at the end. */
if (class_sizes[i] == 2) {
@@ -164,14 +128,14 @@ fs_visitor::assign_regs()
}

for (int j = 0; j < class_reg_count; j++) {
ra_class_add_reg(regs, classes[i], reg);
ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);

ra_reg_to_grf[reg] = j;
brw->wm.ra_reg_to_grf[reg] = j;

for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
ra_add_transitive_reg_conflict(regs, base_reg, reg);
ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
}

reg++;
@@ -183,30 +147,83 @@ fs_visitor::assign_regs()
* in on gen5 so that we can do PLN.
*/
if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
aligned_pairs_class = ra_alloc_reg_class(regs);
brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);

for (int i = 0; i < pairs_reg_count; i++) {
if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
ra_class_add_reg(regs, aligned_pairs_class,
if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
pairs_base_reg + i);
}
}
class_count++;
}

ra_set_finalize(regs);
ra_set_finalize(brw->wm.regs);
}

bool
fs_visitor::assign_regs()
{
/* Most of this allocation was written for a reg_width of 1
* (dispatch_width == 8). In extending to 16-wide, the code was
* left in place and it was converted to have the hardware
* registers it's allocating be contiguous physical pairs of regs
* for reg_width == 2.
*/
int reg_width = c->dispatch_width / 8;
int hw_reg_mapping[this->virtual_grf_next];
int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
int class_sizes[base_reg_count];
int class_count = 0;

calculate_live_intervals();

/* Set up the register classes.
*
* The base registers store a scalar value. For texture samples,
* we get virtual GRFs composed of 4 contiguous hw registers. For
* structures and arrays, we store them as contiguous larger things
* than that, though we should be able to do better most of the
* time.
*/
class_sizes[class_count++] = 1;
if (brw->has_pln && intel->gen < 6) {
/* Always set up the (unaligned) pairs for gen5, so we can find
* them for making the aligned pair class.
*/
class_sizes[class_count++] = 2;
}
for (int r = 0; r < this->virtual_grf_next; r++) {
int i;

for (i = 0; i < class_count; i++) {
if (class_sizes[i] == this->virtual_grf_sizes[r])
break;
}
if (i == class_count) {
if (this->virtual_grf_sizes[r] >= base_reg_count) {
fail("Object too large to register allocate.\n");
}

class_sizes[class_count++] = this->virtual_grf_sizes[r];
}
}

brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
reg_width, base_reg_count);

struct ra_graph *g = ra_alloc_interference_graph(regs,
struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
this->virtual_grf_next);

for (int i = 0; i < this->virtual_grf_next; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
if (aligned_pairs_class >= 0 &&
if (brw->wm.aligned_pairs_class >= 0 &&
this->delta_x.reg == i) {
ra_set_node_class(g, i, aligned_pairs_class);
ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
} else {
ra_set_node_class(g, i, classes[c]);
ra_set_node_class(g, i, brw->wm.classes[c]);
}
break;
}
@@ -237,7 +254,6 @@ fs_visitor::assign_regs()


ralloc_free(g);
ralloc_free(regs);

return false;
}
@@ -250,7 +266,8 @@ fs_visitor::assign_regs()
for (int i = 0; i < this->virtual_grf_next; i++) {
int reg = ra_get_node_reg(g, i);

hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
hw_reg_mapping[i] = (first_assigned_grf +
brw->wm.ra_reg_to_grf[reg] * reg_width);
this->grf_used = MAX2(this->grf_used,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
reg_width);
@@ -265,7 +282,6 @@ fs_visitor::assign_regs()
}

ralloc_free(g);
ralloc_free(regs);

return true;
}

Načítá se…
Zrušit
Uložit