Browse Source

r300/compiler: Add peephole optimization for the 'add' presubtract operation

tags/mesa-7.9-rc1
Tom Stellard 15 years ago
parent
commit
a64b4a05af

+ 158
- 68
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c View File

@@ -38,6 +38,10 @@ struct peephole_state {
unsigned int WriteMask;
};

/**
 * Callback type handed to presub_helper(). The helper invokes it as
 * presub_replace(s, inst, i), i.e. with the peephole state describing the
 * instruction being replaced, the reader instruction currently being
 * scanned, and the index of the reader's source operand to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct peephole_state *,
struct rc_instruction *,
unsigned int);

static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
struct rc_src_register combine;
@@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
}
}

/**
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
* of the add instruction must have the constant 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
* @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
static int peephole_add_presub_inv(
static int presub_helper(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
struct peephole_state * s,
rc_presubtract_op presub_opcode,
rc_presub_replace_fn presub_replace)
{
unsigned int i, swz, mask;
struct rc_instruction * inst;
unsigned int can_remove = 0;
unsigned int cant_sub = 0;
struct rc_instruction * inst;
struct peephole_state s;

if (inst_add->U.I.SaturateMode)
return 0;

mask = inst_add->U.I.DstReg.WriteMask;

/* Check if src0 is 1. */
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
for(i = 0; i < 4; i++ ) {
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
&& swz != RC_SWIZZLE_ONE) {
return 0;
}
}

/* Check src1. */
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
inst_add->U.I.DstReg.WriteMask
|| inst_add->U.I.SrcReg[1].Abs
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {

return 0;
}

/* Setup the peephole_state information. */
s.Inst = inst_add;
s.WriteMask = inst_add->U.I.DstReg.WriteMask;

/* For all instructions that read inst_add->U.I.DstReg before it is
* written again, use the 1 - src0 presubtract instead. */
for(inst = inst_add->Next; inst != &c->Program.Instructions;
for(inst = s->Inst->Next; inst != &c->Program.Instructions;
inst = inst->Next) {
unsigned int i;
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);

for(i = 0; i < info->NumSrcRegs; i++) {
if(inst_add->U.I.DstReg.WriteMask !=
if(s->Inst->U.I.DstReg.WriteMask !=
src_reads_dst_mask(inst->U.I.SrcReg[i],
inst_add->U.I.DstReg)) {
s->Inst->U.I.DstReg)) {
continue;
}
if (cant_sub) {
@@ -601,47 +563,173 @@ static int peephole_add_presub_inv(
* instruction, unless the two presubtract operations
* are the same and read from the same registers. */
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
if (inst->U.I.PreSub.Opcode != presub_opcode
|| inst->U.I.PreSub.SrcReg[0].File !=
inst_add->U.I.SrcReg[1].File
s->Inst->U.I.SrcReg[1].File
|| inst->U.I.PreSub.SrcReg[0].Index !=
inst_add->U.I.SrcReg[1].Index) {
s->Inst->U.I.SrcReg[1].Index) {

can_remove = 0;
break;
}
}
/* We must be careful not to modify inst_add, since it
* is possible it will remain part of the program. */
inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst->U.I.PreSub.SrcReg[0].Negate = 0;
inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
inst->U.I.PreSub.SrcReg[0]);

inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
presub_replace(s, inst, i);
can_remove = 1;
}
if(!can_remove)
break;
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
rc_for_all_writes_mask(inst, peephole_scan_write, s);
/* If all components of inst_add's destination register have
* been written to by subsequent instructions, the original
* value of the destination register is no longer valid and
* we can't keep doing substitutions. */
if (!s.WriteMask){
if (!s->WriteMask){
break;
}
/* Make sure this instruction doesn't write to the presubtract source. */
if (inst->U.I.DstReg.WriteMask &
src_reads_dst_mask(inst_add->U.I.SrcReg[1],
src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
inst->U.I.DstReg)
|| info->IsFlowControl) {
cant_sub = 1;
}
}
if(can_remove) {
return can_remove;
}

static void presub_replace_add(struct peephole_state *s,
struct rc_instruction * inst,
unsigned int src_index)
{
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
inst->U.I.PreSub.SrcReg[0].Negate = 0;
inst->U.I.PreSub.SrcReg[1].Negate = 0;
inst->U.I.PreSub.Opcode = RC_PRESUB_ADD;
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
inst->U.I.PreSub.SrcReg[0]);
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD;
}

/**
 * Try to replace every reader of an ADD instruction's result with the
 * RC_PRESUB_ADD presubtract operation (src0 + src1).
 *
 * The transformation is only attempted when the ADD:
 *  - does not itself use a presubtract operation,
 *  - does not saturate its result,
 *  - writes at least one channel,
 *  - uses the same swizzle for both sources, and
 *  - has no absolute-value modifier and no negate bit on any written
 *    channel of either source.
 *
 * presub_replace_add() clears the Negate field of both presubtract sources,
 * so an operand that is negated on any written channel (fully or only
 * partially) must be rejected here; otherwise the negation would be
 * silently dropped.
 *
 * @param c         compiler context, forwarded to presub_helper().
 * @param inst_add  the ADD instruction to examine.
 * @return
 *  0 if the ADD instruction is still part of the program.
 *  1 if the ADD instruction is no longer part of the program.
 */
static int peephole_add_presub_add(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add)
{
	const unsigned int mask = inst_add->U.I.DstReg.WriteMask;
	unsigned int i;
	struct peephole_state s;

	if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
		return 0;

	if (inst_add->U.I.SaturateMode)
		return 0;

	/* An ADD that writes no channels has no result to substitute. */
	if (!mask)
		return 0;

	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
		return 0;

	/* XXX Only do add for now: neither source may use an absolute value
	 * or be negated on a channel that the ADD actually writes. */
	for (i = 0; i < 2; i++) {
		if (inst_add->U.I.SrcReg[i].Abs)
			return 0;
		if (inst_add->U.I.SrcReg[i].Negate & mask)
			return 0;
	}

	/* Setup the peephole_state information. */
	s.Inst = inst_add;
	s.WriteMask = mask;
	if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
		rc_remove_instruction(inst_add);
		return 1;
	}
	return 0;
}

static void presub_replace_inv(struct peephole_state * s,
struct rc_instruction * inst,
unsigned int src_index)
{
/* We must be careful not to modify s->Inst, since it
* is possible it will remain part of the program.
* XXX Maybe pass a struct instead of a pointer for s->Inst.*/
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
inst->U.I.PreSub.SrcReg[0].Negate = 0;
inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
inst->U.I.PreSub.SrcReg[0]);

inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
}

/**
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
* of the add instruction must have the constant 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
* @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
static int peephole_add_presub_inv(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
unsigned int i, swz, mask;
struct peephole_state s;

if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
return 0;

if (inst_add->U.I.SaturateMode)
return 0;

mask = inst_add->U.I.DstReg.WriteMask;

/* Check if src0 is 1. */
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
for(i = 0; i < 4; i++ ) {
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
&& swz != RC_SWIZZLE_ONE) {
return 0;
}
}

/* Check src1. */
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
inst_add->U.I.DstReg.WriteMask
|| inst_add->U.I.SrcReg[1].Abs
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {

return 0;
}

/* Setup the peephole_state information. */
s.Inst = inst_add;
s.WriteMask = inst_add->U.I.DstReg.WriteMask;

if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add);
return 1;
}
@@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
if (c->has_presub) {
if(peephole_add_presub_inv(c, inst))
return 1;
if(peephole_add_presub_add(c, inst))
return 1;
}
break;
default:

+ 42
- 16
src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c View File

@@ -300,6 +300,7 @@ static int destructive_merge_instructions(
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
unsigned int one_way = 0;
struct radeon_pair_instruction_source srcp =
alpha->RGB.Src[srcp_src];
struct radeon_pair_instruction_source temp;
@@ -307,14 +308,27 @@ static int destructive_merge_instructions(
* 3rd arg of 0 means this is not an alpha source. */
free_source = rc_pair_alloc_source(rgb, 1, 0,
srcp.File, srcp.Index);
/* If free_source == srcp_src, then the
* presubtract source is already in the correct place. */
if (free_source == srcp_src)
continue;
/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;

temp = rgb->RGB.Src[srcp_src];
rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
/* srcp needs src0 and src1 to be the same */
if (free_source < srcp_src) {
if (!temp.Used)
continue;
free_source = rc_pair_alloc_source(rgb, 1, 0,
srcp.File, srcp.Index);
one_way = 1;
} else {
rgb->RGB.Src[free_source] = temp;
}
/* If free_source == srcp_src, then the presubtract
* source is already in the correct place. */
if (free_source == srcp_src)
continue;
/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -331,12 +345,11 @@ static int destructive_merge_instructions(
/* We need to do this just in case register
* is one of the sources already, but in the
* wrong spot. */
else if(rgb->RGB.Arg[arg].Source == free_source)
else if(rgb->RGB.Arg[arg].Source == free_source
&& !one_way) {
rgb->RGB.Arg[arg].Source = srcp_src;
}
}
temp = rgb->RGB.Src[srcp_src];
rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
rgb->RGB.Src[free_source] = temp;
}
}

@@ -352,6 +365,7 @@ static int destructive_merge_instructions(
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
unsigned int one_way = 0;
struct radeon_pair_instruction_source srcp =
alpha->Alpha.Src[srcp_src];
struct radeon_pair_instruction_source temp;
@@ -359,14 +373,27 @@ static int destructive_merge_instructions(
* 3rd arg of 1 means this is an alpha source. */
free_source = rc_pair_alloc_source(rgb, 0, 1,
srcp.File, srcp.Index);
/* If free_source == srcp_src, then the
* presubtract source is already in the correct place. */
if (free_source == srcp_src)
continue;
/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;

temp = rgb->Alpha.Src[srcp_src];
rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
/* srcp needs src0 and src1 to be the same. */
if (free_source < srcp_src) {
if (!temp.Used)
continue;
free_source = rc_pair_alloc_source(rgb, 0, 1,
temp.File, temp.Index);
one_way = 1;
} else {
rgb->Alpha.Src[free_source] = temp;
}
/* If free_source == srcp_src, then the presubtract
* source is already in the correct place. */
if (free_source == srcp_src)
continue;
/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -380,12 +407,11 @@ static int destructive_merge_instructions(
}
if (rgb->RGB.Arg[arg].Source == srcp_src)
rgb->RGB.Arg[arg].Source = free_source;
else if (rgb->RGB.Arg[arg].Source == free_source)
else if (rgb->RGB.Arg[arg].Source == free_source
&& !one_way) {
rgb->RGB.Arg[arg].Source = srcp_src;
}
}
temp = rgb->Alpha.Src[srcp_src];
rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
rgb->Alpha.Src[free_source] = temp;
}
}


Loading…
Cancel
Save