|
|
@@ -38,6 +38,10 @@ struct peephole_state { |
|
|
|
unsigned int WriteMask; |
|
|
|
}; |
|
|
|
|
|
|
|
/**
 * Callback invoked by presub_helper() for every source operand that can be
 * switched over to a presubtract result.
 *
 * @param s          peephole state; s->Inst is the instruction being replaced.
 * @param inst       the reading instruction whose operand is rewritten.
 * @param src_index  index into inst->U.I.SrcReg of the operand to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct peephole_state *s,
					struct rc_instruction *inst,
					unsigned int src_index);
|
|
|
|
|
|
|
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) |
|
|
|
{ |
|
|
|
struct rc_src_register combine; |
|
|
@@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] |
|
|
|
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source |
|
|
|
* of the add instruction must have the constant 1 swizzle. This function |
|
|
|
* does not check const registers to see if their value is 1.0, so it should |
|
|
|
* be called after the constant_folding optimization. |
|
|
|
* @return |
|
|
|
* 0 if the ADD instruction is still part of the program. |
|
|
|
* 1 if the ADD instruction is no longer part of the program. |
|
|
|
*/ |
|
|
|
static int peephole_add_presub_inv( |
|
|
|
static int presub_helper( |
|
|
|
struct radeon_compiler * c, |
|
|
|
struct rc_instruction * inst_add) |
|
|
|
struct peephole_state * s, |
|
|
|
rc_presubtract_op presub_opcode, |
|
|
|
rc_presub_replace_fn presub_replace) |
|
|
|
{ |
|
|
|
unsigned int i, swz, mask; |
|
|
|
struct rc_instruction * inst; |
|
|
|
unsigned int can_remove = 0; |
|
|
|
unsigned int cant_sub = 0; |
|
|
|
struct rc_instruction * inst; |
|
|
|
struct peephole_state s; |
|
|
|
|
|
|
|
if (inst_add->U.I.SaturateMode) |
|
|
|
return 0; |
|
|
|
|
|
|
|
mask = inst_add->U.I.DstReg.WriteMask; |
|
|
|
|
|
|
|
/* Check if src0 is 1. */ |
|
|
|
/* XXX It would be nice to use is_src_uniform_constant here, but that |
|
|
|
* function only works if the register's file is RC_FILE_NONE */ |
|
|
|
for(i = 0; i < 4; i++ ) { |
|
|
|
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); |
|
|
|
if(((1 << i) & inst_add->U.I.DstReg.WriteMask) |
|
|
|
&& swz != RC_SWIZZLE_ONE) { |
|
|
|
return 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Check src1. */ |
|
|
|
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != |
|
|
|
inst_add->U.I.DstReg.WriteMask |
|
|
|
|| inst_add->U.I.SrcReg[1].Abs |
|
|
|
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY |
|
|
|
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) |
|
|
|
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) { |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* Setup the peephole_state information. */ |
|
|
|
s.Inst = inst_add; |
|
|
|
s.WriteMask = inst_add->U.I.DstReg.WriteMask; |
|
|
|
|
|
|
|
/* For all instructions that read inst_add->U.I.DstReg before it is |
|
|
|
* written again, use the 1 - src0 presubtract instead. */ |
|
|
|
for(inst = inst_add->Next; inst != &c->Program.Instructions; |
|
|
|
for(inst = s->Inst->Next; inst != &c->Program.Instructions; |
|
|
|
inst = inst->Next) { |
|
|
|
unsigned int i; |
|
|
|
const struct rc_opcode_info * info = |
|
|
|
rc_get_opcode_info(inst->U.I.Opcode); |
|
|
|
|
|
|
|
for(i = 0; i < info->NumSrcRegs; i++) { |
|
|
|
if(inst_add->U.I.DstReg.WriteMask != |
|
|
|
if(s->Inst->U.I.DstReg.WriteMask != |
|
|
|
src_reads_dst_mask(inst->U.I.SrcReg[i], |
|
|
|
inst_add->U.I.DstReg)) { |
|
|
|
s->Inst->U.I.DstReg)) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
if (cant_sub) { |
|
|
@@ -601,47 +563,173 @@ static int peephole_add_presub_inv( |
|
|
|
* instruction, unless the two presubtract operations |
|
|
|
* are the same and read from the same registers. */ |
|
|
|
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { |
|
|
|
if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV |
|
|
|
if (inst->U.I.PreSub.Opcode != presub_opcode |
|
|
|
|| inst->U.I.PreSub.SrcReg[0].File != |
|
|
|
inst_add->U.I.SrcReg[1].File |
|
|
|
s->Inst->U.I.SrcReg[1].File |
|
|
|
|| inst->U.I.PreSub.SrcReg[0].Index != |
|
|
|
inst_add->U.I.SrcReg[1].Index) { |
|
|
|
s->Inst->U.I.SrcReg[1].Index) { |
|
|
|
|
|
|
|
can_remove = 0; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
/* We must be careful not to modify inst_add, since it |
|
|
|
* is possible it will remain part of the program. */ |
|
|
|
inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; |
|
|
|
inst->U.I.PreSub.SrcReg[0].Negate = 0; |
|
|
|
inst->U.I.PreSub.Opcode = RC_PRESUB_INV; |
|
|
|
inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i], |
|
|
|
inst->U.I.PreSub.SrcReg[0]); |
|
|
|
|
|
|
|
inst->U.I.SrcReg[i].File = RC_FILE_PRESUB; |
|
|
|
inst->U.I.SrcReg[i].Index = RC_PRESUB_INV; |
|
|
|
presub_replace(s, inst, i); |
|
|
|
can_remove = 1; |
|
|
|
} |
|
|
|
if(!can_remove) |
|
|
|
break; |
|
|
|
rc_for_all_writes_mask(inst, peephole_scan_write, &s); |
|
|
|
rc_for_all_writes_mask(inst, peephole_scan_write, s); |
|
|
|
/* If all components of inst_add's destination register have |
|
|
|
* been written to by subsequent instructions, the original |
|
|
|
* value of the destination register is no longer valid and |
|
|
|
* we can't keep doing substitutions. */ |
|
|
|
if (!s.WriteMask){ |
|
|
|
if (!s->WriteMask){ |
|
|
|
break; |
|
|
|
} |
|
|
|
/* Make sure this instruction doesn't write to the presubtract source. */ |
|
|
|
if (inst->U.I.DstReg.WriteMask & |
|
|
|
src_reads_dst_mask(inst_add->U.I.SrcReg[1], |
|
|
|
src_reads_dst_mask(s->Inst->U.I.SrcReg[1], |
|
|
|
inst->U.I.DstReg) |
|
|
|
|| info->IsFlowControl) { |
|
|
|
cant_sub = 1; |
|
|
|
} |
|
|
|
} |
|
|
|
if(can_remove) { |
|
|
|
return can_remove; |
|
|
|
} |
|
|
|
|
|
|
|
static void presub_replace_add(struct peephole_state *s, |
|
|
|
struct rc_instruction * inst, |
|
|
|
unsigned int src_index) |
|
|
|
{ |
|
|
|
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; |
|
|
|
inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; |
|
|
|
inst->U.I.PreSub.SrcReg[0].Negate = 0; |
|
|
|
inst->U.I.PreSub.SrcReg[1].Negate = 0; |
|
|
|
inst->U.I.PreSub.Opcode = RC_PRESUB_ADD; |
|
|
|
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], |
|
|
|
inst->U.I.PreSub.SrcReg[0]); |
|
|
|
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; |
|
|
|
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD; |
|
|
|
} |
|
|
|
|
|
|
|
static int peephole_add_presub_add( |
|
|
|
struct radeon_compiler * c, |
|
|
|
struct rc_instruction * inst_add) |
|
|
|
{ |
|
|
|
struct rc_src_register * src0 = NULL; |
|
|
|
struct rc_src_register * src1 = NULL; |
|
|
|
unsigned int i; |
|
|
|
struct peephole_state s; |
|
|
|
|
|
|
|
if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) |
|
|
|
return 0; |
|
|
|
|
|
|
|
if (inst_add->U.I.SaturateMode) |
|
|
|
return 0; |
|
|
|
|
|
|
|
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) |
|
|
|
return 0; |
|
|
|
|
|
|
|
/* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ |
|
|
|
for (i = 0; i < 2; i++) { |
|
|
|
if (inst_add->U.I.SrcReg[i].Abs) |
|
|
|
return 0; |
|
|
|
if ((inst_add->U.I.SrcReg[i].Negate |
|
|
|
& inst_add->U.I.DstReg.WriteMask) == |
|
|
|
inst_add->U.I.DstReg.WriteMask) { |
|
|
|
src0 = &inst_add->U.I.SrcReg[i]; |
|
|
|
} else if (!src1) { |
|
|
|
src1 = &inst_add->U.I.SrcReg[i]; |
|
|
|
} else { |
|
|
|
src0 = &inst_add->U.I.SrcReg[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (!src1) |
|
|
|
return 0; |
|
|
|
|
|
|
|
/* XXX Only do add for now. */ |
|
|
|
if (src0->Negate) |
|
|
|
return 0; |
|
|
|
|
|
|
|
s.Inst = inst_add; |
|
|
|
s.WriteMask = inst_add->U.I.DstReg.WriteMask; |
|
|
|
if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) { |
|
|
|
rc_remove_instruction(inst_add); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
static void presub_replace_inv(struct peephole_state * s, |
|
|
|
struct rc_instruction * inst, |
|
|
|
unsigned int src_index) |
|
|
|
{ |
|
|
|
/* We must be careful not to modify s->Inst, since it |
|
|
|
* is possible it will remain part of the program. |
|
|
|
* XXX Maybe pass a struct instead of a pointer for s->Inst.*/ |
|
|
|
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; |
|
|
|
inst->U.I.PreSub.SrcReg[0].Negate = 0; |
|
|
|
inst->U.I.PreSub.Opcode = RC_PRESUB_INV; |
|
|
|
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], |
|
|
|
inst->U.I.PreSub.SrcReg[0]); |
|
|
|
|
|
|
|
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; |
|
|
|
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] |
|
|
|
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source |
|
|
|
* of the add instruction must have the constant 1 swizzle. This function |
|
|
|
* does not check const registers to see if their value is 1.0, so it should |
|
|
|
* be called after the constant_folding optimization. |
|
|
|
* @return |
|
|
|
* 0 if the ADD instruction is still part of the program. |
|
|
|
* 1 if the ADD instruction is no longer part of the program. |
|
|
|
*/ |
|
|
|
static int peephole_add_presub_inv( |
|
|
|
struct radeon_compiler * c, |
|
|
|
struct rc_instruction * inst_add) |
|
|
|
{ |
|
|
|
unsigned int i, swz, mask; |
|
|
|
struct peephole_state s; |
|
|
|
|
|
|
|
if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) |
|
|
|
return 0; |
|
|
|
|
|
|
|
if (inst_add->U.I.SaturateMode) |
|
|
|
return 0; |
|
|
|
|
|
|
|
mask = inst_add->U.I.DstReg.WriteMask; |
|
|
|
|
|
|
|
/* Check if src0 is 1. */ |
|
|
|
/* XXX It would be nice to use is_src_uniform_constant here, but that |
|
|
|
* function only works if the register's file is RC_FILE_NONE */ |
|
|
|
for(i = 0; i < 4; i++ ) { |
|
|
|
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); |
|
|
|
if(((1 << i) & inst_add->U.I.DstReg.WriteMask) |
|
|
|
&& swz != RC_SWIZZLE_ONE) { |
|
|
|
return 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Check src1. */ |
|
|
|
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != |
|
|
|
inst_add->U.I.DstReg.WriteMask |
|
|
|
|| inst_add->U.I.SrcReg[1].Abs |
|
|
|
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY |
|
|
|
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) |
|
|
|
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) { |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* Setup the peephole_state information. */ |
|
|
|
s.Inst = inst_add; |
|
|
|
s.WriteMask = inst_add->U.I.DstReg.WriteMask; |
|
|
|
|
|
|
|
if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) { |
|
|
|
rc_remove_instruction(inst_add); |
|
|
|
return 1; |
|
|
|
} |
|
|
@@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) |
|
|
|
if (c->has_presub) { |
|
|
|
if(peephole_add_presub_inv(c, inst)) |
|
|
|
return 1; |
|
|
|
if(peephole_add_presub_add(c, inst)) |
|
|
|
return 1; |
|
|
|
} |
|
|
|
break; |
|
|
|
default: |