Browse Source

r300/compiler: implement DP2 opcode

tags/mesa-7.9-rc1
Marek Olšák 15 years ago
parent
commit
27eb2e2755

+ 1
- 1
src/gallium/drivers/r300/r300_tgsi_to_rc.c View File

@@ -103,7 +103,7 @@ static unsigned translate_opcode(unsigned opcode)
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
/* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;

+ 10
- 0
src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c View File

@@ -94,6 +94,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DP2,
.Name = "DP2",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DP3,
.Name = "DP3",
@@ -442,6 +448,10 @@ void rc_compute_sources_for_writemask(
case RC_OPCODE_ARL:
srcmasks[0] |= RC_MASK_X;
break;
case RC_OPCODE_DP2:
srcmasks[0] |= RC_MASK_XY;
srcmasks[1] |= RC_MASK_XY;
break;
case RC_OPCODE_DP3:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;

+ 3
- 0
src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h View File

@@ -64,6 +64,9 @@ typedef enum {
* dst.c = d src0.c / dy */
RC_OPCODE_DDY,

/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
RC_OPCODE_DP2,

/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
RC_OPCODE_DP3,


+ 34
- 9
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c View File

@@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c,
rc_remove_instruction(inst);
}

static void transform_DP3(struct radeon_compiler* c,
static void transform_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
src0.Swizzle &= ~(63 << (3 * 2));
src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
src1.Swizzle &= ~(63 << (3 * 2));
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}

@@ -553,6 +553,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
@@ -615,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
rc_remove_instruction(inst);
}

static void transform_r300_vertex_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_instruction *next_inst = inst->Next;
transform_DP2(c, inst);
next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
}

static void transform_r300_vertex_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}

static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -758,7 +782,8 @@ int r300_transform_vertex_alu(
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;

Loading…
Cancel
Save