Browse Source

Fix xyz/w interaction (needs a cleanup still..)

Use SRC0A instead of WZY/XXX combination for W in XYZ positions.
Remove dodgy hack from POW opcode, now works correctly without it
tags/mesa_20050610
Ben Skeggs 21 years ago
parent
commit
3c4c6d1f80
2 changed files with 128 additions and 117 deletions
  1. 2
    2
      src/mesa/drivers/dri/r300/r300_context.c
  2. 126
    115
      src/mesa/drivers/dri/r300/r300_fragprog.c

+ 2
- 2
src/mesa/drivers/dri/r300/r300_context.c View File

@@ -74,9 +74,10 @@ static const char *const card_extensions[] = {
"GL_ARB_texture_border_clamp",
"GL_ARB_texture_compression",
/* disable until we support it, fixes a few things in ut2004 */
// "GL_ARB_texture_cube_map",
/* "GL_ARB_texture_cube_map", */
"GL_ARB_texture_env_add",
"GL_ARB_texture_env_combine",
"GL_ARB_texture_env_crossbar",
"GL_ARB_texture_env_dot3",
"GL_ARB_texture_mirrored_repeat",
"GL_ARB_vertex_buffer_object",
@@ -104,7 +105,6 @@ static const char *const card_extensions[] = {
"GL_NV_blend_square",
"GL_NV_vertex_program",
"GL_SGIS_generate_mipmap",
"GL_ARB_texture_env_crossbar",
NULL
};


+ 126
- 115
src/mesa/drivers/dri/r300/r300_fragprog.c View File

@@ -40,8 +40,6 @@
* fglrx does (see r300_reg.h).
* - Verify results of opcodes for accuracy, I've only checked them
* in specific cases.
* - Learn more about interaction between xyz/w units.. A few bugs are
* caused by something I'm missing..
* - and more...
*/

@@ -112,11 +110,13 @@ static const struct r300_pfv_swizzle {
{ "xxx", MAKE_SWZ3(X, X, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_XXX, 4, GL_FALSE },
{ "yyy", MAKE_SWZ3(Y, Y, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_YYY, 4, GL_FALSE },
{ "zzz", MAKE_SWZ3(Z, Z, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZZZ, 4, GL_FALSE },
{ "www", MAKE_SWZ3(W, W, W), GL_TRUE, R300_FPI0_ARGC_SRC0A, 1, GL_TRUE },
{ "yzx", MAKE_SWZ3(Y, Z, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_YZX, 1, GL_FALSE },
{ "zxy", MAKE_SWZ3(Z, X, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZXY, 1, GL_FALSE },
{ "wzy", MAKE_SWZ3(W, Z, Y), GL_TRUE, R300_FPI0_ARGC_SRC0CA_WZY, 1, GL_TRUE },
/* disable this for now, until I find a clean way of making sure xyz/w streams
* have a source in the same register slot.. */
// { "wzy", MAKE_SWZ3(W, Z, Y), GL_TRUE, R300_FPI0_ARGC_SRC0CA_WZY, 1, GL_TRUE },
/* special cases */
{ NULL, MAKE_SWZ3(W, W, W), GL_FALSE, 0, 0, GL_FALSE},
{ NULL, MAKE_SWZ3(ONE, ONE, ONE), GL_FALSE, R300_FPI0_ARGC_ONE, 0, GL_FALSE},
{ NULL, MAKE_SWZ3(ZERO, ZERO, ZERO), GL_FALSE, R300_FPI0_ARGC_ZERO, 0, GL_FALSE},
{ NULL, PFS_INVAL, GL_FALSE, R300_FPI0_ARGC_HALF, 0, GL_FALSE},
@@ -124,10 +124,10 @@ static const struct r300_pfv_swizzle {
};
#define SWIZZLE_XYZ 0
#define SWIZZLE_XXX 1
#define SWIZZLE_WZY 6
#define SWIZZLE_111 8
#define SWIZZLE_000 9
#define SWIZZLE_HHH 10
#define SWIZZLE_WWW 4
#define SWIZZLE_111 7
#define SWIZZLE_000 8
#define SWIZZLE_HHH 9

#define SWZ_X_MASK (7 << 0)
#define SWZ_Y_MASK (7 << 3)
@@ -320,30 +320,6 @@ static int swz_special_case(struct r300_fragment_program *rp,
pfs_reg_t ssrc = pfs_default_reg;

switch(GET_SWZ(v_swiz[src.v_swz].hash, 0)) {
case SWIZZLE_W:
ssrc = get_temp_reg(rp);
src.v_swz = SWIZZLE_WZY;
if (s_mask[mask].count == 3) {
emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XW, src, pfs_one, pfs_zero, 0);
*r = ssrc;
r->v_swz = SWIZZLE_XXX;
r->s_swz = SWIZZLE_W;
} else if (mc + s_mask[mask].count == 3) {
if (!r->valid)
*r = get_temp_reg(rp);
emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XW, src, pfs_one, pfs_zero, 0);
ssrc.v_swz = SWIZZLE_XXX;
emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask|WRITEMASK_W, ssrc, pfs_one, pfs_zero, 0);
free_temp(rp, ssrc);
} else {
if (!r->valid)
*r = get_temp_reg(rp);
emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_X, src, pfs_one, pfs_zero, 0);
ssrc.v_swz = SWIZZLE_XXX;
emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, ssrc, pfs_one, pfs_zero, 0);
free_temp(rp, ssrc);
}
break;
case SWIZZLE_ONE:
case SWIZZLE_ZERO:
if (!r->valid)
@@ -472,16 +448,16 @@ static void sync_streams(struct r300_fragment_program *rp) {
/* Bring vector/scalar streams into sync, inserting nops into
* whatever stream is lagging behind
*
* I'm using "MAD t0, t0, 1.0, 0.0" as a NOP
* Using NOP == MAD out.none, 0, 0, 0
*/
while (rp->v_pos != rp->s_pos) {
if (rp->s_pos > rp->v_pos) {
rp->alu.inst[rp->v_pos].inst0 = 0x00050A80;
rp->alu.inst[rp->v_pos].inst1 = 0x03820800;
rp->alu.inst[rp->v_pos].inst0 = 0x00050A14;
rp->alu.inst[rp->v_pos].inst1 = 0x00020820;
rp->v_pos++;
} else {
rp->alu.inst[rp->s_pos].inst2 = 0x00040889;
rp->alu.inst[rp->s_pos].inst3 = 0x00820800;
rp->alu.inst[rp->s_pos].inst2 = 0x00040810;
rp->alu.inst[rp->s_pos].inst3 = 0x00020820;
rp->s_pos++;
}
}
@@ -550,25 +526,68 @@ static void emit_tex(struct r300_fragment_program *rp,
rp->node[rp->cur_node].tex_end++;
}

/* Modifier bits OR'd into a per-argument swizzle selector value:
 * ARG_NEG is set when the source register is negated, ARG_ABS when the
 * instruction carries PFS_FLAG_ABS. */
#define ARG_NEG (1<<5)
#define ARG_ABS (1<<6)
/* Flag OR'd into a source index to mark it as a constant register
 * rather than a temp/input register. */
#define SRC_CONST (1<<5)
/* NOTE(review): presumably the bit distance between consecutive source
 * fields in the INST1/INST3 words -- confirm against r300_reg.h. */
#define SRC_STRIDE 6

/* Translate a program source register into the value placed in a
 * hardware source field.
 *
 * rp  - fragment program being translated; its used_in_node mask is
 *       updated when a temp or input register is referenced
 * src - source register to translate
 *
 * Constant registers return their index with SRC_CONST set. Temp and
 * input registers are mapped through rp->temps / rp->inputs, flagged in
 * rp->used_in_node, and the mapped index is returned. Any other register
 * type is reported via ERROR and constant register 0 is returned.
 */
static int t_hw_src(struct r300_fragment_program *rp, pfs_reg_t src)
{
	int hw_index;

	/* Constants need no bookkeeping, handle them up front. */
	if (src.type == REG_TYPE_CONST)
		return (src.index | SRC_CONST);

	if (src.type == REG_TYPE_TEMP) {
		hw_index = rp->temps[src.index];
	} else if (src.type == REG_TYPE_INPUT) {
		hw_index = rp->inputs[src.index];
	} else {
		ERROR("Invalid type for source reg\n");
		return (0 | SRC_CONST);
	}

	/* Record that this hardware register is read in the current node. */
	rp->used_in_node |= (1 << hw_index);
	return hw_index;
}

/* Add a source to an FPI1/FPI3 source list.
 *
 * src - list of up to three hardware source values
 * cnt - number of entries of src currently in use; incremented when
 *       reg is appended
 * reg - hardware source value to look up or append
 *
 * Returns the slot index of reg within src. If reg is already on the
 * list its existing slot is reused instead of wasting a source.
 */
static inline int add_src(int src[3], int *cnt, int reg) {
	int i;

	for (i = 0; i < *cnt; i++) {
		if (src[i] == reg)
			return i;
	}

	if (*cnt >= 3) {
		/* I don't *think* this can happen. Trap it in debug builds; with
		 * NDEBUG the assert compiles away, so bail out explicitly rather
		 * than writing past the end of src[]. */
		assert(0);
		return 0;
	}

	src[*cnt] = reg;
	return (*cnt)++;
}

static void emit_arith(struct r300_fragment_program *rp, int op,
pfs_reg_t dest, int mask,
pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
int flags)
{
pfs_reg_t src[3] = { src0, src1, src2 };
/* XYZ/W emit control */
int v_idx = rp->v_pos, s_idx = rp->s_pos;
GLboolean emit_v = GL_FALSE, emit_s = GL_FALSE;
/* INST1/INST3 sources */
int vsrc[3], ssrc[3];
int nvs = 0, nss = 0;
/* INST0/INST2 sources */
int vswz[3], sswz[3];
/* temp stuff */
int hwdest, hwsrc;
int argc;
int v_idx = rp->v_pos, s_idx = rp->s_pos;
GLuint inst[4] = { 0, 0, 0, 0 };
int vop, sop;
int i;

#define ARG_NEG (1<<5)
#define ARG_ABS (1<<6)
#define ARG_STRIDE 7
#define SRC_CONST (1<<5)
#define SRC_STRIDE 6

if (!dest.valid || !src0.valid || !src1.valid || !src2.valid) {
ERROR("invalid register. dest/src0/src1/src2 valid = %d/%d/%d/%d\n",
dest.valid, src0.valid, src1.valid, src2.valid);
@@ -598,96 +617,91 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
ERROR("invalid dest reg type %d\n", dest.type);
return;
}
/* grab hwregs of sources */
int str;
for (i=0;i<3;i++) {
if (i<argc) {
/* Decide on hardware source index */
switch (src[i].type) {
case REG_TYPE_INPUT:
hwsrc = rp->inputs[src[i].index];
rp->used_in_node |= (1 << hwsrc);

inst[1] |= hwsrc << (i * SRC_STRIDE);
inst[3] |= hwsrc << (i * SRC_STRIDE);
break;
case REG_TYPE_TEMP:
/* make sure insn ordering is right... */
if ((v_swiz[src[i].v_swz].dep_sca && v_idx < s_idx) ||
(s_swiz[src[i].s_swz].dep_vec && s_idx < v_idx)) {
hwsrc = t_hw_src(rp, src[i]);
if (mask & WRITEMASK_XYZ && vop != R300_FPI0_OUTC_REPL_ALPHA) {
if (v_swiz[src[i].v_swz].dep_sca) {
sync_streams(rp);
v_idx = s_idx = rp->v_pos;
}
emit_s = GL_TRUE;
str = add_src(ssrc, &nss, hwsrc);
} else
str = add_src(vsrc, &nvs, hwsrc);
vswz[i] = v_swiz[src[i].v_swz].base + (str * v_swiz[src[i].v_swz].stride);
} else
vswz[i] = R300_FPI0_ARGC_ZERO;

if (mask & WRITEMASK_W || vop == R300_FPI0_OUTC_REPL_ALPHA) {
if (s_swiz[src[i].s_swz].dep_vec) {
sync_streams(rp);
v_idx = s_idx = rp->v_pos;
emit_v = GL_TRUE;
str = add_src(vsrc, &nvs, hwsrc);
} else
str = add_src(ssrc, &nss, hwsrc);
sswz[i] = s_swiz[src[i].s_swz].base + (str * s_swiz[src[i].s_swz].stride);
} else
sswz[i] = R300_FPI2_ARGA_ZERO;
hwsrc = rp->temps[src[i].index];
rp->used_in_node |= (1 << hwsrc);

inst[1] |= hwsrc << (i * SRC_STRIDE);
inst[3] |= hwsrc << (i * SRC_STRIDE);
break;
case REG_TYPE_CONST:
hwsrc = src[i].index;

inst[1] |= ((hwsrc | SRC_CONST) << (i * SRC_STRIDE));
inst[3] |= ((hwsrc | SRC_CONST) << (i * SRC_STRIDE));
break;
default:
ERROR("invalid source reg\n");
return;
}

/* Swizzling/Negation */
if (vop == R300_FPI0_OUTC_REPL_ALPHA)
inst[0] |= R300_FPI0_ARGC_ZERO << (i * ARG_STRIDE);
else
inst[0] |= (v_swiz[src[i].v_swz].base + (i * v_swiz[src[i].v_swz].stride)) << (i*ARG_STRIDE);
inst[2] |= (s_swiz[src[i].s_swz].base + (i * s_swiz[src[i].s_swz].stride)) << (i*ARG_STRIDE);

if (src[i].negate) {
inst[0] |= ARG_NEG << (i * ARG_STRIDE);
inst[2] |= ARG_NEG << (i * ARG_STRIDE);
vswz[i] |= ARG_NEG;
sswz[i] |= ARG_NEG;
}

if (flags & PFS_FLAG_ABS) {
inst[0] |= ARG_ABS << (i * ARG_STRIDE);
inst[2] |= ARG_ABS << (i * ARG_STRIDE);
vswz[i] |= ARG_ABS;
sswz[i] |= ARG_ABS;
}
} else {
/* read constant 0, use zero swizzle as well */
inst[0] |= R300_FPI0_ARGC_ZERO << (i*ARG_STRIDE);
inst[1] |= SRC_CONST << (i*SRC_STRIDE);
inst[2] |= R300_FPI2_ARGA_ZERO << (i*ARG_STRIDE);
inst[3] |= SRC_CONST << (i*SRC_STRIDE);
vswz[i] = R300_FPI0_ARGC_ZERO;
sswz[i] = R300_FPI2_ARGA_ZERO;
}
}
/* Unused sources, read constant reg 0 */
for (i=nvs;i<3;i++)
vsrc[i] = 0 | SRC_CONST;
for (i=nss;i<3;i++)
ssrc[i] = 0 | SRC_CONST;

if (flags & PFS_FLAG_SAT) {
vop |= R300_FPI0_OUTC_SAT;
sop |= R300_FPI2_OUTA_SAT;
}
if (mask & WRITEMASK_XYZ) {
if (mask & WRITEMASK_XYZ || emit_v) {
if (r300_fpop[op].v_op == R300_FPI0_OUTC_REPL_ALPHA) {
sync_streams(rp);
s_idx = v_idx = rp->v_pos;
}
rp->alu.inst[v_idx].inst0 = inst[0] | vop;
rp->alu.inst[v_idx].inst1 = inst[1] |
(hwdest << R300_FPI1_DSTC_SHIFT) |
rp->alu.inst[v_idx].inst0 = vop |
vswz[0] << R300_FPI0_ARG0C_SHIFT |
vswz[1] << R300_FPI0_ARG1C_SHIFT |
vswz[2] << R300_FPI0_ARG2C_SHIFT;
rp->alu.inst[v_idx].inst1 = hwdest << R300_FPI1_DSTC_SHIFT |
vsrc[0] << R300_FPI1_SRC0C_SHIFT |
vsrc[1] << R300_FPI1_SRC1C_SHIFT |
vsrc[2] << R300_FPI1_SRC2C_SHIFT |
((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23));
rp->v_pos = v_idx + 1;
}
if ((mask & WRITEMASK_W) || r300_fpop[op].v_op == R300_FPI0_OUTC_REPL_ALPHA) {
rp->alu.inst[s_idx].inst2 = inst[2] | sop;
rp->alu.inst[s_idx].inst3 = inst[3] |
(hwdest << R300_FPI3_DSTA_SHIFT) |

if (mask & WRITEMASK_W || emit_s || vop == R300_FPI0_OUTC_REPL_ALPHA) {
rp->alu.inst[s_idx].inst2 = sop |
sswz[0] << R300_FPI2_ARG0A_SHIFT |
sswz[1] << R300_FPI2_ARG1A_SHIFT |
sswz[2] << R300_FPI2_ARG2A_SHIFT;
rp->alu.inst[s_idx].inst3 = hwdest << R300_FPI3_DSTA_SHIFT |
ssrc[0] << R300_FPI3_SRC0A_SHIFT |
ssrc[1] << R300_FPI3_SRC1A_SHIFT |
ssrc[2] << R300_FPI3_SRC2A_SHIFT |
(((mask & WRITEMASK_W)?1:0) << (dest.type == REG_TYPE_OUTPUT ? 24 : 23));
rp->s_pos = s_idx + 1;
}

/* Force this for now */
sync_streams(rp);
/* sync_streams(rp); */
return;
};
@@ -791,17 +805,14 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
flags);
break;
case FP_OPCODE_POW:
/* I don't like this, and it's probably wrong in some
* circumstances... Needs checking */
src0 = t_src(rp, fpi->SrcReg[0]);
src1 = t_src(rp, fpi->SrcReg[1]);
dest = t_dst(rp, fpi->DstReg);
temp = get_temp_reg(rp);
temp.s_swz = SWIZZLE_X; /* cheat, bypass swizzle code */

emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_X,
emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
src0, pfs_zero, pfs_zero, 0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
temp, src1, pfs_zero, 0);
emit_arith(rp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask,
temp, pfs_zero, pfs_zero, 0);
@@ -969,12 +980,12 @@ void translate_fragment_shader(struct r300_fragment_program *rp)

if (!rp->translated) {
init_program(rp);
if (parse_program(rp) == GL_FALSE) {
dump_program(rp);
return;
}
/* Finish off */
sync_streams(rp);
rp->node[rp->cur_node].alu_end = rp->v_pos - 1;

Loading…
Cancel
Save