Преглед изворни кода

i965/fs: Split generate_math into gen4/gen6 and 1/2 operand variants.

This mirrors the structure Eric used in the new VS backend, and seems
simpler.  In particular, the math1/math2 split will avoid having to
figure out how many operands there are, as this is already known by the
caller.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
tags/mesa-8.0-rc1
Kenneth Graunke пре 14 година
родитељ
комит
74e927bcaf
2 измењених фајлова са 89 додато и 54 уклоњено
  1. 10
    1
      src/mesa/drivers/dri/i965/brw_fs.h
  2. 79
    53
      src/mesa/drivers/dri/i965/brw_fs_emit.cpp

+ 10
- 1
src/mesa/drivers/dri/i965/brw_fs.h Прегледај датотеку

@@ -487,7 +487,16 @@ public:
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
void generate_math1_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math2_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_discard(fs_inst *inst);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

+ 79
- 53
src/mesa/drivers/dri/i965/brw_fs_emit.cpp Прегледај датотеку

@@ -143,69 +143,85 @@ fs_visitor::generate_linterp(fs_inst *inst,
}

void
fs_visitor::generate_math(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
fs_visitor::generate_math1_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0)
{
int op = brw_math_function(inst->opcode);

if (intel->gen >= 6) {
assert(inst->mlen == 0);

if (inst->opcode == SHADER_OPCODE_POW) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src[0], src[1]);
assert(inst->mlen == 0);

if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
0, src[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);

if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
0, sechalf(src[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
} else /* gen <= 5 */{
assert(inst->mlen >= 1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
0, src0,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);

brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
inst->base_mrf, src[0],
0, sechalf(src0),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}

if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
inst->base_mrf + 1, sechalf(src[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
void
fs_visitor::generate_math2_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
int op = brw_math_function(inst->opcode);

brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
assert(inst->mlen == 0);

brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src0, src1);

if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}

void
fs_visitor::generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src)
{
int op = brw_math_function(inst->opcode);

assert(inst->mlen >= 1);

brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
inst->base_mrf, src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);

if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
inst->base_mrf + 1, sechalf(src),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);

brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}

@@ -770,10 +786,20 @@ fs_visitor::generate_code()
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_POW:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
generate_math(inst, dst, src);
if (intel->gen >= 6) {
generate_math1_gen6(inst, dst, src[0]);
} else {
generate_math_gen4(inst, dst, src[0]);
}
break;
case SHADER_OPCODE_POW:
if (intel->gen >= 6) {
generate_math2_gen6(inst, dst, src[0], src[1]);
} else {
generate_math_gen4(inst, dst, src[0]);
}
break;
case FS_OPCODE_PIXEL_X:
generate_pixel_xy(dst, true);

Loading…
Откажи
Сачувај