Explorar el Código

nvc0: reenable some shader optimizations

CSE and constant folding.
tags/android-x86-2.2-r2
Christoph Bumiller hace 15 años
padre
commit
4fa429c876

+ 54
- 42
src/gallium/drivers/nvc0/nvc0_pc_optimize.c Ver fichero

@@ -276,7 +276,7 @@ nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)

for (s = 0; s < 3 && nvi->src[s]; ++s) {
ld = nvi->src[s]->value->insn;
if (!ld || ld->opcode != NV_OP_LD)
if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
continue;
if (!nvc0_insn_can_load(nvi, s, ld))
continue;
@@ -383,9 +383,8 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

/*
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
if (mod & NV_MOD_ABS) {
if (type == NV_TYPE_F32)
@@ -400,10 +399,28 @@ modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
else
*val = ~(*val) + 1;
}
if (mod & NV_MOD_SAT) {
union {
float f;
uint32_t u;
int32_t i;
} u;
u.u = *val;
if (type == NV_TYPE_F32) {
u.f = CLAMP(u.f, -1.0f, 1.0f);
} else
if (type == NV_TYPE_U16) {
u.u = MIN2(u.u, 0xffff);
} else
if (type == NV_TYPE_S16) {
u.i = CLAMP(u.i, -32768, 32767);
}
*val = u.u;
}
if (mod & NV_MOD_NOT)
*val = ~*val;
}
*/

#if 0
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
struct nv_value *src0, struct nv_value *src1)
@@ -424,8 +441,8 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
u0.u32 = src0->reg.imm.u32;
u1.u32 = src1->reg.imm.u32;

modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
apply_modifiers(&u1.u32, type, nvi->src[1]->mod);

switch (nvi->opcode) {
case NV_OP_MAD:
@@ -468,14 +485,14 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,

nvi->opcode = NV_OP_MOV;

val = new_value(pc, NV_FILE_IMM, type);

val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
val->reg.imm.u32 = u.u32;

nv_reference(pc, nvi, 1, NULL);
nv_reference(pc, nvi, 0, val);

if (nvi->src[2]) { /* from MAD */
if (nvi->src[2]) {
/* from MAD */
nvi->src[1] = nvi->src[0];
nvi->src[0] = nvi->src[2];
nvi->src[2] = NULL;
@@ -506,7 +523,7 @@ constant_operand(struct nv_pc *pc,
type = NV_OPTYPE(nvi->opcode);

u.u32 = val->reg.imm.u32;
modifiers_apply(&u.u32, type, nvi->src[s]->mod);
apply_modifiers(&u.u32, type, nvi->src[s]->mod);

switch (NV_BASEOP(nvi->opcode)) {
case NV_OP_MUL:
@@ -576,23 +593,22 @@ constant_operand(struct nv_pc *pc,
break;
}
}
#endif

static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
#if 0
struct nv_instruction *nvi, *next;
int j;

for (nvi = b->entry; nvi; nvi = next) {
struct nv_value *src0, *src1, *src;
int mod;
int s;
uint8_t mod[4];

next = nvi->next;

src0 = nvcg_find_immediate(nvi->src[0]);
src1 = nvcg_find_immediate(nvi->src[1]);
src0 = nvc0_pc_find_immediate(nvi->src[0]);
src1 = nvc0_pc_find_immediate(nvi->src[1]);

if (src0 && src1)
constant_expression(ctx->pc, nvi, src0, src1);
@@ -604,7 +620,7 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
constant_operand(ctx->pc, nvi, src1, 1);
}

/* try to combine MUL, ADD into MAD */
/* check if we can MUL + ADD -> MAD/FMA */
if (nvi->opcode != NV_OP_ADD)
continue;

@@ -622,20 +638,27 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
/* could have an immediate from above constant_* */
if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
continue;
s = (src == src0) ? 0 : 1;

mod[0] = nvi->src[0]->mod;
mod[1] = nvi->src[1]->mod;
mod[2] = src->insn->src[0]->mod;
mod[3] = src->insn->src[0]->mod;

if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
continue;

nvi->opcode = NV_OP_MAD;
mod = nvi->src[(src == src0) ? 0 : 1]->mod;
nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];
nv_reference(ctx->pc, nvi, s, NULL);
nvi->src[2] = nvi->src[!s];

assert(!(mod & ~NV_MOD_NEG));
nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
nvi->src[1]->mod = src->insn->src[1]->mod;
nvi->src[0]->mod = mod[2] ^ mod[s];
nvi->src[1]->mod = mod[3];
}
DESCEND_ARBITRARY(j, nv_pass_lower_arith);
#endif
return 0;
}

@@ -1016,7 +1039,6 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
#if 0
struct nv_instruction *ir, *ik, *next;
struct nv_instruction *entry = b->phi ? b->phi : b->entry;
int s;
@@ -1030,23 +1052,13 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (ir->opcode != ik->opcode || ir->fixed)
continue;

if (!ir->def[0] || !ik->def[0] ||
ik->opcode == NV_OP_LDA ||
ik->opcode == NV_OP_STA ||
ik->opcode == NV_OP_MOV ||
nv_is_vector_op(ik->opcode))
continue; /* ignore loads, stores & moves */

if (ik->src[4] || ir->src[4])
continue; /* don't mess with address registers */
if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
continue;

if (ik->flags_src || ir->flags_src ||
ik->flags_def || ir->flags_def)
continue; /* and also not with flags, for now */
if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
continue;

if (ik->def[0]->reg.file == NV_FILE_OUT ||
ir->def[0]->reg.file == NV_FILE_OUT ||
!values_equal(ik->def[0], ir->def[0]))
if (!values_equal(ik->def[0], ir->def[0]))
continue;

for (s = 0; s < 3; ++s) {
@@ -1071,7 +1083,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (s == 3) {
nvc0_insn_delete(ir);
++reps;
nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
break;
}
}
@@ -1079,7 +1091,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
} while(reps);

DESCEND_ARBITRARY(s, nv_pass_cse);
#endif
return 0;
}


+ 9
- 9
src/gallium/drivers/nvc0/nvc0_pc_print.c Ver fichero

@@ -283,21 +283,21 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
{ NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
{ NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
{ NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
{ NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
{ NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
{ NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
{ NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
{ NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
{ NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 },
{ NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
{ NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
{ NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
{ NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
{ NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },

{ NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },

Cargando…
Cancelar
Guardar