| @@ -75,7 +75,8 @@ nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s) | |||
| case NV_OP_XOR: | |||
| case NV_OP_SHL: | |||
| case NV_OP_SHR: | |||
| return (s == 1) && (nvi->def[0]->reg.file == NV_FILE_GPR); | |||
| return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) && | |||
| (nvi->def[0]->reg.file == NV_FILE_GPR); | |||
| case NV_OP_MOV: | |||
| assert(s == 0); | |||
| return (nvi->def[0]->reg.file == NV_FILE_GPR); | |||
| @@ -87,6 +88,12 @@ nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s) | |||
| boolean | |||
| nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value) | |||
| { | |||
| int i; | |||
| for (i = 0; i < 3 && nvi->src[i]; ++i) | |||
| if (nvi->src[i]->value->reg.file == NV_FILE_IMM) | |||
| return FALSE; | |||
| switch (nvi->opcode) { | |||
| case NV_OP_ABS: | |||
| case NV_OP_ADD: | |||
| @@ -189,37 +196,89 @@ nv_pc_free_refs(struct nv_pc *pc) | |||
| FREE(pc->refs[i]); | |||
| } | |||
| static const char * | |||
| edge_name(ubyte type) | |||
| { | |||
| switch (type) { | |||
| case CFG_EDGE_FORWARD: return "forward"; | |||
| case CFG_EDGE_BACK: return "back"; | |||
| case CFG_EDGE_LOOP_ENTER: return "loop"; | |||
| case CFG_EDGE_LOOP_LEAVE: return "break"; | |||
| default: | |||
| return "?"; | |||
| } | |||
| } | |||
| void | |||
| nv_print_program(struct nv_basic_block *b) | |||
| nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) | |||
| { | |||
| struct nv_instruction *i = b->phi; | |||
| struct nv_basic_block *bb[64], *bbb[16], *b; | |||
| int j, p, pp; | |||
| bb[0] = root; | |||
| p = 1; | |||
| pp = 0; | |||
| while (p > 0) { | |||
| b = bb[--p]; | |||
| b->priv = 0; | |||
| for (j = 1; j >= 0; --j) { | |||
| if (!b->out[j]) | |||
| continue; | |||
| switch (b->out_kind[j]) { | |||
| case CFG_EDGE_BACK: | |||
| continue; | |||
| case CFG_EDGE_FORWARD: | |||
| if (++b->out[j]->priv == b->out[j]->num_in) | |||
| bb[p++] = b->out[j]; | |||
| break; | |||
| case CFG_EDGE_LOOP_ENTER: | |||
| bb[p++] = b->out[j]; | |||
| break; | |||
| case CFG_EDGE_LOOP_LEAVE: | |||
| bbb[pp++] = b->out[j]; | |||
| break; | |||
| default: | |||
| assert(0); | |||
| break; | |||
| } | |||
| } | |||
| f(priv, b); | |||
| b->priv = 0; | |||
| if (!p) | |||
| while (pp > 0) | |||
| bb[p++] = bbb[--pp]; | |||
| } | |||
| } | |||
| static void | |||
| nv_do_print_program(void *priv, struct nv_basic_block *b) | |||
| { | |||
| struct nv_instruction *i = b->phi; | |||
| debug_printf("=== BB %i ", b->id); | |||
| if (b->out[0]) | |||
| debug_printf("(--0> %i) ", b->out[0]->id); | |||
| debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); | |||
| if (b->out[1]) | |||
| debug_printf("(--1> %i) ", b->out[1]->id); | |||
| debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); | |||
| debug_printf("===\n"); | |||
| i = b->phi; | |||
| if (!i) | |||
| i = b->entry; | |||
| for (; i; i = i->next) | |||
| nv_print_instruction(i); | |||
| } | |||
| if (!b->out[0]) { | |||
| debug_printf("END\n\n"); | |||
| return; | |||
| } | |||
| if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in) | |||
| return; | |||
| if (b->out[0] != b) | |||
| nv_print_program(b->out[0]); | |||
| void | |||
| nv_print_program(struct nv_basic_block *root) | |||
| { | |||
| nv_pc_pass_in_order(root, nv_do_print_program, root); | |||
| if (b->out[1] && b->out[1] != b) | |||
| nv_print_program(b->out[1]); | |||
| debug_printf("END\n\n"); | |||
| } | |||
| static INLINE void | |||
| @@ -254,7 +313,7 @@ nv50_emit_program(struct nv_pc *pc) | |||
| assert(pc->emit == &code[pc->bin_size / 4]); | |||
| /* XXX: we can do better than this ... */ | |||
| if ((pc->emit[-2] & 2) || (pc->emit[-1] & 3) == 3) { | |||
| if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3) == 3) { | |||
| pc->emit[0] = 0xf0000001; | |||
| pc->emit[1] = 0xe0000000; | |||
| pc->bin_size += 8; | |||
| @@ -281,6 +340,7 @@ nv50_generate_code(struct nv50_translation_info *ti) | |||
| ret = nv50_tgsi_to_nc(pc, ti); | |||
| if (ret) | |||
| goto out; | |||
| nv_print_program(pc->root); | |||
| /* optimization */ | |||
| ret = nv_pc_exec_pass0(pc); | |||
| @@ -454,30 +514,40 @@ nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2) | |||
| i1->next->prev = i1; | |||
| } | |||
| void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *b) | |||
| void | |||
| nvbb_attach_block(struct nv_basic_block *parent, | |||
| struct nv_basic_block *b, ubyte edge_kind) | |||
| { | |||
| assert(b->num_in < 8); | |||
| if (parent->out[0]) { | |||
| assert(!parent->out[1]); | |||
| parent->out[1] = b; | |||
| } else | |||
| parent->out_kind[1] = edge_kind; | |||
| } else { | |||
| parent->out[0] = b; | |||
| parent->out_kind[0] = edge_kind; | |||
| } | |||
| b->in[b->num_in++] = parent; | |||
| b->in[b->num_in] = parent; | |||
| b->in_kind[b->num_in++] = edge_kind; | |||
| } | |||
| int | |||
| /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ | |||
| boolean | |||
| nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) | |||
| { | |||
| int j, n; | |||
| int j; | |||
| if (b == d) | |||
| return 1; | |||
| return TRUE; | |||
| n = 0; | |||
| for (j = 0; j < b->num_in; ++j) | |||
| n += nvbb_dominated_by(b->in[j], d); | |||
| if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d)) | |||
| return FALSE; | |||
| return (n && (n == b->num_in)) ? 1 : 0; | |||
| return j ? TRUE : FALSE; | |||
| } | |||
| /* check if bf (future) can be reached from bp (past) */ | |||
| @@ -490,27 +560,45 @@ nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, | |||
| if (bp == bt) | |||
| return FALSE; | |||
| if (bp->out[0] && bp->out[0] != bp && | |||
| if (bp->out[0] && bp->out_kind[0] != CFG_EDGE_BACK && | |||
| nvbb_reachable_by(bf, bp->out[0], bt)) | |||
| return TRUE; | |||
| if (bp->out[1] && bp->out[1] != bp && | |||
| if (bp->out[1] && bp->out_kind[1] != CFG_EDGE_BACK && | |||
| nvbb_reachable_by(bf, bp->out[1], bt)) | |||
| return TRUE; | |||
| return FALSE; | |||
| } | |||
| static struct nv_basic_block * | |||
| nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) | |||
| { | |||
| int i; | |||
| if (!nvbb_dominated_by(df, b)) { | |||
| for (i = 0; i < df->num_in; ++i) { | |||
| if (df->in_kind[i] == CFG_EDGE_BACK) | |||
| continue; | |||
| if (nvbb_dominated_by(df->in[i], b)) | |||
| return df; | |||
| } | |||
| } | |||
| for (i = 0; i < 2 && b->out[i]; ++i) { | |||
| if (b->out_kind[i] == CFG_EDGE_BACK) | |||
| continue; | |||
| if ((df = nvbb_find_dom_frontier(b, b->out[i]))) | |||
| return df; | |||
| } | |||
| return NULL; | |||
| } | |||
| struct nv_basic_block * | |||
| nvbb_dom_frontier(struct nv_basic_block *b) | |||
| { | |||
| struct nv_basic_block *df = b->out[0]; | |||
| assert(df); | |||
| while (nvbb_dominated_by(df, b) || | |||
| (!nvbb_dominated_by(df->in[0], b) && | |||
| (!df->in[1] || !nvbb_dominated_by(df->in[1], b)))) { | |||
| df = df->out[0]; | |||
| assert(df); | |||
| } | |||
| assert(df); | |||
| return df; | |||
| struct nv_basic_block *df; | |||
| int i; | |||
| for (i = 0; i < 2 && b->out[i]; ++i) | |||
| if ((df = nvbb_find_dom_frontier(b, b->out[i]))) | |||
| return df; | |||
| return NULL; | |||
| } | |||
| @@ -246,6 +246,11 @@ struct nv_instruction { | |||
| ubyte quadop; | |||
| }; | |||
| #define CFG_EDGE_FORWARD 0 | |||
| #define CFG_EDGE_BACK 1 | |||
| #define CFG_EDGE_LOOP_ENTER 2 | |||
| #define CFG_EDGE_LOOP_LEAVE 4 | |||
| struct nv_basic_block { | |||
| struct nv_instruction *entry; /* first non-phi instruction */ | |||
| struct nv_instruction *exit; | |||
| @@ -253,8 +258,10 @@ struct nv_basic_block { | |||
| int num_instructions; | |||
| struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ | |||
| struct nv_basic_block **in; | |||
| struct nv_basic_block *in[8]; /* hope that suffices */ | |||
| uint num_in; | |||
| ubyte out_kind[2]; | |||
| ubyte in_kind[8]; | |||
| int id; | |||
| struct nv_basic_block *last_visitor; | |||
| @@ -383,7 +390,6 @@ new_basic_block(struct nv_pc *pc) | |||
| { | |||
| struct nv_basic_block *bb = CALLOC_STRUCT(nv_basic_block); | |||
| bb->in = CALLOC(sizeof(struct nv_basic_block *), 4); | |||
| bb->id = pc->num_blocks++; | |||
| return bb; | |||
| } | |||
| @@ -414,6 +420,7 @@ const char *nv_opcode_name(uint opcode); | |||
| void nv_print_instruction(struct nv_instruction *); | |||
| /* nv50_pc.c */ | |||
| void nv_print_program(struct nv_basic_block *b); | |||
| boolean nv_op_commutative(uint opcode); | |||
| @@ -424,14 +431,19 @@ ubyte nv50_supported_src_mods(uint opcode, int s); | |||
| int nv_nvi_refcount(struct nv_instruction *); | |||
| void nv_nvi_delete(struct nv_instruction *); | |||
| void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *); | |||
| void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *); | |||
| int nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *); | |||
| void nvbb_attach_block(struct nv_basic_block *parent, | |||
| struct nv_basic_block *, ubyte edge_kind); | |||
| boolean nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *); | |||
| boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *, | |||
| struct nv_basic_block *); | |||
| struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *); | |||
| int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val, | |||
| struct nv_value *new_val); | |||
| typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); | |||
| void nv_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); | |||
| int nv_pc_exec_pass0(struct nv_pc *pc); | |||
| int nv_pc_exec_pass1(struct nv_pc *pc); | |||
| int nv_pc_exec_pass2(struct nv_pc *pc); | |||
| @@ -694,7 +694,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op) | |||
| set_pred(pc, i); | |||
| if (i->target) { | |||
| if (i->target && (i->opcode != NV_OP_BREAK)) { | |||
| new_fixup(pc, NV_FIXUP_CFLOW_RELOC, i->target->bin_pos, 0x7ff800, 11); | |||
| pc->emit[0] |= (i->target->bin_pos / 4) << 11; | |||
| } | |||
| @@ -120,15 +120,14 @@ nvi_isnop(struct nv_instruction *nvi) | |||
| } | |||
| static void | |||
| nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b) | |||
| nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) | |||
| { | |||
| struct nv_pc *pc = (struct nv_pc *)priv; | |||
| struct nv_basic_block *in; | |||
| struct nv_instruction *nvi, *next; | |||
| int j; | |||
| uint size, n32 = 0; | |||
| b->priv = 0; | |||
| for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j); | |||
| if (j >= 0) { | |||
| in = pc->bb_list[j]; | |||
| @@ -200,17 +199,6 @@ nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b) | |||
| assert(!b->entry || (b->exit && b->exit->is_long)); | |||
| pc->bin_size += b->bin_size *= 4; | |||
| /* descend CFG */ | |||
| if (!b->out[0]) | |||
| return; | |||
| if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in) | |||
| return; | |||
| for (j = 0; j < 2; ++j) | |||
| if (b->out[j] && b->out[j] != b) | |||
| nv_pc_pass_pre_emission(pc, b->out[j]); | |||
| } | |||
| int | |||
| @@ -219,9 +207,9 @@ nv_pc_exec_pass2(struct nv_pc *pc) | |||
| debug_printf("preparing %u blocks for emission\n", pc->num_blocks); | |||
| pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *)); | |||
| pc->num_blocks = 0; | |||
| nv_pc_pass_pre_emission(pc, pc->root); | |||
| nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc); | |||
| return 0; | |||
| } | |||
| @@ -307,8 +295,11 @@ nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b) | |||
| if (nvi->def[0]->refc > 1) | |||
| continue; | |||
| /* cannot MOV immediate to $oX */ | |||
| if (nvi->src[0]->value->reg.file == NV_FILE_IMM) | |||
| /* cannot write to $oX when using immediate */ | |||
| for (j = 0; j < 4 && nvi->src[j]; ++j) | |||
| if (nvi->src[j]->value->reg.file == NV_FILE_IMM) | |||
| break; | |||
| if (j < 4) | |||
| continue; | |||
| nvi->def[0] = sti->def[0]; | |||
| @@ -339,7 +330,6 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) | |||
| if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) { | |||
| nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value); | |||
| debug_printf("folded immediate %i\n", ld->def[0]->n); | |||
| continue; | |||
| } | |||
| @@ -358,6 +358,18 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) | |||
| do_join_values(ctx, a, b); | |||
| } | |||
| static INLINE boolean | |||
| need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) | |||
| { | |||
| int i = 0, n = 0; | |||
| for (; i < 2; ++i) | |||
| if (p->out[i] && p->out_kind[i] != CFG_EDGE_LOOP_LEAVE) | |||
| ++n; | |||
| return (b->num_in > 1) && (n == 2); | |||
| } | |||
| /* For each operand of each PHI in b, generate a new value by inserting a MOV | |||
| * at the end of the block it is coming from and replace the operand with its | |||
| * result. This eliminates liveness conflicts and enables us to let values be | |||
| @@ -377,7 +389,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) | |||
| p = pn = b->in[n]; | |||
| assert(p); | |||
| if (b->num_in > 1 && p->out[0] && p->out[1]) { | |||
| if (need_new_else_block(b, p)) { | |||
| pn = new_basic_block(ctx->pc); | |||
| if (p->out[0] == b) | |||
| @@ -481,32 +493,19 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) | |||
| } | |||
| /* Order the instructions so that live intervals can be expressed in numbers. */ | |||
| static int | |||
| pass_order_instructions(struct nv_pc_pass *ctx, struct nv_basic_block *b) | |||
| static void | |||
| pass_order_instructions(void *priv, struct nv_basic_block *b) | |||
| { | |||
| struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv; | |||
| struct nv_instruction *i; | |||
| b->priv = 0; | |||
| b->pass_seq = ctx->pc->pass_seq; | |||
| assert(!b->exit || !b->exit->next); | |||
| for (i = b->phi; i; i = i->next) { | |||
| i->serial = ctx->num_insns; | |||
| ctx->insns[ctx->num_insns++] = i; | |||
| } | |||
| b->pass_seq = ctx->pc->pass_seq; | |||
| if (!b->out[0]) | |||
| return 0; | |||
| if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in) | |||
| return 0; | |||
| if (b->out[0] != b) | |||
| pass_order_instructions(ctx, b->out[0]); | |||
| if (b->out[1] && b->out[1] != b) | |||
| pass_order_instructions(ctx, b->out[1]); | |||
| return 0; | |||
| } | |||
| static void | |||
| @@ -691,13 +690,15 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) | |||
| } | |||
| /* remaining live-outs are live until the end */ | |||
| for (j = 0; j < ctx->pc->num_values; ++j) { | |||
| if (!(b->live_set[j / 32] & (1 << (j % 32)))) | |||
| continue; | |||
| if (b->exit) { | |||
| for (j = 0; j < ctx->pc->num_values; ++j) { | |||
| if (!(b->live_set[j / 32] & (1 << (j % 32)))) | |||
| continue; | |||
| #ifdef NV50_RA_DEBUG_LIVEI | |||
| debug_printf("adding range for live value %i\n", j); | |||
| debug_printf("adding range for live value %i\n", j); | |||
| #endif | |||
| add_range(&ctx->pc->values[j], b, b->exit->serial + 1); | |||
| add_range(&ctx->pc->values[j], b, b->exit->serial + 1); | |||
| } | |||
| } | |||
| debug_printf("%s: looping through instructions now\n", __func__); | |||
| @@ -905,10 +906,7 @@ nv_pc_exec_pass1(struct nv_pc *pc) | |||
| } | |||
| pc->pass_seq++; | |||
| ret = pass_order_instructions(ctx, pc->root); | |||
| assert(!ret && "order instructions"); | |||
| if (ret) | |||
| goto out; | |||
| nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx); | |||
| pc->pass_seq++; | |||
| ret = pass_build_intervals(ctx, pc->root); | |||
| @@ -27,6 +27,7 @@ | |||
| #include "pipe/p_shader_tokens.h" | |||
| #include "tgsi/tgsi_parse.h" | |||
| #include "tgsi/tgsi_util.h" | |||
| #include "tgsi/tgsi_dump.h" | |||
| static INLINE unsigned | |||
| bitcount4(const uint32_t val) | |||
| @@ -186,6 +187,8 @@ prog_immediate(struct nv50_translation_info *ti, | |||
| int c; | |||
| unsigned n = ++ti->immd32_nr; | |||
| tgsi_dump_immediate(imm); | |||
| if (n == (1 << (ffs(n) - 1))) | |||
| ti->immd32 = REALLOC(ti->immd32, (n / 2) * 16, (n * 2) * 16); | |||
| @@ -92,6 +92,15 @@ struct nv50_program { | |||
| #define NV50_INTERP_FLAT (1 << 1) | |||
| #define NV50_INTERP_CENTROID (1 << 2) | |||
| #define NV50_PROG_MAX_SUBROUTINES 8 | |||
| /* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ | |||
| struct nv50_subroutine { | |||
| int id; | |||
| uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */ | |||
| uint32_t retv[4][1]; | |||
| }; | |||
| struct nv50_translation_info { | |||
| struct nv50_program *p; | |||
| unsigned inst_nr; | |||
| @@ -108,6 +117,8 @@ struct nv50_translation_info { | |||
| uint32_t *immd32; | |||
| unsigned immd32_nr; | |||
| ubyte edgeflag_out; | |||
| struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES]; | |||
| int subr_nr; | |||
| }; | |||
| int nv50_generate_code(struct nv50_translation_info *ti); | |||
| @@ -22,6 +22,19 @@ | |||
| /* XXX: need to clean this up so we get the typecasting right more naturally */ | |||
| /* LOOP FIXME 1 | |||
| * In bld_store_loop_var, only replace values that belong to the TGSI register | |||
| * written. | |||
| * For TGSI MOV, we only associate the source value with the value tracker of | |||
| * the destination, instead of generating an actual MOV. | |||
| * | |||
| * Possible solution: generate PHI functions in loop headers in advance. | |||
| */ | |||
| /* LOOP FIXME 2: | |||
| * In fetch_by_bb, when going back through a break-block, we miss all of the | |||
| * definitions from inside the loop. | |||
| */ | |||
| #include <unistd.h> | |||
| #include "nv50_context.h" | |||
| @@ -48,6 +61,8 @@ struct bld_value_stack { | |||
| struct nv_value *top; | |||
| struct nv_value **body; | |||
| unsigned size; | |||
| uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */ | |||
| uint16_t loop_def; | |||
| }; | |||
| static INLINE void | |||
| @@ -81,19 +96,6 @@ bld_push_values(struct bld_value_stack *stacks, int n) | |||
| bld_vals_push(&stacks[i * 4 + c]); | |||
| } | |||
| #define FETCH_TEMP(i, c) (bld->tvs[i][c].top) | |||
| #define STORE_TEMP(i, c, v) (bld->tvs[i][c].top = (v)) | |||
| #define FETCH_ADDR(i, c) (bld->avs[i][c].top) | |||
| #define STORE_ADDR(i, c, v) (bld->avs[i][c].top = (v)) | |||
| #define FETCH_PRED(i, c) (bld->pvs[i][c].top) | |||
| #define STORE_PRED(i, c, v) (bld->pvs[i][c].top = (v)) | |||
| #define FETCH_OUTR(i, c) (bld->ovs[i][c].top) | |||
| #define STORE_OUTR(i, c, v) \ | |||
| do { \ | |||
| bld->ovs[i][c].top = (v); \ | |||
| bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ | |||
| } while (0) | |||
| struct bld_context { | |||
| struct nv50_translation_info *ti; | |||
| @@ -108,6 +110,7 @@ struct bld_context { | |||
| struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; | |||
| int cond_lvl; | |||
| struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; | |||
| struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; | |||
| int loop_lvl; | |||
| struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ | |||
| @@ -127,6 +130,51 @@ struct bld_context { | |||
| uint num_immds; | |||
| }; | |||
/* Read the current value of component c of register i from a value tracker
 * array, and record that it was used at the current loop nesting level.
 */
static INLINE struct nv_value *
bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
{
   struct bld_value_stack *slot = &stk[i * 4 + c];

   slot->loop_use |= 1 << bld->loop_lvl;

   return slot->top;
}
| static void | |||
| bld_store_loop_var(struct bld_context *, struct bld_value_stack *); | |||
| static INLINE void | |||
| bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c, | |||
| struct nv_value *val) | |||
| { | |||
| bld_store_loop_var(bld, &stk[i * 4 + c]); | |||
| stk[i * 4 + c].top = val; | |||
| } | |||
| static INLINE void | |||
| bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl) | |||
| { | |||
| int i; | |||
| const uint16_t mask = ~(1 << lvl); | |||
| for (i = 0; i < n * 4; ++i) { | |||
| stk[i].loop_def &= mask; | |||
| stk[i].loop_use &= mask; | |||
| } | |||
| } | |||
| #define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) | |||
| #define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) | |||
| #define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c) | |||
| #define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) | |||
| #define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c) | |||
| #define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) | |||
| #define STORE_OUTR(i, c, v) \ | |||
| do { \ | |||
| bld->ovs[i][c].top = (v); \ | |||
| bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ | |||
| } while (0) | |||
| static INLINE void | |||
| bld_warn_uninitialized(struct bld_context *bld, int kind, | |||
| struct bld_value_stack *stk, struct nv_basic_block *b) | |||
| @@ -134,8 +182,8 @@ bld_warn_uninitialized(struct bld_context *bld, int kind, | |||
| long i = (stk - &bld->tvs[0][0]) / 4; | |||
| long c = (stk - &bld->tvs[0][0]) & 3; | |||
| debug_printf("WARNING: TEMP[%li].%li %s used uninitialized in BB:%i\n", | |||
| i, c, kind ? "may be" : "is", b->id); | |||
| debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", | |||
| i, (int)('x' + c), kind ? "may be" : "is", b->id); | |||
| } | |||
| static INLINE struct nv_value * | |||
| @@ -182,7 +230,8 @@ fetch_by_bb(struct bld_value_stack *stack, | |||
| return; | |||
| } | |||
| for (i = 0; i < b->num_in; ++i) | |||
| fetch_by_bb(stack, vals, n, b->in[i]); | |||
| if (b->in_kind[i] != CFG_EDGE_BACK) | |||
| fetch_by_bb(stack, vals, n, b->in[i]); | |||
| } | |||
| static INLINE struct nv_value * | |||
| @@ -237,12 +286,15 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b, | |||
| } | |||
| for (i = 0; i < n; ++i) { | |||
| /* if value dominates b, continue to the redefinitions */ | |||
| if (nvbb_dominated_by(b, vals[i]->insn->bb)) | |||
| continue; | |||
| /* if value dominates any in-block, b should be the dom frontier */ | |||
| for (j = 0; j < b->num_in; ++j) | |||
| if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb)) | |||
| break; | |||
| /* otherwise, find the dominance frontier and put the phi there */ | |||
| if (j == b->num_in) { | |||
| in = nvbb_dom_frontier(vals[i]->insn->bb); | |||
| val = bld_phi(bld, in, stack); | |||
| @@ -269,6 +321,7 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b, | |||
| static INLINE struct nv_value * | |||
| bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack) | |||
| { | |||
| stack->loop_use |= 1 << bld->loop_lvl; | |||
| return bld_phi(bld, bld->pc->current_block, stack); | |||
| } | |||
| @@ -290,6 +343,79 @@ bld_imm_u32(struct bld_context *bld, uint32_t u) | |||
| return bld->saved_immd[n]; | |||
| } | |||
| static void | |||
| bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, | |||
| struct nv_value *); | |||
| /* When setting a variable inside a loop, and we have used it before in the | |||
| * loop, we need to insert a phi function in the loop header. | |||
| */ | |||
| static void | |||
| bld_store_loop_var(struct bld_context *bld, struct bld_value_stack *stk) | |||
| { | |||
| struct nv_basic_block *bb; | |||
| struct nv_instruction *phi; | |||
| struct nv_value *val; | |||
| int ll; | |||
| uint16_t loop_def = stk->loop_def; | |||
| if (!(ll = bld->loop_lvl)) | |||
| return; | |||
| stk->loop_def |= 1 << ll; | |||
| if ((~stk->loop_use | loop_def) & (1 << ll)) | |||
| return; | |||
| #if 0 | |||
| debug_printf("TEMP[%li].%c used before loop redef (def=%x/use=%x)\n", | |||
| (stk - &bld->tvs[0][0]) / 4, | |||
| (int)('x' + ((stk - &bld->tvs[0][0]) & 3)), | |||
| loop_def, stk->loop_use); | |||
| #endif | |||
| stk->loop_def |= 1 << ll; | |||
| assert(bld->loop_bb[ll - 1]->num_in == 1); | |||
| /* get last assignment from outside this loop, could be from bld_phi */ | |||
| val = stk->body[stk->size - 1]; | |||
| /* create the phi in the loop entry block */ | |||
| bb = bld->pc->current_block; | |||
| bld->pc->current_block = bld->loop_bb[ll - 1]; | |||
| phi = new_instruction(bld->pc, NV_OP_PHI); | |||
| bld_def(phi, 0, new_value(bld->pc, val->reg.file, val->reg.type)); | |||
| bld->pc->pass_seq++; | |||
| bld_replace_value(bld->pc, bld->loop_bb[ll - 1], val, phi->def[0]); | |||
| assert(!stk->top); | |||
| bld_vals_push_val(stk, phi->def[0]); | |||
| phi->target = (struct nv_basic_block *)stk; /* cheat */ | |||
| nv_reference(bld->pc, &phi->src[0], val); | |||
| nv_reference(bld->pc, &phi->src[1], phi->def[0]); | |||
| bld->pc->current_block = bb; | |||
| } | |||
| static void | |||
| bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) | |||
| { | |||
| struct nv_instruction *phi; | |||
| struct nv_value *val; | |||
| for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = phi->next) { | |||
| val = bld_fetch_global(bld, (struct bld_value_stack *)phi->target); | |||
| nv_reference(bld->pc, &phi->src[1], val); | |||
| phi->target = NULL; | |||
| } | |||
| } | |||
| static INLINE struct nv_value * | |||
| bld_imm_f32(struct bld_context *bld, float f) | |||
| { | |||
| @@ -432,7 +558,8 @@ bld_kil(struct bld_context *bld, struct nv_value *src) | |||
| static void | |||
| bld_flow(struct bld_context *bld, uint opcode, ubyte cc, | |||
| struct nv_value *src, boolean plan_reconverge) | |||
| struct nv_value *src, struct nv_basic_block *target, | |||
| boolean plan_reconverge) | |||
| { | |||
| struct nv_instruction *nvi; | |||
| @@ -442,7 +569,9 @@ bld_flow(struct bld_context *bld, uint opcode, ubyte cc, | |||
| nvi = new_instruction(bld->pc, opcode); | |||
| nvi->is_terminator = 1; | |||
| nvi->cc = cc; | |||
| nvi->flags_src = new_ref(bld->pc, src); | |||
| nvi->target = target; | |||
| if (src) | |||
| nvi->flags_src = new_ref(bld->pc, src); | |||
| } | |||
| static ubyte | |||
| @@ -1105,14 +1234,14 @@ bld_instruction(struct bld_context *bld, | |||
| { | |||
| struct nv_basic_block *b = new_basic_block(bld->pc); | |||
| nvbb_attach_block(bld->pc->current_block, b); | |||
| nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD); | |||
| bld->join_bb[bld->cond_lvl] = bld->pc->current_block; | |||
| bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; | |||
| src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0)); | |||
| bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, FALSE); | |||
| bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, FALSE); | |||
| ++bld->cond_lvl; | |||
| bld_new_block(bld, b); | |||
| @@ -1123,7 +1252,7 @@ bld_instruction(struct bld_context *bld, | |||
| struct nv_basic_block *b = new_basic_block(bld->pc); | |||
| --bld->cond_lvl; | |||
| nvbb_attach_block(bld->join_bb[bld->cond_lvl], b); | |||
| nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); | |||
| bld->cond_bb[bld->cond_lvl]->exit->target = b; | |||
| bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; | |||
| @@ -1134,13 +1263,13 @@ bld_instruction(struct bld_context *bld, | |||
| bld_new_block(bld, b); | |||
| } | |||
| break; | |||
| case TGSI_OPCODE_ENDIF: /* XXX: deal with ENDIF; ENDIF; */ | |||
| case TGSI_OPCODE_ENDIF: | |||
| { | |||
| struct nv_basic_block *b = new_basic_block(bld->pc); | |||
| --bld->cond_lvl; | |||
| nvbb_attach_block(bld->pc->current_block, b); | |||
| nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b); | |||
| nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD); | |||
| nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); | |||
| bld->cond_bb[bld->cond_lvl]->exit->target = b; | |||
| @@ -1154,16 +1283,58 @@ bld_instruction(struct bld_context *bld, | |||
| } | |||
| break; | |||
| case TGSI_OPCODE_BGNLOOP: | |||
| assert(0); | |||
| { | |||
| struct nv_basic_block *bl = new_basic_block(bld->pc); | |||
| struct nv_basic_block *bb = new_basic_block(bld->pc); | |||
| bld->loop_bb[bld->loop_lvl] = bl; | |||
| bld->brkt_bb[bld->loop_lvl] = bb; | |||
| bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE); | |||
| nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); | |||
| bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); | |||
| if (bld->loop_lvl == bld->pc->loop_nesting_bound) | |||
| bld->pc->loop_nesting_bound++; | |||
| bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); | |||
| bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); | |||
| bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); | |||
| } | |||
| break; | |||
| case TGSI_OPCODE_BRK: | |||
| assert(0); | |||
| { | |||
| struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; | |||
| bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE); | |||
| /* XXX: don't do this for redundant BRKs */ | |||
| nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); | |||
| } | |||
| break; | |||
| case TGSI_OPCODE_CONT: | |||
| assert(0); | |||
| { | |||
| struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; | |||
| bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); | |||
| nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); | |||
| } | |||
| break; | |||
| case TGSI_OPCODE_ENDLOOP: | |||
| assert(0); | |||
| { | |||
| struct nv_basic_block *bb = bld->loop_bb[--bld->loop_lvl]; | |||
| bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); | |||
| nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); | |||
| bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ | |||
| bld_new_block(bld, bld->brkt_bb[bld->loop_lvl]); | |||
| } | |||
| break; | |||
| case TGSI_OPCODE_ABS: | |||
| case TGSI_OPCODE_CEIL: | |||
| @@ -1298,6 +1469,17 @@ bld_instruction(struct bld_context *bld, | |||
| emit_store(bld, insn, c, dst0[c]); | |||
| } | |||
| static INLINE void | |||
| bld_free_value_trackers(struct bld_value_stack *base, int n) | |||
| { | |||
| int i, c; | |||
| for (i = 0; i < n; ++i) | |||
| for (c = 0; c < 4; ++c) | |||
| if (base[i * 4 + c].body) | |||
| FREE(base[i * 4 + c].body); | |||
| } | |||
| int | |||
| nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti) | |||
| { | |||
| @@ -1309,7 +1491,7 @@ nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti) | |||
| bld->pc = pc; | |||
| bld->ti = ti; | |||
| pc->loop_nesting_bound = 1; /* XXX: should work with 0 */ | |||
| pc->loop_nesting_bound = 1; | |||
| c = util_bitcount(bld->ti->p->fp.interp >> 24); | |||
| if (c && ti->p->type == PIPE_SHADER_FRAGMENT) { | |||
| @@ -1335,18 +1517,23 @@ nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti) | |||
| } | |||
| } | |||
| bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS); | |||
| bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS); | |||
| bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS); | |||
| bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS); | |||
| FREE(bld); | |||
| return 0; | |||
| } | |||
| #if 0 | |||
| /* If a variable is assigned in a loop, replace all references to the value | |||
| * from outside the loop with a phi value. | |||
| */ | |||
| static void | |||
| bld_adjust_nv_refs(struct nv_pc *pc, struct nv_basic_block *b, | |||
| struct nv_value *old_val, | |||
| struct nv_value *new_val) | |||
| bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b, | |||
| struct nv_value *old_val, | |||
| struct nv_value *new_val) | |||
| { | |||
| struct nv_instruction *nvi; | |||
| @@ -1361,12 +1548,12 @@ bld_adjust_nv_refs(struct nv_pc *pc, struct nv_basic_block *b, | |||
| if (nvi->flags_src && nvi->flags_src->value == old_val) | |||
| nv_reference(pc, &nvi->flags_src, new_val); | |||
| } | |||
| b->pass_seq = pc->pass_seq; | |||
| if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq) | |||
| bld_adjust_nv_refs(pc, b, old_val, new_val); | |||
| bld_replace_value(pc, b->out[0], old_val, new_val); | |||
| if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq) | |||
| bld_adjust_nv_refs(pc, b, old_val, new_val); | |||
| bld_replace_value(pc, b->out[1], old_val, new_val); | |||
| } | |||
| #endif | |||