Signed-off-by: Rob Clark <robclark@freedesktop.org>tags/10.5-branchpoint
| @@ -128,6 +128,7 @@ ir3_SOURCES := \ | |||
| ir3/ir3_dump.c \ | |||
| ir3/ir3_flatten.c \ | |||
| ir3/ir3.h \ | |||
| ir3/ir3_legalize.c \ | |||
| ir3/ir3_ra.c \ | |||
| ir3/ir3_sched.c \ | |||
| ir3/ir3_shader.c \ | |||
| @@ -438,7 +438,10 @@ int ir3_block_sched(struct ir3_block *block); | |||
| /* register assignment: */ | |||
| int ir3_block_ra(struct ir3_block *block, enum shader_t type, | |||
| bool half_precision, bool frag_coord, bool frag_face, | |||
| bool half_precision, bool frag_coord, bool frag_face); | |||
| /* legalize: */ | |||
| void ir3_block_legalize(struct ir3_block *block, | |||
| bool *has_samp, int *max_bary); | |||
| #ifndef ARRAY_SIZE | |||
| @@ -3193,12 +3193,14 @@ ir3_compile_shader(struct ir3_shader_variant *so, | |||
| } | |||
| ret = ir3_block_ra(block, so->type, key.half_precision, | |||
| so->frag_coord, so->frag_face, &so->has_samp, &max_bary); | |||
| so->frag_coord, so->frag_face); | |||
| if (ret) { | |||
| DBG("RA failed!"); | |||
| goto out; | |||
| } | |||
| ir3_block_legalize(block, &so->has_samp, &max_bary); | |||
| if (fd_mesa_debug & FD_DBG_OPTMSGS) { | |||
| printf("AFTER RA:\n"); | |||
| ir3_dump_instr_list(block->head); | |||
| @@ -0,0 +1,205 @@ | |||
| /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ | |||
| /* | |||
| * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the "Software"), | |||
| * to deal in the Software without restriction, including without limitation | |||
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| * and/or sell copies of the Software, and to permit persons to whom the | |||
| * Software is furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the next | |||
| * paragraph) shall be included in all copies or substantial portions of the | |||
| * Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| * SOFTWARE. | |||
| * | |||
| * Authors: | |||
| * Rob Clark <robclark@freedesktop.org> | |||
| */ | |||
| #include "pipe/p_shader_tokens.h" | |||
| #include "util/u_math.h" | |||
| #include "freedreno_util.h" | |||
| #include "ir3.h" | |||
| /* | |||
| * Legalize: | |||
| * | |||
| * We currently require that scheduling ensures that we have enough nop's | |||
| * in all the right places. The legalize step mostly handles fixing up | |||
| * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's | |||
| * into fewer nop's w/ rpt flag. | |||
| */ | |||
| struct ir3_legalize_ctx { | |||
| struct ir3_block *block; | |||
| bool has_samp; | |||
| int max_bary; | |||
| }; | |||
| static void legalize(struct ir3_legalize_ctx *ctx) | |||
| { | |||
| struct ir3_block *block = ctx->block; | |||
| struct ir3_instruction *n; | |||
| struct ir3 *shader = block->shader; | |||
| struct ir3_instruction *end = | |||
| ir3_instr_create(block, 0, OPC_END); | |||
| struct ir3_instruction *last_input = NULL; | |||
| struct ir3_instruction *last_rel = NULL; | |||
| regmask_t needs_ss_war; /* write after read */ | |||
| regmask_t needs_ss; | |||
| regmask_t needs_sy; | |||
| regmask_init(&needs_ss_war); | |||
| regmask_init(&needs_ss); | |||
| regmask_init(&needs_sy); | |||
| shader->instrs_count = 0; | |||
| for (n = block->head; n; n = n->next) { | |||
| struct ir3_register *reg; | |||
| unsigned i; | |||
| if (is_meta(n)) | |||
| continue; | |||
| if (is_input(n)) { | |||
| struct ir3_register *inloc = n->regs[1]; | |||
| assert(inloc->flags & IR3_REG_IMMED); | |||
| ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); | |||
| } | |||
| for (i = 1; i < n->regs_count; i++) { | |||
| reg = n->regs[i]; | |||
| if (reg_gpr(reg)) { | |||
| /* TODO: we probably only need (ss) for alu | |||
| * instr consuming sfu result.. need to make | |||
| * some tests for both this and (sy).. | |||
| */ | |||
| if (regmask_get(&needs_ss, reg)) { | |||
| n->flags |= IR3_INSTR_SS; | |||
| regmask_init(&needs_ss); | |||
| } | |||
| if (regmask_get(&needs_sy, reg)) { | |||
| n->flags |= IR3_INSTR_SY; | |||
| regmask_init(&needs_sy); | |||
| } | |||
| } | |||
| /* TODO: is it valid to have address reg loaded from a | |||
| * relative src (ie. mova a0, c<a0.x+4>)? If so, the | |||
| * last_rel check below should be moved ahead of this: | |||
| */ | |||
| if (reg->flags & IR3_REG_RELATIV) | |||
| last_rel = n; | |||
| } | |||
| if (n->regs_count > 0) { | |||
| reg = n->regs[0]; | |||
| if (regmask_get(&needs_ss_war, reg)) { | |||
| n->flags |= IR3_INSTR_SS; | |||
| regmask_init(&needs_ss_war); // ??? I assume? | |||
| } | |||
| if (last_rel && (reg->num == regid(REG_A0, 0))) { | |||
| last_rel->flags |= IR3_INSTR_UL; | |||
| last_rel = NULL; | |||
| } | |||
| } | |||
| /* cat5+ does not have an (ss) bit, if needed we need to | |||
| * insert a nop to carry the sync flag. Would be kinda | |||
| * clever if we were aware of this during scheduling, but | |||
| * this should be a pretty rare case: | |||
| */ | |||
| if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { | |||
| struct ir3_instruction *nop; | |||
| nop = ir3_instr_create(block, 0, OPC_NOP); | |||
| nop->flags |= IR3_INSTR_SS; | |||
| n->flags &= ~IR3_INSTR_SS; | |||
| } | |||
| /* need to be able to set (ss) on first instruction: */ | |||
| if ((shader->instrs_count == 0) && (n->category >= 5)) | |||
| ir3_instr_create(block, 0, OPC_NOP); | |||
| if (is_nop(n) && shader->instrs_count) { | |||
| struct ir3_instruction *last = | |||
| shader->instrs[shader->instrs_count-1]; | |||
| if (is_nop(last) && (last->repeat < 5)) { | |||
| last->repeat++; | |||
| last->flags |= n->flags; | |||
| continue; | |||
| } | |||
| } | |||
| shader->instrs[shader->instrs_count++] = n; | |||
| if (is_sfu(n)) | |||
| regmask_set(&needs_ss, n->regs[0]); | |||
| if (is_tex(n)) { | |||
| /* this ends up being the # of samp instructions.. but that | |||
| * is ok, everything else only cares whether it is zero or | |||
| * not. We do this here, rather than when we encounter a | |||
| * SAMP decl, because (especially in binning pass shader) | |||
| * the samp instruction(s) could get eliminated if the | |||
| * result is not used. | |||
| */ | |||
| ctx->has_samp = true; | |||
| regmask_set(&needs_sy, n->regs[0]); | |||
| } | |||
| /* both tex/sfu appear to not always immediately consume | |||
| * their src register(s): | |||
| */ | |||
| if (is_tex(n) || is_sfu(n)) { | |||
| for (i = 1; i < n->regs_count; i++) { | |||
| reg = n->regs[i]; | |||
| if (reg_gpr(reg)) | |||
| regmask_set(&needs_ss_war, reg); | |||
| } | |||
| } | |||
| if (is_input(n)) | |||
| last_input = n; | |||
| } | |||
| if (last_input) | |||
| last_input->regs[0]->flags |= IR3_REG_EI; | |||
| if (last_rel) | |||
| last_rel->flags |= IR3_INSTR_UL; | |||
| shader->instrs[shader->instrs_count++] = end; | |||
| shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; | |||
| } | |||
| void ir3_block_legalize(struct ir3_block *block, | |||
| bool *has_samp, int *max_bary) | |||
| { | |||
| struct ir3_legalize_ctx ctx = { | |||
| .block = block, | |||
| .max_bary = -1, | |||
| }; | |||
| legalize(&ctx); | |||
| *has_samp = ctx.has_samp; | |||
| *max_bary = ctx.max_bary; | |||
| } | |||
| @@ -56,9 +56,7 @@ struct ir3_ra_ctx { | |||
| bool half_precision; | |||
| bool frag_coord; | |||
| bool frag_face; | |||
| bool has_samp; | |||
| int cnt; | |||
| int max_bary; | |||
| bool error; | |||
| }; | |||
| @@ -603,149 +601,6 @@ static void ir3_instr_ra(struct ir3_ra_ctx *ctx, | |||
| ra_assign(ctx, instr, num); | |||
| } | |||
| /* flatten into shader: */ | |||
| // XXX this should probably be somewhere else: | |||
| static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) | |||
| { | |||
| struct ir3_instruction *n; | |||
| struct ir3 *shader = block->shader; | |||
| struct ir3_instruction *end = | |||
| ir3_instr_create(block, 0, OPC_END); | |||
| struct ir3_instruction *last_input = NULL; | |||
| struct ir3_instruction *last_rel = NULL; | |||
| regmask_t needs_ss_war; /* write after read */ | |||
| regmask_t needs_ss; | |||
| regmask_t needs_sy; | |||
| regmask_init(&needs_ss_war); | |||
| regmask_init(&needs_ss); | |||
| regmask_init(&needs_sy); | |||
| shader->instrs_count = 0; | |||
| for (n = block->head; n; n = n->next) { | |||
| struct ir3_register *reg; | |||
| unsigned i; | |||
| if (is_meta(n)) | |||
| continue; | |||
| if (is_input(n)) { | |||
| struct ir3_register *inloc = n->regs[1]; | |||
| assert(inloc->flags & IR3_REG_IMMED); | |||
| ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); | |||
| } | |||
| for (i = 1; i < n->regs_count; i++) { | |||
| reg = n->regs[i]; | |||
| if (reg_gpr(reg)) { | |||
| /* TODO: we probably only need (ss) for alu | |||
| * instr consuming sfu result.. need to make | |||
| * some tests for both this and (sy).. | |||
| */ | |||
| if (regmask_get(&needs_ss, reg)) { | |||
| n->flags |= IR3_INSTR_SS; | |||
| regmask_init(&needs_ss); | |||
| } | |||
| if (regmask_get(&needs_sy, reg)) { | |||
| n->flags |= IR3_INSTR_SY; | |||
| regmask_init(&needs_sy); | |||
| } | |||
| } | |||
| /* TODO: is it valid to have address reg loaded from a | |||
| * relative src (ie. mova a0, c<a0.x+4>)? If so, the | |||
| * last_rel check below should be moved ahead of this: | |||
| */ | |||
| if (reg->flags & IR3_REG_RELATIV) | |||
| last_rel = n; | |||
| } | |||
| if (n->regs_count > 0) { | |||
| reg = n->regs[0]; | |||
| if (regmask_get(&needs_ss_war, reg)) { | |||
| n->flags |= IR3_INSTR_SS; | |||
| regmask_init(&needs_ss_war); // ??? I assume? | |||
| } | |||
| if (last_rel && (reg->num == regid(REG_A0, 0))) { | |||
| last_rel->flags |= IR3_INSTR_UL; | |||
| last_rel = NULL; | |||
| } | |||
| } | |||
| /* cat5+ does not have an (ss) bit, if needed we need to | |||
| * insert a nop to carry the sync flag. Would be kinda | |||
| * clever if we were aware of this during scheduling, but | |||
| * this should be a pretty rare case: | |||
| */ | |||
| if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { | |||
| struct ir3_instruction *nop; | |||
| nop = ir3_instr_create(block, 0, OPC_NOP); | |||
| nop->flags |= IR3_INSTR_SS; | |||
| n->flags &= ~IR3_INSTR_SS; | |||
| } | |||
| /* need to be able to set (ss) on first instruction: */ | |||
| if ((shader->instrs_count == 0) && (n->category >= 5)) | |||
| ir3_instr_create(block, 0, OPC_NOP); | |||
| if (is_nop(n) && shader->instrs_count) { | |||
| struct ir3_instruction *last = | |||
| shader->instrs[shader->instrs_count-1]; | |||
| if (is_nop(last) && (last->repeat < 5)) { | |||
| last->repeat++; | |||
| last->flags |= n->flags; | |||
| continue; | |||
| } | |||
| } | |||
| shader->instrs[shader->instrs_count++] = n; | |||
| if (is_sfu(n)) | |||
| regmask_set(&needs_ss, n->regs[0]); | |||
| if (is_tex(n)) { | |||
| /* this ends up being the # of samp instructions.. but that | |||
| * is ok, everything else only cares whether it is zero or | |||
| * not. We do this here, rather than when we encounter a | |||
| * SAMP decl, because (especially in binning pass shader) | |||
| * the samp instruction(s) could get eliminated if the | |||
| * result is not used. | |||
| */ | |||
| ctx->has_samp = true; | |||
| regmask_set(&needs_sy, n->regs[0]); | |||
| } | |||
| /* both tex/sfu appear to not always immediately consume | |||
| * their src register(s): | |||
| */ | |||
| if (is_tex(n) || is_sfu(n)) { | |||
| for (i = 1; i < n->regs_count; i++) { | |||
| reg = n->regs[i]; | |||
| if (reg_gpr(reg)) | |||
| regmask_set(&needs_ss_war, reg); | |||
| } | |||
| } | |||
| if (is_input(n)) | |||
| last_input = n; | |||
| } | |||
| if (last_input) | |||
| last_input->regs[0]->flags |= IR3_REG_EI; | |||
| if (last_rel) | |||
| last_rel->flags |= IR3_INSTR_UL; | |||
| shader->instrs[shader->instrs_count++] = end; | |||
| shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; | |||
| } | |||
| static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) | |||
| { | |||
| struct ir3_instruction *n; | |||
| @@ -794,14 +649,11 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) | |||
| ra_dump_list("-------", block->head); | |||
| } | |||
| legalize(ctx, block); | |||
| return 0; | |||
| } | |||
| int ir3_block_ra(struct ir3_block *block, enum shader_t type, | |||
| bool half_precision, bool frag_coord, bool frag_face, | |||
| bool *has_samp, int *max_bary) | |||
| bool half_precision, bool frag_coord, bool frag_face) | |||
| { | |||
| struct ir3_instruction *n; | |||
| struct ir3_ra_ctx ctx = { | |||
| @@ -810,7 +662,6 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, | |||
| .half_precision = half_precision, | |||
| .frag_coord = frag_coord, | |||
| .frag_face = frag_face, | |||
| .max_bary = -1, | |||
| }; | |||
| int ret; | |||
| @@ -823,8 +674,6 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, | |||
| ir3_clear_mark(block->shader); | |||
| ret = block_ra(&ctx, block); | |||
| *has_samp = ctx.has_samp; | |||
| *max_bary = ctx.max_bary; | |||
| return ret; | |||
| } | |||