123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524 |
- /**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-
- /**
- * Generate SPU fragment program/shader code.
- *
- * Note that we generate SOA-style code here. So each TGSI instruction
- * operates on four pixels (and is translated into four SPU instructions,
- * generally speaking).
- *
- * \author Brian Paul
- */
-
-
- #include "pipe/p_defines.h"
- #include "pipe/p_state.h"
- #include "pipe/p_shader_tokens.h"
- #include "tgsi/tgsi_parse.h"
- #include "tgsi/tgsi_util.h"
- #include "tgsi/tgsi_exec.h"
- #include "tgsi/tgsi_dump.h"
- #include "rtasm/rtasm_ppc_spe.h"
- #include "util/u_memory.h"
- #include "cell_context.h"
- #include "cell_gen_fp.h"
-
-
/** Debug switch: any non-zero value enables the disassembly printfs. */
#define DISASSEM 1
-
-
- /**
- * Context needed during code generation.
- */
- struct codegen
- {
- int inputs_reg; /**< 1st function parameter */
- int outputs_reg; /**< 2nd function parameter */
- int constants_reg; /**< 3rd function parameter */
- int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
-
- int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
-
- /** Per-instruction temps / intermediate temps */
- int num_itemps;
- int itemps[3];
-
- struct spe_function *f;
- boolean error;
- };
-
-
- /**
- * Allocate an intermediate temporary register.
- */
- static int
- get_itemp(struct codegen *gen)
- {
- int t = spe_allocate_available_register(gen->f);
- assert(gen->num_itemps < Elements(gen->itemps));
- gen->itemps[gen->num_itemps++] = t;
- return t;
- }
-
- /**
- * Free all intermediate temporary registers. To be called after each
- * instruction has been emitted.
- */
- static void
- free_itemps(struct codegen *gen)
- {
- int i;
- for (i = 0; i < gen->num_itemps; i++) {
- spe_release_register(gen->f, gen->itemps[i]);
- }
- gen->num_itemps = 0;
- }
-
-
- /**
- * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
- * The register is allocated and initialized upon the first call.
- */
- static int
- get_const_one_reg(struct codegen *gen)
- {
- if (gen->one_reg <= 0) {
- gen->one_reg = spe_allocate_available_register(gen->f);
- }
-
- /* one = {1.0, 1.0, 1.0, 1.0} */
- spe_load_float(gen->f, gen->one_reg, 1.0f);
- #if DISASSEM
- printf("il\tr%d, 1.0f\n", gen->one_reg);
- #endif
-
- return gen->one_reg;
- }
-
-
- /**
- * Return the index of the SPU temporary containing the named TGSI
- * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
- * just return the corresponding SPE register. If the TGIS register
- * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
- * and emit an SPE load instruction.
- */
- static int
- get_src_reg(struct codegen *gen,
- int channel,
- const struct tgsi_full_src_register *src)
- {
- int reg;
- int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
-
- assert(swizzle >= 0);
- assert(swizzle <= 3);
-
- channel = swizzle;
-
- switch (src->SrcRegister.File) {
- case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[src->SrcRegister.Index][channel];
- break;
- case TGSI_FILE_INPUT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = src->SrcRegister.Index * 4 + channel;
- reg = get_itemp(gen);
- /* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset);
- #if DISASSEM
- printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
- #endif
- }
- break;
- case TGSI_FILE_IMMEDIATE:
- /* xxx fall-through for now / fix */
- case TGSI_FILE_CONSTANT:
- /* xxx fall-through for now / fix */
- default:
- assert(0);
- }
-
- return reg;
- }
-
-
- /**
- * Return the index of an SPE register to use for the given TGSI register.
- * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
- * corresponding SPE register is returned. If the TGSI register is
- * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
- * See store_dest_reg() below...
- */
- static int
- get_dst_reg(struct codegen *gen,
- int channel,
- const struct tgsi_full_dst_register *dest)
- {
- int reg;
-
- switch (dest->DstRegister.File) {
- case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[dest->DstRegister.Index][channel];
- break;
- case TGSI_FILE_OUTPUT:
- reg = get_itemp(gen);
- break;
- default:
- assert(0);
- }
-
- return reg;
- }
-
-
- /**
- * When a TGSI instruction is writing to an output register, this
- * function emits the SPE store instruction to store the value_reg.
- * \param value_reg the SPE register containing the value to store.
- * This would have been returned by get_dst_reg().
- */
- static void
- store_dest_reg(struct codegen *gen,
- int value_reg, int channel,
- const struct tgsi_full_dst_register *dest)
- {
- switch (dest->DstRegister.File) {
- case TGSI_FILE_TEMPORARY:
- /* no-op */
- break;
- case TGSI_FILE_OUTPUT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = dest->DstRegister.Index * 4 + channel;
- /* Store: memory[(machine_reg) + offset] = reg */
- spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
- #if DISASSEM
- printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
- #endif
- }
- break;
- default:
- assert(0);
- }
- }
-
-
- static boolean
- emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
- {
- int ch;
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* XXX we don't always need to actually emit a mov instruction here */
- spe_move(gen->f, dst_reg, src_reg);
- #if DISASSEM
- printf("mov\tr%d, r%d\n", dst_reg, src_reg);
- #endif
- store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
- }
- return true;
- }
-
-
- /**
- * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
- * becomes (up to) four SPU "fa" instructions because we're doing SOA
- * processing.
- */
- static boolean
- emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
- {
- int ch;
- /* Loop over Red/Green/Blue/Alpha channels */
- for (ch = 0; ch < 4; ch++) {
- /* If the dest R, G, B or A writemask is enabled... */
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* get indexes of the two src, one dest SPE registers */
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* Emit actual SPE instruction: d = s1 + s2 */
- spe_fa(gen->f, d_reg, s1_reg, s2_reg);
- #if DISASSEM
- printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
- #endif
-
- /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- /* Free any intermediate temps we allocated */
- free_itemps(gen);
- }
- }
- return true;
- }
-
-
- /**
- * Emit multiply. See emit_ADD for comments.
- */
- static boolean
- emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
- {
- int ch;
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* d = s1 * s2 */
- spe_fm(gen->f, d_reg, s1_reg, s2_reg);
- #if DISASSEM
- printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
- #endif
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
- }
- return true;
- }
-
-
- /**
- * Emit set-if-greater-than.
- * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
- * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
- * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
- */
- static boolean
- emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
- {
- int ch;
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 > s2) */
- spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
- #if DISASSEM
- printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
- #endif
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = d & one_reg */
- spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
- #if DISASSEM
- printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
- #endif
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
- }
-
- return true;
- }
-
-
- /**
- * Emit END instruction.
- * We just return from the shader function at this point.
- *
- * Note that there may be more code after this that would be
- * called by TGSI_OPCODE_CALL.
- */
- static boolean
- emit_END(struct codegen *gen)
- {
- /* return from function call */
- spe_bi(gen->f, SPE_REG_RA, 0, 0);
- #if DISASSEM
- printf("bi\trRA\n");
- #endif
- return true;
- }
-
-
- /**
- * Emit code for the given instruction. Just a big switch stmt.
- */
- static boolean
- emit_instruction(struct codegen *gen,
- const struct tgsi_full_instruction *inst)
- {
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_MOV:
- return emit_MOV(gen, inst);
- case TGSI_OPCODE_MUL:
- return emit_MUL(gen, inst);
- case TGSI_OPCODE_ADD:
- return emit_ADD(gen, inst);
- case TGSI_OPCODE_SGT:
- return emit_SGT(gen, inst);
- case TGSI_OPCODE_END:
- return emit_END(gen);
-
- /* XXX lots more cases to do... */
-
- default:
- return false;
- }
-
- return true;
- }
-
-
-
- /**
- * Emit "code" for a TGSI declaration.
- * We only care about TGSI TEMPORARY register declarations at this time.
- * For each TGSI TEMPORARY we allocate four SPE registers.
- */
- static void
- emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
- {
- int i, ch;
-
- switch (decl->Declaration.File) {
- case TGSI_FILE_TEMPORARY:
- #if DISASSEM
- printf("Declare temp reg %d .. %d\n",
- decl->DeclarationRange.First,
- decl->DeclarationRange.Last);
- #endif
- for (i = decl->DeclarationRange.First;
- i <= decl->DeclarationRange.Last;
- i++) {
- for (ch = 0; ch < 4; ch++) {
- gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
- }
-
- /* XXX if we run out of SPE registers, we need to spill
- * to SPU memory. someday...
- */
-
- #if DISASSEM
- printf(" SPE regs: %d %d %d %d\n",
- gen->temp_regs[i][0],
- gen->temp_regs[i][1],
- gen->temp_regs[i][2],
- gen->temp_regs[i][3]);
- #endif
- }
- break;
- default:
- ; /* ignore */
- }
- }
-
-
- /**
- * Translate TGSI shader code to SPE instructions. This is done when
- * the state tracker gives us a new shader (via pipe->create_fs_state()).
- *
- * \param cell the rendering context (in)
- * \param tokens the TGSI shader (in)
- * \param f the generated function (out)
- */
- boolean
- cell_gen_fragment_program(struct cell_context *cell,
- const struct tgsi_token *tokens,
- struct spe_function *f)
- {
- struct tgsi_parse_context parse;
- struct codegen gen;
-
- memset(&gen, 0, sizeof(gen));
- gen.f = f;
-
- /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
- gen.inputs_reg = 3; /* pointer to inputs array */
- gen.outputs_reg = 4; /* pointer to outputs array */
- gen.constants_reg = 5; /* pointer to constants array */
-
- spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
- spe_allocate_register(f, gen.inputs_reg);
- spe_allocate_register(f, gen.outputs_reg);
- spe_allocate_register(f, gen.constants_reg);
-
- #if DISASSEM
- printf("Begin %s\n", __FUNCTION__);
- tgsi_dump(tokens, 0);
- #endif
-
- tgsi_parse_init(&parse, tokens);
-
- while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
- tgsi_parse_token(&parse);
-
- switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- #if 0
- if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
- goto fail;
- #endif
- break;
-
- case TGSI_TOKEN_TYPE_DECLARATION:
- emit_declaration(&gen, &parse.FullToken.FullDeclaration);
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
- gen.error = true;
- }
- break;
-
- default:
- assert(0);
-
- }
- }
-
-
- if (gen.error) {
- /* terminate the SPE code */
- return emit_END(&gen);
- }
-
- #if DISASSEM
- printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
- printf("End %s\n", __FUNCTION__);
- #endif
-
- tgsi_parse_free( &parse );
-
- return !gen.error;
- }
|