Conflicts: progs/fp/Makefiletags/mesa_20090313
@@ -40,13 +40,13 @@ UTIL_FILES = readtex.h readtex.c | |||
.SUFFIXES: .c | |||
.c: | |||
$(CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $< $(LIBS) -o $@ | |||
$(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $< $(LIBS) -o $@ | |||
.c.o: | |||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
.S.o: | |||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
##### TARGETS ##### | |||
@@ -67,19 +67,27 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress. | |||
texrect: texrect.o readtex.o | |||
<<<<<<< HEAD:progs/fp/Makefile | |||
$(CC) $(CFLAGS) $(LDFLAGS) texrect.o readtex.o $(LIBS) -o $@ | |||
======= | |||
$(APP_CC) texrect.o readtex.o $(LIBS) -o $@ | |||
>>>>>>> origin/gallium-0.1:progs/fp/Makefile | |||
texrect.o: texrect.c readtex.h | |||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
invert: invert.o readtex.o | |||
<<<<<<< HEAD:progs/fp/Makefile | |||
$(CC) $(CFLAGS) $(LDFLAGS) invert.o readtex.o $(LIBS) -o $@ | |||
======= | |||
$(APP_CC) invert.o readtex.o $(LIBS) -o $@ | |||
>>>>>>> origin/gallium-0.1:progs/fp/Makefile | |||
invert.o: invert.c readtex.h | |||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
readtex.o: readtex.c | |||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ | |||
readtex.h: $(TOP)/progs/util/readtex.h |
@@ -8,7 +8,7 @@ include $(TOP)/configs/current | |||
INCLUDE_DIRS = \ | |||
-I. \ | |||
-I/usr/include \ | |||
-I/usr/include/drm \ | |||
$(shell pkg-config --cflags-only-I libdrm) \ | |||
-I$(TOP)/include \ | |||
-I$(TOP)/include/GL/internal \ | |||
-I$(TOP)/src/mesa \ |
@@ -13,7 +13,7 @@ DRIVER_NAME = egl_xdri.so | |||
INCLUDE_DIRS = \ | |||
-I. \ | |||
-I/usr/include \ | |||
-I/usr/include/drm \ | |||
$(shell pkg-config --cflags-only-I libdrm) \ | |||
-I$(TOP)/include \ | |||
-I$(TOP)/include/GL/internal \ | |||
-I$(TOP)/src/mesa/glapi \ | |||
@@ -48,6 +48,7 @@ $(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(OBJECTS) | |||
$(TOP)/bin/mklib -o $(DRIVER_NAME) \ | |||
-noprefix \ | |||
-major 1 -minor 0 \ | |||
-L $(TOP)/$(LIB_DIR) \ | |||
-install $(TOP)/$(LIB_DIR) \ | |||
$(OBJECTS) $(DRM_LIB) $(MISC_LIBS) | |||
@@ -473,21 +473,48 @@ EMIT_R (spe_mtspr, 0x10c); | |||
void | |||
spe_load_float(struct spe_function *p, unsigned rT, float x) | |||
{ | |||
union { | |||
float f; | |||
unsigned u; | |||
} bits; | |||
bits.f = x; | |||
spe_ilhu(p, rT, bits.u >> 16); | |||
spe_iohl(p, rT, bits.u & 0xffff); | |||
if (x == 0.0f) { | |||
spe_il(p, rT, 0x0); | |||
} | |||
else if (x == 0.5f) { | |||
spe_ilhu(p, rT, 0x3f00); | |||
} | |||
else if (x == 1.0f) { | |||
spe_ilhu(p, rT, 0x3f80); | |||
} | |||
else if (x == -1.0f) { | |||
spe_ilhu(p, rT, 0xbf80); | |||
} | |||
else { | |||
union { | |||
float f; | |||
unsigned u; | |||
} bits; | |||
bits.f = x; | |||
spe_ilhu(p, rT, bits.u >> 16); | |||
spe_iohl(p, rT, bits.u & 0xffff); | |||
} | |||
} | |||
void | |||
spe_load_int(struct spe_function *p, unsigned rT, int i) | |||
{ | |||
spe_ilhu(p, rT, i >> 16); | |||
spe_iohl(p, rT, i & 0xffff); | |||
if (-32768 <= i && i <= 32767) { | |||
spe_il(p, rT, i); | |||
} | |||
else { | |||
spe_ilhu(p, rT, i >> 16); | |||
spe_iohl(p, rT, i & 0xffff); | |||
} | |||
} | |||
void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
{
   /* Shuffle control pattern: 0x00010203 (decimal 66051).
    * NOTE(review): presumably ila replicates this immediate into every
    * word slot of rT so each result word picks bytes 0..3 of rA -- confirm
    * against the SPU ISA.
    */
   const unsigned select_word0 = 0x00010203;

   /* rT = shuffle pattern */
   spe_ila(p, rT, select_word0);

   /* rT = shuffle(rA, rA) using the pattern currently held in rT */
   spe_shufb(p, rT, rA, rA, rT);
}
@@ -292,6 +292,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x); | |||
extern void | |||
spe_load_int(struct spe_function *p, unsigned rT, int i); | |||
/** Replicate word 0 of rA across rT. */ | |||
extern void | |||
spe_splat(struct spe_function *p, unsigned rT, unsigned rA); | |||
/** Complement/invert all bits in rT. */ | |||
extern void | |||
spe_complement(struct spe_function *p, unsigned rT); |
@@ -92,6 +92,7 @@ | |||
#define CELL_CMD_STATE_UNIFORMS 16 | |||
#define CELL_CMD_STATE_VS_ARRAY_INFO 17 | |||
#define CELL_CMD_STATE_BIND_VS 18 | |||
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 | |||
#define CELL_CMD_STATE_ATTRIB_FETCH 20 | |||
#define CELL_CMD_VS_EXECUTE 22 | |||
#define CELL_CMD_FLUSH_BUFFER_RANGE 23 | |||
@@ -125,6 +126,20 @@ struct cell_command_fragment_ops | |||
}; | |||
/** Max instructions for fragment programs */ | |||
#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128 | |||
/** | |||
* Command to send a fragment program to SPUs. | |||
*/ | |||
struct cell_command_fragment_program | |||
{ | |||
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ | |||
uint num_inst; /**< Number of instructions */ | |||
unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; | |||
}; | |||
/** | |||
* Tell SPUs about the framebuffer size, location | |||
*/ |
@@ -26,6 +26,7 @@ SOURCES = \ | |||
cell_draw_arrays.c \ | |||
cell_flush.c \ | |||
cell_gen_fragment.c \ | |||
cell_gen_fp.c \ | |||
cell_state_derived.c \ | |||
cell_state_emit.c \ | |||
cell_state_shader.c \ |
@@ -61,6 +61,7 @@ struct cell_fragment_shader_state | |||
{ | |||
struct pipe_shader_state shader; | |||
struct tgsi_shader_info info; | |||
struct spe_function code; | |||
void *data; | |||
}; | |||
@@ -0,0 +1,523 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/** | |||
* Generate SPU fragment program/shader code. | |||
* | |||
* Note that we generate SOA-style code here. So each TGSI instruction | |||
* operates on four pixels (and is translated into four SPU instructions, | |||
* generally speaking). | |||
* | |||
* \author Brian Paul | |||
*/ | |||
#include "pipe/p_defines.h" | |||
#include "pipe/p_state.h" | |||
#include "pipe/p_shader_tokens.h" | |||
#include "tgsi/tgsi_parse.h" | |||
#include "tgsi/tgsi_util.h" | |||
#include "tgsi/tgsi_exec.h" | |||
#include "tgsi/tgsi_dump.h" | |||
#include "rtasm/rtasm_ppc_spe.h" | |||
#include "util/u_memory.h" | |||
#include "cell_context.h" | |||
#include "cell_gen_fp.h" | |||
/** Set to 1 to enable debug/disassembly printfs */ | |||
#define DISASSEM 01 | |||
/** | |||
* Context needed during code generation. | |||
*/ | |||
struct codegen | |||
{ | |||
int inputs_reg; /**< 1st function parameter */ | |||
int outputs_reg; /**< 2nd function parameter */ | |||
int constants_reg; /**< 3rd function parameter */ | |||
int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */ | |||
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ | |||
/** Per-instruction temps / intermediate temps */ | |||
int num_itemps; | |||
int itemps[3]; | |||
struct spe_function *f; | |||
boolean error; | |||
}; | |||
/** | |||
* Allocate an intermediate temporary register. | |||
*/ | |||
static int | |||
get_itemp(struct codegen *gen) | |||
{ | |||
int t = spe_allocate_available_register(gen->f); | |||
assert(gen->num_itemps < Elements(gen->itemps)); | |||
gen->itemps[gen->num_itemps++] = t; | |||
return t; | |||
} | |||
/** | |||
* Free all intermediate temporary registers. To be called after each | |||
* instruction has been emitted. | |||
*/ | |||
static void | |||
free_itemps(struct codegen *gen) | |||
{ | |||
int i; | |||
for (i = 0; i < gen->num_itemps; i++) { | |||
spe_release_register(gen->f, gen->itemps[i]); | |||
} | |||
gen->num_itemps = 0; | |||
} | |||
/** | |||
* Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. | |||
* The register is allocated and initialized upon the first call. | |||
*/ | |||
static int | |||
get_const_one_reg(struct codegen *gen) | |||
{ | |||
if (gen->one_reg <= 0) { | |||
gen->one_reg = spe_allocate_available_register(gen->f); | |||
} | |||
/* one = {1.0, 1.0, 1.0, 1.0} */ | |||
spe_load_float(gen->f, gen->one_reg, 1.0f); | |||
#if DISASSEM | |||
printf("il\tr%d, 1.0f\n", gen->one_reg); | |||
#endif | |||
return gen->one_reg; | |||
} | |||
/** | |||
* Return the index of the SPU temporary containing the named TGSI | |||
* source register. If the TGSI register is a TGSI_FILE_TEMPORARY we | |||
* just return the corresponding SPE register. If the TGIS register | |||
* is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register | |||
* and emit an SPE load instruction. | |||
*/ | |||
static int | |||
get_src_reg(struct codegen *gen, | |||
int channel, | |||
const struct tgsi_full_src_register *src) | |||
{ | |||
int reg; | |||
/* XXX need to examine src swizzle info here. | |||
* That will involve changing the channel var... | |||
*/ | |||
switch (src->SrcRegister.File) { | |||
case TGSI_FILE_TEMPORARY: | |||
reg = gen->temp_regs[src->SrcRegister.Index][channel]; | |||
break; | |||
case TGSI_FILE_INPUT: | |||
{ | |||
/* offset is measured in quadwords, not bytes */ | |||
int offset = src->SrcRegister.Index * 4 + channel; | |||
reg = get_itemp(gen); | |||
/* Load: reg = memory[(machine_reg) + offset] */ | |||
spe_lqd(gen->f, reg, gen->inputs_reg, offset); | |||
#if DISASSEM | |||
printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); | |||
#endif | |||
} | |||
break; | |||
case TGSI_FILE_IMMEDIATE: | |||
/* xxx fall-through for now / fix */ | |||
case TGSI_FILE_CONSTANT: | |||
/* xxx fall-through for now / fix */ | |||
default: | |||
assert(0); | |||
} | |||
return reg; | |||
} | |||
/** | |||
* Return the index of an SPE register to use for the given TGSI register. | |||
* If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the | |||
* corresponding SPE register is returned. If the TGSI register is | |||
* TGSI_FILE_OUTPUT we allocate an intermediate temporary register. | |||
* See store_dest_reg() below... | |||
*/ | |||
static int | |||
get_dst_reg(struct codegen *gen, | |||
int channel, | |||
const struct tgsi_full_dst_register *dest) | |||
{ | |||
int reg; | |||
switch (dest->DstRegister.File) { | |||
case TGSI_FILE_TEMPORARY: | |||
reg = gen->temp_regs[dest->DstRegister.Index][channel]; | |||
break; | |||
case TGSI_FILE_OUTPUT: | |||
reg = get_itemp(gen); | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
return reg; | |||
} | |||
/** | |||
* When a TGSI instruction is writing to an output register, this | |||
* function emits the SPE store instruction to store the value_reg. | |||
* \param value_reg the SPE register containing the value to store. | |||
* This would have been returned by get_dst_reg(). | |||
*/ | |||
static void | |||
store_dest_reg(struct codegen *gen, | |||
int value_reg, int channel, | |||
const struct tgsi_full_dst_register *dest) | |||
{ | |||
switch (dest->DstRegister.File) { | |||
case TGSI_FILE_TEMPORARY: | |||
/* no-op */ | |||
break; | |||
case TGSI_FILE_OUTPUT: | |||
{ | |||
/* offset is measured in quadwords, not bytes */ | |||
int offset = dest->DstRegister.Index * 4 + channel; | |||
/* Store: memory[(machine_reg) + offset] = reg */ | |||
spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); | |||
#if DISASSEM | |||
printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); | |||
#endif | |||
} | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
} | |||
static boolean | |||
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) | |||
{ | |||
int ch; | |||
for (ch = 0; ch < 4; ch++) { | |||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { | |||
int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); | |||
int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); | |||
/* XXX we don't always need to actually emit a mov instruction here */ | |||
spe_move(gen->f, dst_reg, src_reg); | |||
#if DISASSEM | |||
printf("mov\tr%d, r%d\n", dst_reg, src_reg); | |||
#endif | |||
store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); | |||
free_itemps(gen); | |||
} | |||
} | |||
return true; | |||
} | |||
/** | |||
* Emit addition instructions. Recall that a single TGSI_OPCODE_ADD | |||
* becomes (up to) four SPU "fa" instructions because we're doing SOA | |||
* processing. | |||
*/ | |||
static boolean | |||
emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) | |||
{ | |||
int ch; | |||
/* Loop over Red/Green/Blue/Alpha channels */ | |||
for (ch = 0; ch < 4; ch++) { | |||
/* If the dest R, G, B or A writemask is enabled... */ | |||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { | |||
/* get indexes of the two src, one dest SPE registers */ | |||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); | |||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); | |||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); | |||
/* Emit actual SPE instruction: d = s1 + s2 */ | |||
spe_fa(gen->f, d_reg, s1_reg, s2_reg); | |||
#if DISASSEM | |||
printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); | |||
#endif | |||
/* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ | |||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); | |||
/* Free any intermediate temps we allocated */ | |||
free_itemps(gen); | |||
} | |||
} | |||
return true; | |||
} | |||
/** | |||
* Emit multiply. See emit_ADD for comments. | |||
*/ | |||
static boolean | |||
emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) | |||
{ | |||
int ch; | |||
for (ch = 0; ch < 4; ch++) { | |||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { | |||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); | |||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); | |||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); | |||
/* d = s1 * s2 */ | |||
spe_fm(gen->f, d_reg, s1_reg, s2_reg); | |||
#if DISASSEM | |||
printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); | |||
#endif | |||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); | |||
free_itemps(gen); | |||
} | |||
} | |||
return true; | |||
} | |||
/** | |||
* Emit set-if-greater-than. | |||
* Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as | |||
* the result but OpenGL/TGSI needs 0.0 and 1.0 results. | |||
* We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. | |||
*/ | |||
static boolean | |||
emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) | |||
{ | |||
int ch; | |||
for (ch = 0; ch < 4; ch++) { | |||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { | |||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); | |||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); | |||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); | |||
/* d = (s1 > s2) */ | |||
spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); | |||
#if DISASSEM | |||
printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); | |||
#endif | |||
/* convert d from 0x0/0xffffffff to 0.0/1.0 */ | |||
/* d = d & one_reg */ | |||
spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); | |||
#if DISASSEM | |||
printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); | |||
#endif | |||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); | |||
free_itemps(gen); | |||
} | |||
} | |||
return true; | |||
} | |||
/** | |||
* Emit END instruction. | |||
* We just return from the shader function at this point. | |||
* | |||
* Note that there may be more code after this that would be | |||
* called by TGSI_OPCODE_CALL. | |||
*/ | |||
static boolean | |||
emit_END(struct codegen *gen) | |||
{ | |||
/* return from function call */ | |||
spe_bi(gen->f, SPE_REG_RA, 0, 0); | |||
#if DISASSEM | |||
printf("bi\trRA\n"); | |||
#endif | |||
return true; | |||
} | |||
/** | |||
* Emit code for the given instruction. Just a big switch stmt. | |||
*/ | |||
static boolean | |||
emit_instruction(struct codegen *gen, | |||
const struct tgsi_full_instruction *inst) | |||
{ | |||
switch (inst->Instruction.Opcode) { | |||
case TGSI_OPCODE_MOV: | |||
return emit_MOV(gen, inst); | |||
case TGSI_OPCODE_MUL: | |||
return emit_MUL(gen, inst); | |||
case TGSI_OPCODE_ADD: | |||
return emit_ADD(gen, inst); | |||
case TGSI_OPCODE_SGT: | |||
return emit_SGT(gen, inst); | |||
case TGSI_OPCODE_END: | |||
return emit_END(gen); | |||
/* XXX lots more cases to do... */ | |||
default: | |||
return false; | |||
} | |||
return true; | |||
} | |||
/** | |||
* Emit "code" for a TGSI declaration. | |||
* We only care about TGSI TEMPORARY register declarations at this time. | |||
* For each TGSI TEMPORARY we allocate four SPE registers. | |||
*/ | |||
static void | |||
emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) | |||
{ | |||
int i, ch; | |||
switch (decl->Declaration.File) { | |||
case TGSI_FILE_TEMPORARY: | |||
#if DISASSEM | |||
printf("Declare temp reg %d .. %d\n", | |||
decl->DeclarationRange.First, | |||
decl->DeclarationRange.Last); | |||
#endif | |||
for (i = decl->DeclarationRange.First; | |||
i <= decl->DeclarationRange.Last; | |||
i++) { | |||
for (ch = 0; ch < 4; ch++) { | |||
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); | |||
} | |||
/* XXX if we run out of SPE registers, we need to spill | |||
* to SPU memory. someday... | |||
*/ | |||
#if DISASSEM | |||
printf(" SPE regs: %d %d %d %d\n", | |||
gen->temp_regs[i][0], | |||
gen->temp_regs[i][1], | |||
gen->temp_regs[i][2], | |||
gen->temp_regs[i][3]); | |||
#endif | |||
} | |||
break; | |||
default: | |||
; /* ignore */ | |||
} | |||
} | |||
/** | |||
* Translate TGSI shader code to SPE instructions. This is done when | |||
* the state tracker gives us a new shader (via pipe->create_fs_state()). | |||
* | |||
* \param cell the rendering context (in) | |||
* \param tokens the TGSI shader (in) | |||
* \param f the generated function (out) | |||
*/ | |||
boolean | |||
cell_gen_fragment_program(struct cell_context *cell, | |||
const struct tgsi_token *tokens, | |||
struct spe_function *f) | |||
{ | |||
struct tgsi_parse_context parse; | |||
struct codegen gen; | |||
memset(&gen, 0, sizeof(gen)); | |||
gen.f = f; | |||
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ | |||
gen.inputs_reg = 3; /* pointer to inputs array */ | |||
gen.outputs_reg = 4; /* pointer to outputs array */ | |||
gen.constants_reg = 5; /* pointer to constants array */ | |||
spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); | |||
spe_allocate_register(f, gen.inputs_reg); | |||
spe_allocate_register(f, gen.outputs_reg); | |||
spe_allocate_register(f, gen.constants_reg); | |||
#if DISASSEM | |||
printf("Begin %s\n", __FUNCTION__); | |||
tgsi_dump(tokens, 0); | |||
#endif | |||
tgsi_parse_init(&parse, tokens); | |||
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { | |||
tgsi_parse_token(&parse); | |||
switch (parse.FullToken.Token.Type) { | |||
case TGSI_TOKEN_TYPE_IMMEDIATE: | |||
#if 0 | |||
if (!note_immediate(&gen, &parse.FullToken.FullImmediate )) | |||
goto fail; | |||
#endif | |||
break; | |||
case TGSI_TOKEN_TYPE_DECLARATION: | |||
emit_declaration(&gen, &parse.FullToken.FullDeclaration); | |||
break; | |||
case TGSI_TOKEN_TYPE_INSTRUCTION: | |||
if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) { | |||
gen.error = true; | |||
} | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
} | |||
if (gen.error) { | |||
/* terminate the SPE code */ | |||
return emit_END(&gen); | |||
} | |||
#if DISASSEM | |||
printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); | |||
printf("End %s\n", __FUNCTION__); | |||
#endif | |||
tgsi_parse_free( &parse ); | |||
return !gen.error; | |||
} |
@@ -0,0 +1,42 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
#ifndef CELL_GEN_FP_H | |||
#define CELL_GEN_FP_H | |||
extern boolean | |||
cell_gen_fragment_program(struct cell_context *cell, | |||
const struct tgsi_token *tokens, | |||
struct spe_function *f); | |||
#endif /* CELL_GEN_FP_H */ | |||
@@ -265,6 +265,8 @@ gen_blend(const struct pipe_blend_state *blend, | |||
int one_reg = spe_allocate_available_register(f); | |||
int tmp_reg = spe_allocate_available_register(f); | |||
boolean one_reg_set = false; /* avoid setting one_reg more than once */ | |||
ASSERT(blend->blend_enable); | |||
/* Unpack/convert framebuffer colors from four 32-bit packed colors | |||
@@ -275,7 +277,7 @@ gen_blend(const struct pipe_blend_state *blend, | |||
int mask_reg = spe_allocate_available_register(f); | |||
/* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ | |||
spe_fsmbi(f, mask_reg, 0x1111); | |||
spe_load_int(f, mask_reg, 0xff); | |||
/* XXX there may be more clever ways to implement the following code */ | |||
switch (color_format) { | |||
@@ -418,7 +420,10 @@ gen_blend(const struct pipe_blend_state *blend, | |||
break; | |||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | |||
/* one = {1.0, 1.0, 1.0, 1.0} */ | |||
spe_load_float(f, one_reg, 1.0f); | |||
if (!one_reg_set) { | |||
spe_load_float(f, one_reg, 1.0f); | |||
one_reg_set = true; | |||
} | |||
/* tmp = one - fragA */ | |||
spe_fs(f, tmp_reg, one_reg, fragA_reg); | |||
/* term = fb * tmp */ | |||
@@ -446,7 +451,10 @@ gen_blend(const struct pipe_blend_state *blend, | |||
break; | |||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | |||
/* one = {1.0, 1.0, 1.0, 1.0} */ | |||
spe_load_float(f, one_reg, 1.0f); | |||
if (!one_reg_set) { | |||
spe_load_float(f, one_reg, 1.0f); | |||
one_reg_set = true; | |||
} | |||
/* tmp = one - fragA */ | |||
spe_fs(f, tmp_reg, one_reg, fragA_reg); | |||
/* termA = fbA * tmp */ | |||
@@ -616,7 +624,7 @@ gen_pack_colors(struct spe_function *f, | |||
* \param f the generated function (out) | |||
*/ | |||
void | |||
gen_fragment_function(struct cell_context *cell, struct spe_function *f) | |||
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) | |||
{ | |||
const struct pipe_depth_stencil_alpha_state *dsa = | |||
&cell->depth_stencil->base; | |||
@@ -850,7 +858,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f) | |||
spe_release_register(f, rgba_reg); | |||
} | |||
printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); | |||
//printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); | |||
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ | |||
@@ -31,7 +31,7 @@ | |||
extern void | |||
gen_fragment_function(struct cell_context *cell, struct spe_function *f); | |||
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f); | |||
#endif /* CELL_GEN_FRAGMENT_H */ |
@@ -73,6 +73,22 @@ cell_emit_state(struct cell_context *cell) | |||
#endif | |||
} | |||
if (cell->dirty & (CELL_NEW_FS)) { | |||
/* Send new fragment program to SPUs */ | |||
struct cell_command_fragment_program *fp | |||
= cell_batch_alloc(cell, sizeof(*fp)); | |||
fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM; | |||
fp->num_inst = cell->fs->code.num_inst; | |||
memcpy(&fp->code, cell->fs->code.store, | |||
SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); | |||
if (0) { | |||
int i; | |||
printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); | |||
for (i = 0; i < fp->num_inst; i++) { | |||
printf(" %3d: 0x%08x\n", i, fp->code[i]); | |||
} | |||
} | |||
} | |||
if (cell->dirty & (CELL_NEW_FRAMEBUFFER | | |||
CELL_NEW_DEPTH_STENCIL | | |||
@@ -85,7 +101,7 @@ cell_emit_state(struct cell_context *cell) | |||
struct spe_function spe_code; | |||
/* generate new code */ | |||
gen_fragment_function(cell, &spe_code); | |||
cell_gen_fragment_function(cell, &spe_code); | |||
/* put the new code into the batch buffer */ | |||
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; | |||
memcpy(&fops->code, spe_code.store, |
@@ -34,7 +34,7 @@ | |||
#include "cell_context.h" | |||
#include "cell_state.h" | |||
#include "cell_gen_fp.h" | |||
/** cast wrapper */ | |||
@@ -61,7 +61,7 @@ static void * | |||
cell_create_fs_state(struct pipe_context *pipe, | |||
const struct pipe_shader_state *templ) | |||
{ | |||
/*struct cell_context *cell = cell_context(pipe);*/ | |||
struct cell_context *cell = cell_context(pipe); | |||
struct cell_fragment_shader_state *cfs; | |||
cfs = CALLOC_STRUCT(cell_fragment_shader_state); | |||
@@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe, | |||
tgsi_scan_shader(templ->tokens, &cfs->info); | |||
cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); | |||
return cfs; | |||
} | |||
@@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) | |||
{ | |||
struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); | |||
spe_release_func(&cfs->code); | |||
FREE((void *) cfs->shader.tokens); | |||
FREE(cfs); | |||
} |
@@ -232,7 +232,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) | |||
printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); | |||
/* Copy SPU code from batch buffer to spu buffer */ | |||
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); | |||
/* Copy state info */ | |||
/* Copy state info (for fallback case only) */ | |||
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); | |||
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); | |||
@@ -244,6 +244,21 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) | |||
} | |||
static void | |||
cmd_state_fragment_program(const struct cell_command_fragment_program *fp) | |||
{ | |||
if (Debug) | |||
printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); | |||
/* Copy SPU code from batch buffer to spu buffer */ | |||
memcpy(spu.fragment_program_code, fp->code, | |||
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); | |||
#if 01 | |||
/* Point function pointer at new code */ | |||
spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; | |||
#endif | |||
} | |||
static void | |||
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) | |||
{ | |||
@@ -473,6 +488,14 @@ cmd_batch(uint opcode) | |||
pos += sizeof(*fops) / 8; | |||
} | |||
break; | |||
case CELL_CMD_STATE_FRAGMENT_PROGRAM: | |||
{ | |||
struct cell_command_fragment_program *fp | |||
= (struct cell_command_fragment_program *) &buffer[pos]; | |||
cmd_state_fragment_program(fp); | |||
pos += sizeof(*fp) / 8; | |||
} | |||
break; | |||
case CELL_CMD_STATE_SAMPLER: | |||
{ | |||
struct cell_command_sampler *sampler |
@@ -75,6 +75,12 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, | |||
vector float fragAlpha, | |||
vector unsigned int mask); | |||
/** Function for running fragment program */ | |||
typedef void (*spu_fragment_program_func)(vector float *inputs, | |||
vector float *outputs, | |||
vector float *constants); | |||
struct spu_framebuffer | |||
{ | |||
void *color_start; /**< addr of color surface in main memory */ | |||
@@ -142,9 +148,18 @@ struct spu_global | |||
/** Current fragment ops function */ | |||
spu_fragment_ops_func fragment_ops; | |||
/** Current fragment program machine code */ | |||
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; | |||
/** Current fragment program function */ | |||
spu_fragment_program_func fragment_program; | |||
/** Current texture sampler function */ | |||
spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; | |||
/** Fragment program constants (XXX preliminary/used) */ | |||
#define MAX_CONSTANTS 32 | |||
vector float constants[MAX_CONSTANTS]; | |||
} ALIGN16_ATTRIB; | |||
@@ -314,7 +314,42 @@ emit_quad( int x, int y, mask_t mask ) | |||
} | |||
else { | |||
/* simple shading */ | |||
#if 0 | |||
eval_coeff(1, (float) x, (float) y, colors); | |||
#else | |||
/* XXX new fragment program code */ | |||
if (spu.fragment_program) { | |||
vector float inputs[4*4], outputs[2*4]; | |||
/* setup inputs */ | |||
eval_coeff(1, (float) x, (float) y, inputs); | |||
/* Execute the current fragment program */ | |||
spu.fragment_program(inputs, outputs, spu.constants); | |||
/* Copy outputs */ | |||
colors[0] = outputs[0*4+0]; | |||
colors[1] = outputs[0*4+1]; | |||
colors[2] = outputs[0*4+2]; | |||
colors[3] = outputs[0*4+3]; | |||
if (0 && spu.init.id==0 && y == 48) { | |||
printf("colors[0] = %f %f %f %f\n", | |||
spu_extract(colors[0], 0), | |||
spu_extract(colors[0], 1), | |||
spu_extract(colors[0], 2), | |||
spu_extract(colors[0], 3)); | |||
printf("colors[1] = %f %f %f %f\n", | |||
spu_extract(colors[1], 0), | |||
spu_extract(colors[1], 1), | |||
spu_extract(colors[1], 2), | |||
spu_extract(colors[1], 3)); | |||
} | |||
} | |||
#endif | |||
} | |||
@@ -289,21 +289,19 @@ xm_buffer_destroy(struct pipe_winsys *pws, | |||
* +--+--+ | |||
*/ | |||
static void | |||
twiddle_tile(uint *tile) | |||
twiddle_tile(const uint *tileIn, uint *tileOut) | |||
{ | |||
uint tile2[TILE_SIZE * TILE_SIZE]; | |||
int y, x; | |||
for (y = 0; y < TILE_SIZE; y+=2) { | |||
for (x = 0; x < TILE_SIZE; x+=2) { | |||
int k = 4 * (y/2 * TILE_SIZE/2 + x/2); | |||
tile2[y * TILE_SIZE + (x + 0)] = tile[k]; | |||
tile2[y * TILE_SIZE + (x + 1)] = tile[k+1]; | |||
tile2[(y + 1) * TILE_SIZE + (x + 0)] = tile[k+2]; | |||
tile2[(y + 1) * TILE_SIZE + (x + 1)] = tile[k+3]; | |||
tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; | |||
tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; | |||
tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; | |||
tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; | |||
} | |||
} | |||
memcpy(tile, tile2, sizeof(tile2)); | |||
} | |||
@@ -339,6 +337,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) | |||
for (y = 0; y < surf->height; y += TILE_SIZE) { | |||
for (x = 0; x < surf->width; x += TILE_SIZE) { | |||
uint tmpTile[TILE_SIZE * TILE_SIZE]; | |||
int tx = x / TILE_SIZE; | |||
int ty = y / TILE_SIZE; | |||
int offset = ty * tilesPerRow + tx; | |||
@@ -352,9 +351,9 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) | |||
offset *= 4 * TILE_SIZE * TILE_SIZE; | |||
ximage->data = (char *) xm_buf->data + offset; | |||
twiddle_tile((uint *) ximage->data); | |||
twiddle_tile((uint *) ((char *) xm_buf->data + offset), | |||
tmpTile); | |||
ximage->data = (char*) tmpTile; | |||
if (XSHM_ENABLED(xm_buf)) { | |||
#if defined(USE_XSHM) && !defined(XFree86Server) |