Do code generation for alpha test, z test, stencil, blend, colormask and framebuffer/tile read/write as a single code block. Ian's previous blend/z/stencil test code is still there but mostly disabled and will be removed soon.
@@ -97,6 +97,7 @@ | |||
#define CELL_CMD_STATE_LOGICOP 21 | |||
#define CELL_CMD_VS_EXECUTE 22 | |||
#define CELL_CMD_FLUSH_BUFFER_RANGE 23 | |||
#define CELL_CMD_STATE_FRAGMENT_OPS 24 | |||
#define CELL_NUM_BUFFERS 4 | |||
@@ -112,30 +113,43 @@ | |||
/**
* Upload code to perform alpha/depth/stencil testing.
*/
struct cell_command_depth_stencil_alpha_test { | |||
struct cell_command_depth_stencil_alpha_test | |||
{ | |||
uint64_t base; /**< Effective address of code start. */ | |||
unsigned size; /**< Size in bytes of SPE code. */ | |||
unsigned read_depth; /**< Flag: should depth be read? */ | |||
unsigned read_stencil; /**< Flag: should stencil be read? */ | |||
struct pipe_depth_stencil_alpha_state state; | |||
}; | |||
/** | |||
* Upload code to perform framebuffer blend operation | |||
*/ | |||
struct cell_command_blend { | |||
struct cell_command_blend | |||
{ | |||
uint64_t base; /**< Effective address of code start. */ | |||
unsigned size; /**< Size in bytes of SPE code. */ | |||
unsigned read_fb; /**< Flag: should framebuffer be read? */ | |||
}; | |||
struct cell_command_logicop { | |||
struct cell_command_logicop | |||
{ | |||
uint64_t base; /**< Effective address of code start. */ | |||
unsigned size; /**< Size in bytes of SPE code. */ | |||
}; | |||
#define SPU_MAX_FRAGMENT_OPS_INSTS 64 | |||
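/**
* Upload generated code to perform all per-fragment operations
* (see gen_fragment_function() on the PPU side).
*/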
struct cell_command_fragment_ops | |||
{ | |||
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ | |||
unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; | |||
}; | |||
/** | |||
* Tell SPUs about the framebuffer size, location | |||
*/ |
@@ -25,6 +25,7 @@ SOURCES = \ | |||
cell_context.c \ | |||
cell_draw_arrays.c \ | |||
cell_flush.c \ | |||
cell_gen_fragment.c \ | |||
cell_state_derived.c \ | |||
cell_state_emit.c \ | |||
cell_state_per_fragment.c \ |
@@ -0,0 +1,530 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/** | |||
* Generate SPU per-fragment code (actually per-quad code). | |||
* \author Brian Paul | |||
*/ | |||
#include "pipe/p_defines.h" | |||
#include "pipe/p_state.h" | |||
#include "rtasm/rtasm_ppc_spe.h" | |||
#include "cell_context.h" | |||
#include "cell_gen_fragment.h" | |||
/** Do extra optimizations? */ | |||
#define OPTIMIZATIONS 1 | |||
/** | |||
* Generate SPE code to perform Z/depth testing. | |||
* | |||
* \param dsa Gallium depth/stencil/alpha state to gen code for | |||
* \param f SPE function to append instructions to.
* \param mask_reg register containing quad/pixel "alive" mask (in/out) | |||
* \param ifragZ_reg register containing integer fragment Z values (in) | |||
* \param ifbZ_reg register containing integer frame buffer Z values (in/out) | |||
* \param zmask_reg register containing result of Z test/comparison (out) | |||
*/ | |||
static void | |||
gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, | |||
struct spe_function *f, | |||
int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) | |||
{ | |||
ASSERT(dsa->depth.enabled); | |||
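/* Scalar view of what the branch-free SIMD code below does for each
* pixel i in the quad (illustrative only):
*    pass    = depth_compare(ifragZ[i], ifbZ[i], dsa->depth.func);
*    mask[i] = mask[i] && pass;
*    if (dsa->depth.writemask)
*       ifbZ[i] = mask[i] ? ifragZ[i] : ifbZ[i];
*/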
switch (dsa->depth.func) { | |||
case PIPE_FUNC_EQUAL: | |||
/* zmask = (ifragZ == ifbZ) */
spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); | |||
/* mask = (mask & zmask) */ | |||
spe_and(f, mask_reg, mask_reg, zmask_reg); | |||
break; | |||
case PIPE_FUNC_NOTEQUAL: | |||
/* zmask = (ifragZ == ifbZ) */
spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); | |||
/* mask = (mask & ~zmask) */ | |||
spe_andc(f, mask_reg, mask_reg, zmask_reg); | |||
break; | |||
case PIPE_FUNC_GREATER: | |||
/* zmask = (ifragZ > ifbZ) */
spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); | |||
/* mask = (mask & zmask) */ | |||
spe_and(f, mask_reg, mask_reg, zmask_reg); | |||
break; | |||
case PIPE_FUNC_LESS: | |||
/* zmask = (ifbZ > ifragZ) */
spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); | |||
/* mask = (mask & zmask) */ | |||
spe_and(f, mask_reg, mask_reg, zmask_reg); | |||
break; | |||
case PIPE_FUNC_LEQUAL: | |||
/* zmask = (ifragZ > ifbZ) */
spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); | |||
/* mask = (mask & ~zmask) */ | |||
spe_andc(f, mask_reg, mask_reg, zmask_reg); | |||
break; | |||
case PIPE_FUNC_GEQUAL: | |||
/* zmask = (ifbZ > ifragZ) */
spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); | |||
/* mask = (mask & ~zmask) */ | |||
spe_andc(f, mask_reg, mask_reg, zmask_reg); | |||
break; | |||
case PIPE_FUNC_NEVER: | |||
spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ | |||
spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ | |||
break; | |||
case PIPE_FUNC_ALWAYS: | |||
/* mask unchanged */ | |||
spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ | |||
break; | |||
default: | |||
ASSERT(0); | |||
break; | |||
} | |||
if (dsa->depth.writemask) { | |||
/* | |||
* If (ztest passed) { | |||
* framebufferZ = fragmentZ; | |||
* } | |||
* OR, | |||
* framebufferZ = (ztest_passed ? fragmentZ : framebufferZ);
*/ | |||
spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); | |||
} | |||
} | |||
/** | |||
* Generate SPE code to perform alpha testing. | |||
* | |||
* \param dsa Gallium depth/stencil/alpha state to gen code for | |||
* \param f SPE function to append instructions to.
* \param mask_reg register containing quad/pixel "alive" mask (in/out) | |||
* \param fragA_reg register containing four fragment alpha values (in) | |||
*/ | |||
static void | |||
gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, | |||
struct spe_function *f, int mask_reg, int fragA_reg) | |||
{ | |||
int ref_reg = spe_allocate_available_register(f); | |||
int amask_reg = spe_allocate_available_register(f); | |||
ASSERT(dsa->alpha.enabled); | |||
if ((dsa->alpha.func != PIPE_FUNC_NEVER) && | |||
(dsa->alpha.func != PIPE_FUNC_ALWAYS)) { | |||
/* load/splat the alpha reference float value */ | |||
spe_load_float(f, ref_reg, dsa->alpha.ref); | |||
} | |||
/* emit code to do the alpha comparison, updating 'mask' */ | |||
switch (dsa->alpha.func) { | |||
case PIPE_FUNC_EQUAL: | |||
/* amask = (fragA == ref) */ | |||
spe_fceq(f, amask_reg, fragA_reg, ref_reg); | |||
/* mask = (mask & amask) */ | |||
spe_and(f, mask_reg, mask_reg, amask_reg); | |||
break; | |||
case PIPE_FUNC_NOTEQUAL: | |||
/* amask = (fragA == ref) */ | |||
spe_fceq(f, amask_reg, fragA_reg, ref_reg); | |||
/* mask = (mask & ~amask) */ | |||
spe_andc(f, mask_reg, mask_reg, amask_reg); | |||
break; | |||
case PIPE_FUNC_GREATER: | |||
/* amask = (fragA > ref) */ | |||
spe_fcgt(f, amask_reg, fragA_reg, ref_reg); | |||
/* mask = (mask & amask) */ | |||
spe_and(f, mask_reg, mask_reg, amask_reg); | |||
break; | |||
case PIPE_FUNC_LESS: | |||
/* amask = (ref > fragA) */ | |||
spe_fcgt(f, amask_reg, ref_reg, fragA_reg); | |||
/* mask = (mask & amask) */ | |||
spe_and(f, mask_reg, mask_reg, amask_reg); | |||
break; | |||
case PIPE_FUNC_LEQUAL: | |||
/* amask = (fragA > ref) */ | |||
spe_fcgt(f, amask_reg, fragA_reg, ref_reg); | |||
/* mask = (mask & ~amask) */ | |||
spe_andc(f, mask_reg, mask_reg, amask_reg); | |||
break; | |||
case PIPE_FUNC_GEQUAL: | |||
/* amask = (ref > fragA) */ | |||
spe_fcgt(f, amask_reg, ref_reg, fragA_reg); | |||
/* mask = (mask & ~amask) */ | |||
spe_andc(f, mask_reg, mask_reg, amask_reg); | |||
break; | |||
case PIPE_FUNC_NEVER: | |||
spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ | |||
break; | |||
case PIPE_FUNC_ALWAYS: | |||
/* no-op, mask unchanged */ | |||
break; | |||
default: | |||
ASSERT(0); | |||
break; | |||
} | |||
#if OPTIMIZATIONS | |||
/* if mask == {0,0,0,0} we're all done, return */ | |||
{ | |||
/* re-use amask reg here */ | |||
int tmp_reg = amask_reg; | |||
/* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ | |||
spe_orx(f, tmp_reg, mask_reg); | |||
/* if tmp[0] == 0 then return from function call */ | |||
spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); | |||
} | |||
#endif | |||
spe_release_register(f, ref_reg); | |||
spe_release_register(f, amask_reg); | |||
} | |||
/** | |||
* Generate SPE code to implement the fragment operations (alpha test, | |||
* depth test, stencil test, blending, colormask, and final | |||
* framebuffer write) as specified by the current context state. | |||
* | |||
* Logically, this code will be called after running the fragment | |||
* shader. But under some circumstances we could run some of this | |||
* code before the fragment shader to cull fragments/quads that are | |||
* totally occluded/discarded. | |||
* | |||
* XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. | |||
* | |||
* See the spu_fallback_fragment_ops() function to see how the per-fragment
* operations would be done with ordinary C code. | |||
* The code generated here, however, is branch-free SIMD code and should
* be much faster.
* | |||
* \param cell the rendering context (in) | |||
* \param f the generated function (out) | |||
*/ | |||
void | |||
gen_fragment_function(struct cell_context *cell, struct spe_function *f) | |||
{ | |||
const struct pipe_depth_stencil_alpha_state *dsa = | |||
&cell->depth_stencil->base; | |||
const struct pipe_blend_state *blend = &cell->blend->base; | |||
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ | |||
const int x_reg = 3; /* uint */ | |||
const int y_reg = 4; /* uint */ | |||
const int color_tile_reg = 5; /* tile_t * */ | |||
const int depth_tile_reg = 6; /* tile_t * */ | |||
const int fragZ_reg = 7; /* vector float */ | |||
const int fragR_reg = 8; /* vector float */ | |||
const int fragG_reg = 9; /* vector float */ | |||
const int fragB_reg = 10; /* vector float */ | |||
const int fragA_reg = 11; /* vector float */ | |||
const int mask_reg = 12; /* vector uint */ | |||
/* offset of quad from start of tile | |||
* XXX assuming 4-byte pixels for color AND Z/stencil!!!! | |||
*/ | |||
int quad_offset_reg; | |||
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ | |||
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ | |||
spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); | |||
spe_allocate_register(f, x_reg); | |||
spe_allocate_register(f, y_reg); | |||
spe_allocate_register(f, color_tile_reg); | |||
spe_allocate_register(f, depth_tile_reg); | |||
spe_allocate_register(f, fragZ_reg); | |||
spe_allocate_register(f, fragR_reg); | |||
spe_allocate_register(f, fragG_reg); | |||
spe_allocate_register(f, fragB_reg); | |||
spe_allocate_register(f, fragA_reg); | |||
spe_allocate_register(f, mask_reg); | |||
quad_offset_reg = spe_allocate_available_register(f); | |||
fbRGBA_reg = spe_allocate_available_register(f); | |||
fbZS_reg = spe_allocate_available_register(f); | |||
/* compute offset of quad from start of tile, in bytes */ | |||
{ | |||
int x2_reg = spe_allocate_available_register(f); | |||
int y2_reg = spe_allocate_available_register(f); | |||
ASSERT(TILE_SIZE == 32); | |||
spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ | |||
spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ | |||
spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ | |||
spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ | |||
spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ | |||
spe_release_register(f, x2_reg); | |||
spe_release_register(f, y2_reg); | |||
} | |||
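/* In scalar terms (illustrative): with 32x32 tiles of 4-byte pixels
* stored as 2x2-pixel quads, each quad occupies 16 bytes and there are
* 16 quads per tile row, so
*    quad_offset = (y/2 * 16 + x/2) * 16
* e.g. (x,y) = (6,4) gives (2*16 + 3) * 16 = 560 bytes.
*/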
if (dsa->alpha.enabled) { | |||
gen_alpha_test(dsa, f, mask_reg, fragA_reg); | |||
} | |||
if (dsa->depth.enabled || dsa->stencil[0].enabled) { | |||
const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; | |||
boolean write_depth_stencil; | |||
int fbZ_reg = spe_allocate_available_register(f); /* Z values */ | |||
int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ | |||
/* fetch quad of depth/stencil values from tile at (x,y) */ | |||
/* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ | |||
spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); | |||
if (dsa->depth.enabled) { | |||
/* Extract Z bits from fbZS_reg into fbZ_reg */ | |||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM || | |||
zs_format == PIPE_FORMAT_X8Z24_UNORM) { | |||
int zmask24_reg = spe_allocate_available_register(f);
spe_fsmbi(f, zmask24_reg, 0x7777); /* zmask24[0,1,2,3] = 0x00ffffff */
spe_and(f, fbZ_reg, fbZS_reg, zmask24_reg); /* fbZ = fbZS & zmask24 */
spe_release_register(f, zmask24_reg);
/* OK, fbZ_reg has four 24-bit Z values now */ | |||
} | |||
else { | |||
/* XXX handle other z/stencil formats */ | |||
ASSERT(0); | |||
} | |||
/* Convert fragZ values from float[4] to uint[4] */ | |||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM || | |||
zs_format == PIPE_FORMAT_X8Z24_UNORM || | |||
zs_format == PIPE_FORMAT_Z24S8_UNORM || | |||
zs_format == PIPE_FORMAT_Z24X8_UNORM) { | |||
/* 24-bit Z values */ | |||
int scale_reg = spe_allocate_available_register(f); | |||
/* scale_reg[0,1,2,3] = float(2^24-1) */ | |||
spe_load_float(f, scale_reg, (float) 0xffffff); | |||
/* XXX these two instructions might be combined */ | |||
spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ | |||
spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ | |||
spe_release_register(f, scale_reg); | |||
} | |||
else { | |||
/* XXX handle 16-bit Z format */ | |||
ASSERT(0); | |||
} | |||
} | |||
if (dsa->stencil[0].enabled) { | |||
/* Extract stencil bits from fbZS_reg into fbS_reg */
if (zs_format == PIPE_FORMAT_S8Z24_UNORM || | |||
zs_format == PIPE_FORMAT_X8Z24_UNORM) { | |||
/* XXX extract with a shift */ | |||
ASSERT(0); | |||
} | |||
else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || | |||
zs_format == PIPE_FORMAT_Z24X8_UNORM) { | |||
/* XXX extract with a mask */ | |||
ASSERT(0); | |||
} | |||
} | |||
if (dsa->stencil[0].enabled) { | |||
/* XXX this may involve depth testing too */ | |||
// gen_stencil_test(dsa, f, ... ); | |||
ASSERT(0); | |||
} | |||
else if (dsa->depth.enabled) { | |||
int zmask_reg = spe_allocate_available_register(f); | |||
gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); | |||
spe_release_register(f, zmask_reg); | |||
} | |||
/* do we need to write Z and/or Stencil back into framebuffer? */ | |||
write_depth_stencil = (dsa->depth.writemask | | |||
dsa->stencil[0].write_mask | | |||
dsa->stencil[1].write_mask); | |||
if (write_depth_stencil) { | |||
/* Merge latest Z and Stencil values into fbZS_reg. | |||
* fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. | |||
* fbS_reg has four 8-bit stencil values in bits [7..0].
*/ | |||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM || | |||
zs_format == PIPE_FORMAT_X8Z24_UNORM) { | |||
spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ | |||
spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ | |||
} | |||
else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
/* XXX to do */ | |||
ASSERT(0); | |||
} | |||
else if (zs_format == PIPE_FORMAT_Z16_UNORM) { | |||
/* XXX to do */ | |||
ASSERT(0); | |||
} | |||
else if (zs_format == PIPE_FORMAT_S8_UNORM) { | |||
/* XXX to do */ | |||
ASSERT(0); | |||
} | |||
else { | |||
/* bad zs_format */ | |||
ASSERT(0); | |||
} | |||
/* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ | |||
spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); | |||
} | |||
spe_release_register(f, fbZ_reg); | |||
spe_release_register(f, fbS_reg); | |||
} | |||
/* Get framebuffer quad/colors. We'll need these for blending, | |||
* color masking, and to obey the quad/pixel mask. | |||
* Load: fbRGBA_reg = memory[color_tile + quad_offset] | |||
* Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking | |||
* we could skip this load. | |||
*/ | |||
spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); | |||
if (blend->blend_enable) { | |||
/* convert packed tile colors in fbRGBA_reg to float[4] vectors */ | |||
// gen_blend_code(blend, f, mask_reg, ... ); | |||
} | |||
/* | |||
* Write fragment colors to framebuffer/tile. | |||
* This involves converting the fragment colors from float[4] to the | |||
* tile's specific format and obeying the quad/pixel mask. | |||
*/ | |||
{ | |||
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; | |||
int rgba_reg = spe_allocate_available_register(f); | |||
/* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ | |||
spe_cfltu(f, fragR_reg, fragR_reg, 32); | |||
spe_cfltu(f, fragG_reg, fragG_reg, 32); | |||
spe_cfltu(f, fragB_reg, fragB_reg, 32); | |||
spe_cfltu(f, fragA_reg, fragA_reg, 32); | |||
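/* Illustrative example: with a scale factor of 32 the conversion
* multiplies by 2^32 and saturates, so 1.0 becomes 0xffffffff and
* 0.5 becomes 0x80000000; the right-shift by 24 below then yields
* 0xff and 0x80 respectively.
*/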
/* Shift the most significant bytes to the least significant positions.
* I.e.: reg = reg >> 24 | |||
*/ | |||
spe_rotmi(f, fragR_reg, fragR_reg, -24); | |||
spe_rotmi(f, fragG_reg, fragG_reg, -24); | |||
spe_rotmi(f, fragB_reg, fragB_reg, -24); | |||
spe_rotmi(f, fragA_reg, fragA_reg, -24); | |||
/* Shift the color bytes according to the surface format */ | |||
if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { | |||
spe_roti(f, fragG_reg, fragG_reg, 8); /* green <<= 8 */ | |||
spe_roti(f, fragR_reg, fragR_reg, 16); /* red <<= 16 */ | |||
spe_roti(f, fragA_reg, fragA_reg, 24); /* alpha <<= 24 */ | |||
} | |||
else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { | |||
spe_roti(f, fragR_reg, fragR_reg, 8); /* red <<= 8 */ | |||
spe_roti(f, fragG_reg, fragG_reg, 16); /* green <<= 16 */ | |||
spe_roti(f, fragB_reg, fragB_reg, 24); /* blue <<= 24 */ | |||
} | |||
else { | |||
ASSERT(0); | |||
} | |||
/* Merge red, green, blue, alpha registers to make packed RGBA colors. | |||
* Eg: after shifting according to color_format we might have: | |||
* R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} | |||
* G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} | |||
* B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} | |||
* A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} | |||
* OR-ing all those together gives us four packed colors: | |||
* RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} | |||
*/ | |||
spe_or(f, rgba_reg, fragR_reg, fragG_reg); | |||
spe_or(f, rgba_reg, rgba_reg, fragB_reg); | |||
spe_or(f, rgba_reg, rgba_reg, fragA_reg); | |||
/* Mix fragment colors with framebuffer colors using the quad/pixel mask: | |||
* if (mask[i]) | |||
* rgba[i] = rgba[i]; | |||
* else | |||
* rgba[i] = framebuffer[i]; | |||
*/ | |||
spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); | |||
/* Store updated quad in tile: | |||
* memory[color_tile + quad_offset] = rgba_reg; | |||
*/ | |||
spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); | |||
spe_release_register(f, rgba_reg); | |||
} | |||
printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); | |||
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ | |||
spe_release_register(f, fbRGBA_reg); | |||
spe_release_register(f, fbZS_reg); | |||
spe_release_register(f, quad_offset_reg); | |||
} | |||
@@ -0,0 +1,38 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
#ifndef CELL_GEN_FRAGMENT_H | |||
#define CELL_GEN_FRAGMENT_H | |||
extern void | |||
gen_fragment_function(struct cell_context *cell, struct spe_function *f); | |||
#endif /* CELL_GEN_FRAGMENT_H */ | |||
@@ -27,6 +27,7 @@ | |||
#include "util/u_memory.h" | |||
#include "cell_context.h" | |||
#include "cell_gen_fragment.h" | |||
#include "cell_state.h" | |||
#include "cell_state_emit.h" | |||
#include "cell_state_per_fragment.h" | |||
@@ -83,6 +84,29 @@ cell_emit_state(struct cell_context *cell) | |||
fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; | |||
fb->width = cell->framebuffer.width; | |||
fb->height = cell->framebuffer.height; | |||
#if 0 | |||
printf("EMIT color format %s\n", pf_name(fb->color_format)); | |||
printf("EMIT depth format %s\n", pf_name(fb->depth_format)); | |||
#endif | |||
} | |||
if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL)) { | |||
/* XXX we don't want to always do codegen here. We should have | |||
* a hash/lookup table to cache previous results... | |||
*/ | |||
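/* One possible shape for that cache (hypothetical sketch, not part of
* this patch; hash(), lookup(), insert() and cell->fragment_ops_cache
* are made-up names): key the generated code on the state that
* gen_fragment_function() reads and only run codegen on a miss:
*
*    key  = hash(depth_stencil_state, blend_state, zs_format, color_format);
*    code = lookup(cell->fragment_ops_cache, key);
*    if (!code) {
*       gen_fragment_function(cell, &spe_code);
*       code = insert(cell->fragment_ops_cache, key, &spe_code);
*    }
*    memcpy(fops->code, code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
*/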
struct cell_command_fragment_ops *fops | |||
= cell_batch_alloc(cell, sizeof(*fops)); | |||
struct spe_function spe_code; | |||
/* generate new code */ | |||
gen_fragment_function(cell, &spe_code); | |||
/* put the new code into the batch buffer */ | |||
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; | |||
memcpy(&fops->code, spe_code.store, | |||
SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); | |||
/* free codegen buffer */ | |||
spe_release_func(&spe_code); | |||
} | |||
if (cell->dirty & CELL_NEW_BLEND) { | |||
@@ -90,8 +114,7 @@ cell_emit_state(struct cell_context *cell) | |||
if (cell->blend != NULL) { | |||
blend.base = (intptr_t) cell->blend->code.store; | |||
blend.size = (char *) cell->blend->code.csr | |||
- (char *) cell->blend->code.store; | |||
blend.size = cell->blend->code.num_inst * SPE_INST_SIZE; | |||
blend.read_fb = TRUE; | |||
} | |||
else { | |||
@@ -108,10 +131,10 @@ cell_emit_state(struct cell_context *cell) | |||
if (cell->depth_stencil != NULL) { | |||
dsat.base = (intptr_t) cell->depth_stencil->code.store; | |||
dsat.size = (char *) cell->depth_stencil->code.csr | |||
- (char *) cell->depth_stencil->code.store; | |||
dsat.size = cell->depth_stencil->code.num_inst * SPE_INST_SIZE; | |||
dsat.read_depth = TRUE; | |||
dsat.read_stencil = FALSE; | |||
dsat.state = cell->depth_stencil->base; | |||
} | |||
else { | |||
dsat.base = 0; |
@@ -1158,7 +1158,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) | |||
static int | |||
PC_OFFSET(const struct spe_function *f, const void *d) | |||
{ | |||
const intptr_t pc = (intptr_t) f->csr; | |||
const intptr_t pc = (intptr_t) &f->store[f->num_inst]; | |||
const intptr_t ea = ~0x0f & (intptr_t) d; | |||
return (ea - pc) >> 2; |
@@ -43,7 +43,7 @@ INCLUDE_DIRS = \ | |||
$(SPU_CC) $(SPU_CFLAGS) -c $< | |||
.c.s: | |||
$(SPU_CC) $(SPU_CFLAGS) -S $< | |||
$(SPU_CC) $(SPU_CFLAGS) -O3 -S $< | |||
# The .a file will be linked into the main/PPU executable |
@@ -34,6 +34,7 @@ | |||
#include "spu_main.h" | |||
#include "spu_render.h" | |||
#include "spu_per_fragment_op.h" | |||
#include "spu_texture.h" | |||
#include "spu_tile.h" | |||
//#include "spu_test.h" | |||
@@ -46,7 +47,7 @@ | |||
/* | |||
helpful headers: | |||
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h | |||
/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h | |||
/opt/cell/sdk/usr/include/libmisc.h | |||
*/ | |||
boolean Debug = FALSE; | |||
@@ -226,6 +227,24 @@ cmd_release_verts(const struct cell_command_release_verts *release) | |||
} | |||
/** | |||
* Process a CELL_CMD_STATE_FRAGMENT_OPS command. | |||
* This involves installing new fragment ops SPU code. | |||
* If this function is never called, we'll use a regular C fallback function | |||
* for fragment processing. | |||
*/ | |||
static void | |||
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) | |||
{ | |||
if (Debug) | |||
printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); | |||
/* Copy SPU code from batch buffer to spu buffer */ | |||
memcpy(spu.fragment_ops.code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); | |||
/* Point function pointer at new code */ | |||
spu.fragment_ops.func = (spu_fragment_ops_func) spu.fragment_ops.code; | |||
} | |||
static void | |||
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) | |||
{ | |||
@@ -257,6 +276,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) | |||
break; | |||
case PIPE_FORMAT_Z24S8_UNORM: | |||
case PIPE_FORMAT_S8Z24_UNORM: | |||
case PIPE_FORMAT_Z24X8_UNORM: | |||
case PIPE_FORMAT_X8Z24_UNORM: | |||
spu.fb.zsize = 4; | |||
spu.fb.zscale = (float) 0x00ffffffu; | |||
break; | |||
@@ -282,6 +303,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) | |||
} | |||
#define NEW_FRAGMENT_FUNCTION 01 | |||
static void | |||
cmd_state_blend(const struct cell_command_blend *state) | |||
{ | |||
@@ -302,7 +325,9 @@ cmd_state_blend(const struct cell_command_blend *state) | |||
wait_on_mask(1 << TAG_BATCH_BUFFER); | |||
spu.blend = (blend_func) fb_blend_code_buffer; | |||
spu.read_fb = state->read_fb; | |||
} else { | |||
} | |||
else | |||
{ | |||
spu.read_fb = FALSE; | |||
} | |||
} | |||
@@ -326,7 +351,9 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat | |||
0, /* tid */ | |||
0 /* rid */); | |||
wait_on_mask(1 << TAG_BATCH_BUFFER); | |||
} else { | |||
} | |||
else | |||
{ | |||
/* If there is no code, emit a return instruction. | |||
*/ | |||
depth_stencil_code_buffer[0] = 0x35; | |||
@@ -338,12 +365,14 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat | |||
spu.frag_test = (frag_test_func) depth_stencil_code_buffer; | |||
spu.read_depth = state->read_depth; | |||
spu.read_stencil = state->read_stencil; | |||
spu.depth_stencil_alpha = state->state; | |||
} | |||
static void | |||
cmd_state_logicop(const struct cell_command_logicop * code) | |||
{ | |||
#if !NEW_FRAGMENT_FUNCTION | |||
mfc_get(logicop_code_buffer, | |||
(unsigned int) code->base, /* src */ | |||
code->size, | |||
@@ -353,6 +382,7 @@ cmd_state_logicop(const struct cell_command_logicop * code) | |||
wait_on_mask(1 << TAG_BATCH_BUFFER); | |||
spu.logicop = (logicop_func) logicop_code_buffer; | |||
#endif | |||
} | |||
@@ -455,7 +485,9 @@ cmd_finish(void) | |||
/** | |||
* Execute a batch of commands | |||
* Execute a batch of commands which was sent to us by the PPU. | |||
* See the cell_emit_state.c code to see where the commands come from. | |||
* | |||
* The opcode param encodes the location of the buffer and its size. | |||
*/ | |||
static void | |||
@@ -519,6 +551,14 @@ cmd_batch(uint opcode) | |||
pos += pos_incr; | |||
} | |||
break; | |||
case CELL_CMD_STATE_FRAGMENT_OPS: | |||
{ | |||
struct cell_command_fragment_ops *fops | |||
= (struct cell_command_fragment_ops *) &buffer[pos]; | |||
cmd_state_fragment_ops(fops); | |||
pos += sizeof(*fops) / 8; | |||
} | |||
break; | |||
case CELL_CMD_RELEASE_VERTS: | |||
{ | |||
struct cell_command_release_verts *release | |||
@@ -680,6 +720,11 @@ one_time_init(void) | |||
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); | |||
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); | |||
invalidate_tex_cache(); | |||
/* Install default/fallback fragment processing function. | |||
* This will normally be overriden by a code-gen'd function. | |||
*/ | |||
spu.fragment_ops.func = spu_fallback_fragment_ops; | |||
} | |||
@@ -91,6 +91,24 @@ typedef struct spu_blend_results (*logicop_func)( | |||
typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); | |||
typedef void (*spu_fragment_ops_func)(uint x, uint y, | |||
tile_t *colorTile, | |||
tile_t *depthStencilTile, | |||
vector float fragZ, | |||
vector float fragRed, | |||
vector float fragGreen, | |||
vector float fragBlue, | |||
vector float fragAlpha, | |||
vector unsigned int mask); | |||
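/** Fragment ops code installed by CELL_CMD_STATE_FRAGMENT_OPS, plus a callable pointer to it */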
struct spu_fragment_ops | |||
{ | |||
uint code[SPU_MAX_FRAGMENT_OPS_INSTS]; | |||
spu_fragment_ops_func func; /**< Current fragment ops function */ | |||
} ALIGN16_ATTRIB; | |||
struct spu_framebuffer { | |||
void *color_start; /**< addr of color surface in main memory */ | |||
void *depth_start; /**< addr of depth surface in main memory */ | |||
@@ -127,6 +145,9 @@ struct spu_global | |||
struct cell_init_info init; | |||
struct spu_framebuffer fb; | |||
struct pipe_depth_stencil_alpha_state depth_stencil_alpha; | |||
boolean read_depth; | |||
boolean read_stencil; | |||
frag_test_func frag_test; /**< Current depth/stencil test code */ | |||
@@ -142,6 +163,8 @@ struct spu_global | |||
struct vertex_info vertex_info; | |||
struct spu_fragment_ops fragment_ops; | |||
/* XXX more state to come */ | |||
@@ -29,8 +29,11 @@ | |||
* \author Ian Romanick <idr@us.ibm.com> | |||
*/ | |||
#include <transpose_matrix4x4.h> | |||
#include "pipe/p_format.h" | |||
#include "spu_main.h" | |||
#include "spu_colorpack.h" | |||
#include "spu_per_fragment_op.h" | |||
#define ZERO 0x80 | |||
@@ -90,7 +93,8 @@ read_ds_quad(tile_t *tile, unsigned x, unsigned y, | |||
break; | |||
} | |||
case PIPE_FORMAT_S8Z24_UNORM: { | |||
case PIPE_FORMAT_S8Z24_UNORM: | |||
case PIPE_FORMAT_X8Z24_UNORM: { | |||
qword *ptr = (qword *) &tile->ui4[iy][ix]; | |||
*depth = si_and(*ptr, si_fsmbi(0x7777)); | |||
@@ -153,7 +157,8 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, | |||
break; | |||
} | |||
case PIPE_FORMAT_S8Z24_UNORM: { | |||
case PIPE_FORMAT_S8Z24_UNORM: | |||
case PIPE_FORMAT_X8Z24_UNORM: { | |||
qword *ptr = (qword *) &buffer->ui4[iy][ix]; | |||
/* form select mask = 0111,0111,0111,0111 */ | |||
qword mask = si_fsmbi(0x7777); | |||
@@ -217,3 +222,225 @@ spu_do_depth_stencil(int x, int y, | |||
return result.mask; | |||
} | |||
/** | |||
* Called by rasterizer for each quad after the shader has run. This | |||
* is a fallback/debug function. In reality we'll use a generated | |||
* function produced by the PPU. But this function is useful for | |||
* debug/validation. | |||
*/ | |||
void | |||
spu_fallback_fragment_ops(uint x, uint y, | |||
tile_t *colorTile, | |||
tile_t *depthStencilTile, | |||
vector float fragZ, | |||
vector float fragRed, | |||
vector float fragGreen, | |||
vector float fragBlue, | |||
vector float fragAlpha, | |||
vector unsigned int mask) | |||
{ | |||
vector float frag_soa[4], frag_aos[4]; | |||
unsigned int c0, c1, c2, c3; | |||
/* do alpha test */ | |||
if (spu.depth_stencil_alpha.alpha.enabled) { | |||
vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); | |||
vector unsigned int amask; | |||
switch (spu.depth_stencil_alpha.alpha.func) { | |||
case PIPE_FUNC_LESS: | |||
amask = spu_cmpgt(ref, fragAlpha); /* mask = (fragAlpha < ref) */ | |||
break; | |||
case PIPE_FUNC_GREATER: | |||
amask = spu_cmpgt(fragAlpha, ref); /* mask = (fragAlpha > ref) */ | |||
break; | |||
case PIPE_FUNC_GEQUAL: | |||
amask = spu_cmpgt(ref, fragAlpha); | |||
amask = spu_nor(amask, amask); | |||
break; | |||
case PIPE_FUNC_LEQUAL: | |||
amask = spu_cmpgt(fragAlpha, ref); | |||
amask = spu_nor(amask, amask); | |||
break; | |||
case PIPE_FUNC_EQUAL: | |||
amask = spu_cmpeq(ref, fragAlpha); | |||
break; | |||
case PIPE_FUNC_NOTEQUAL: | |||
amask = spu_cmpeq(ref, fragAlpha); | |||
amask = spu_nor(amask, amask); | |||
break; | |||
case PIPE_FUNC_ALWAYS: | |||
amask = spu_splats(0xffffffffU); | |||
break; | |||
case PIPE_FUNC_NEVER: | |||
amask = spu_splats( 0x0U); | |||
break; | |||
default: | |||
; | |||
} | |||
mask = spu_and(mask, amask); | |||
} | |||
/* Z and/or stencil testing... */ | |||
if (spu.depth_stencil_alpha.depth.enabled || | |||
spu.depth_stencil_alpha.stencil[0].enabled) { | |||
/* get four Z/Stencil values from tile */ | |||
vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); | |||
vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; | |||
vector unsigned int ifbZ = spu_and(ifbZS, mask24); | |||
vector unsigned int ifbS = spu_andc(ifbZS, mask24); | |||
if (spu.depth_stencil_alpha.stencil[0].enabled) { | |||
/* do stencil test */ | |||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); | |||
} | |||
else if (spu.depth_stencil_alpha.depth.enabled) { | |||
/* do depth test */ | |||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || | |||
spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); | |||
vector unsigned int ifragZ; | |||
vector unsigned int zmask; | |||
/* convert four fragZ from float to uint */ | |||
fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); | |||
ifragZ = spu_convtu(fragZ, 0); | |||
/* do depth comparison, setting zmask with results */ | |||
switch (spu.depth_stencil_alpha.depth.func) { | |||
case PIPE_FUNC_LESS: | |||
zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ | |||
break; | |||
case PIPE_FUNC_GREATER: | |||
zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifragZ > ifbZ) */
break; | |||
case PIPE_FUNC_GEQUAL: | |||
zmask = spu_cmpgt(ifbZ, ifragZ); | |||
zmask = spu_nor(zmask, zmask); | |||
break; | |||
case PIPE_FUNC_LEQUAL: | |||
zmask = spu_cmpgt(ifragZ, ifbZ); | |||
zmask = spu_nor(zmask, zmask); | |||
break; | |||
case PIPE_FUNC_EQUAL: | |||
zmask = spu_cmpeq(ifbZ, ifragZ); | |||
break; | |||
case PIPE_FUNC_NOTEQUAL: | |||
zmask = spu_cmpeq(ifbZ, ifragZ); | |||
zmask = spu_nor(zmask, zmask); | |||
break; | |||
case PIPE_FUNC_ALWAYS: | |||
zmask = spu_splats(0xffffffffU); | |||
break; | |||
case PIPE_FUNC_NEVER: | |||
zmask = spu_splats( 0x0U); | |||
break; | |||
default: | |||
; | |||
} | |||
mask = spu_and(mask, zmask); | |||
/* merge framebuffer Z and fragment Z according to the mask */ | |||
ifbZ = spu_or(spu_and(ifragZ, mask), | |||
spu_andc(ifbZ, mask)); | |||
} | |||
if (spu_extract(spu_orx(mask), 0)) { | |||
/* put new fragment Z/Stencil values back into Z/Stencil tile */ | |||
depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); | |||
spu.cur_ztile_status = TILE_STATUS_DIRTY; | |||
} | |||
} | |||
/* XXX do blending here */ | |||
/* XXX do colormask test here */ | |||
if (spu_extract(spu_orx(mask), 0)) { | |||
spu.cur_ctile_status = TILE_STATUS_DIRTY; | |||
} | |||
else { | |||
return; | |||
} | |||
/* convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA */ | |||
#if 0 | |||
{ | |||
vector float frag_soa[4]; | |||
frag_soa[0] = fragRed; | |||
frag_soa[1] = fragGreen; | |||
frag_soa[2] = fragBlue; | |||
frag_soa[3] = fragAlpha; | |||
_transpose_matrix4x4(frag_aos, frag_soa); | |||
} | |||
#else | |||
/* short-cut relying on function parameter layout: */ | |||
_transpose_matrix4x4(frag_aos, &fragRed); | |||
(void) fragGreen; | |||
(void) fragBlue; | |||
#endif | |||
switch (spu.fb.color_format) { | |||
case PIPE_FORMAT_A8R8G8B8_UNORM: | |||
c0 = spu_pack_A8R8G8B8(frag_aos[0]); | |||
c1 = spu_pack_A8R8G8B8(frag_aos[1]); | |||
c2 = spu_pack_A8R8G8B8(frag_aos[2]); | |||
c3 = spu_pack_A8R8G8B8(frag_aos[3]); | |||
break; | |||
case PIPE_FORMAT_B8G8R8A8_UNORM: | |||
c0 = spu_pack_B8G8R8A8(frag_aos[0]); | |||
c1 = spu_pack_B8G8R8A8(frag_aos[1]); | |||
c2 = spu_pack_B8G8R8A8(frag_aos[2]); | |||
c3 = spu_pack_B8G8R8A8(frag_aos[3]); | |||
break; | |||
default: | |||
fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); | |||
ASSERT(0); | |||
} | |||
#if 0 | |||
/* | |||
* Quad layout: | |||
* +--+--+ | |||
* |p0|p1| | |||
* +--+--+ | |||
* |p2|p3| | |||
* +--+--+ | |||
*/ | |||
if (spu_extract(mask, 0)) | |||
colorTile->ui[y+0][x+0] = c0; | |||
if (spu_extract(mask, 1)) | |||
colorTile->ui[y+0][x+1] = c1; | |||
if (spu_extract(mask, 2)) | |||
colorTile->ui[y+1][x+0] = c2; | |||
if (spu_extract(mask, 3)) | |||
colorTile->ui[y+1][x+1] = c3; | |||
#else | |||
/* | |||
* Quad layout: | |||
* +--+--+--+--+ | |||
* |p0|p1|p2|p3| | |||
* +--+--+--+--+ | |||
*/ | |||
if (spu_extract(mask, 0)) | |||
colorTile->ui[y][x*2] = c0; | |||
if (spu_extract(mask, 1)) | |||
colorTile->ui[y][x*2+1] = c1; | |||
if (spu_extract(mask, 2)) | |||
colorTile->ui[y][x*2+2] = c2; | |||
if (spu_extract(mask, 3)) | |||
colorTile->ui[y][x*2+3] = c3; | |||
#endif | |||
} |
@@ -29,4 +29,15 @@ extern qword | |||
spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, | |||
qword frag_alpha, qword facing); | |||
extern void | |||
spu_fallback_fragment_ops(uint x, uint y, | |||
tile_t *colorTile, | |||
tile_t *depthStencilTile, | |||
vector float fragZ, | |||
vector float fragRed, | |||
vector float fragGreen, | |||
vector float fragBlue, | |||
vector float fragAlpha, | |||
vector unsigned int mask); | |||
#endif /* SPU_PER_FRAGMENT_OP */ |
@@ -297,9 +297,12 @@ emit_quad( int x, int y, mask_t mask ) | |||
sp->quad.first->run(sp->quad.first, &setup.quad); | |||
#else | |||
#define NEW_FRAGMENT_FUNCTION 01 | |||
#if !NEW_FRAGMENT_FUNCTION | |||
if (spu.read_depth) { | |||
mask = do_depth_test(x, y, mask); | |||
} | |||
#endif | |||
/* If any bits in mask are set... */ | |||
if (spu_extract(spu_orx(mask), 0)) { | |||
@@ -308,6 +311,7 @@ emit_quad( int x, int y, mask_t mask ) | |||
vector float colors[4]; | |||
spu.cur_ctile_status = TILE_STATUS_DIRTY; | |||
spu.cur_ztile_status = TILE_STATUS_DIRTY; | |||
if (spu.texture[0].start) { | |||
/* texture mapping */ | |||
@@ -355,6 +359,29 @@ emit_quad( int x, int y, mask_t mask ) | |||
} | |||
#if NEW_FRAGMENT_FUNCTION | |||
{ | |||
/* Convert fragment data from AoS to SoA format. | |||
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) | |||
* This is temporary! | |||
*/ | |||
vector float soa_frag[4]; | |||
_transpose_matrix4x4(soa_frag, colors); | |||
float4 fragZ; | |||
fragZ.v = eval_z((float) x, (float) y); | |||
/* Do all per-fragment/quad operations here, including: | |||
* alpha test, z test, stencil test, blend and framebuffer writing. | |||
*/ | |||
spu.fragment_ops.func(ix, iy, &spu.ctile, &spu.ztile, | |||
fragZ.v, | |||
soa_frag[0], soa_frag[1], | |||
soa_frag[2], soa_frag[3], | |||
mask); | |||
} | |||
#else | |||
/* Convert fragment data from AoS to SoA format. | |||
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) | |||
*/ | |||
@@ -405,6 +432,9 @@ emit_quad( int x, int y, mask_t mask ) | |||
spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0); | |||
spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0); | |||
spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0); | |||
#endif /* NEW_FRAGMENT_FUNCTION */ | |||
} | |||
#endif | |||
} |
@@ -349,12 +349,17 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type, | |||
if (vis->mesa_visual.depthBits == 0) | |||
depthFormat = PIPE_FORMAT_NONE; | |||
#ifdef GALLIUM_CELL /* XXX temporary for Cell! */ | |||
else | |||
depthFormat = PIPE_FORMAT_S8Z24_UNORM; | |||
#else | |||
else if (vis->mesa_visual.depthBits <= 16) | |||
depthFormat = PIPE_FORMAT_Z16_UNORM; | |||
else if (vis->mesa_visual.depthBits <= 24) | |||
depthFormat = PIPE_FORMAT_S8Z24_UNORM; | |||
else | |||
depthFormat = PIPE_FORMAT_Z32_UNORM; | |||
#endif | |||
if (vis->mesa_visual.stencilBits == 8) { | |||
if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) |
@@ -275,6 +275,39 @@ xm_buffer_destroy(struct pipe_winsys *pws, | |||
} | |||
/** | |||
* For Cell. Basically, rearrange the pixels/quads from this layout: | |||
* +--+--+--+--+ | |||
* |p0|p1|p2|p3|.... | |||
* +--+--+--+--+ | |||
* | |||
* to this layout: | |||
* +--+--+ | |||
* |p0|p1|.... | |||
* +--+--+ | |||
* |p2|p3| | |||
* +--+--+ | |||
*/ | |||
static void | |||
twiddle_tile(uint *tile) | |||
{ | |||
uint tile2[TILE_SIZE * TILE_SIZE]; | |||
int y, x; | |||
for (y = 0; y < TILE_SIZE; y+=2) { | |||
for (x = 0; x < TILE_SIZE; x+=2) { | |||
int k = 4 * (y/2 * TILE_SIZE/2 + x/2); | |||
tile2[y * TILE_SIZE + (x + 0)] = tile[k]; | |||
tile2[y * TILE_SIZE + (x + 1)] = tile[k+1]; | |||
tile2[(y + 1) * TILE_SIZE + (x + 0)] = tile[k+2]; | |||
tile2[(y + 1) * TILE_SIZE + (x + 1)] = tile[k+3]; | |||
} | |||
} | |||
memcpy(tile, tile2, sizeof(tile2)); | |||
} | |||
/** | |||
* Display a surface that's in a tiled configuration. That is, all the | |||
* pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory. | |||
@@ -321,6 +354,8 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) | |||
ximage->data = (char *) xm_buf->data + offset; | |||
twiddle_tile((uint *) ximage->data); | |||
if (XSHM_ENABLED(xm_buf)) { | |||
#if defined(USE_XSHM) && !defined(XFree86Server) | |||
XShmPutImage(b->xm_visual->display, b->drawable, b->gc, |