All of the code is wired in on the SPU side, but it is not called from the PPU yet. Instruction / declaration fetch still needs to be implemented in spu_exec.c.tags/mesa_20090313
@@ -83,6 +83,9 @@ | |||
#define CELL_CMD_STATE_SAMPLER 12 | |||
#define CELL_CMD_STATE_TEXTURE 13 | |||
#define CELL_CMD_STATE_VERTEX_INFO 14 | |||
#define CELL_CMD_STATE_VIEWPORT 15 | |||
#define CELL_CMD_STATE_VS_ARRAY_INFO 16 | |||
#define CELL_CMD_VS_EXECUTE 17 | |||
#define CELL_NUM_BUFFERS 4 | |||
@@ -116,6 +119,41 @@ struct cell_command_clear_surface | |||
} ALIGN16_ATTRIB; | |||
/** | |||
* Array info used by the vertex shader's vertex puller. | |||
*/ | |||
struct cell_array_info | |||
{ | |||
void *base; /**< Base address of the 0th element. */ | |||
uint attr; /**< Attribute that this state if for. */ | |||
uint pitch; /**< Byte pitch from one entry to the next. */ | |||
enum pipe_format format; /**< Pipe format of each entry. */ | |||
} ALIGN16_ATTRIB; | |||
struct cell_shader_info | |||
{ | |||
unsigned processor; | |||
unsigned num_outputs; | |||
void *declarations; | |||
unsigned num_declarations; | |||
void *instructions; | |||
unsigned num_instructions; | |||
void *uniforms; | |||
} ALIGN16_ATTRIB; | |||
struct cell_command_vs | |||
{ | |||
struct cell_shader_info shader; | |||
void *elts; | |||
unsigned num_elts; | |||
unsigned bytes_per_elt; | |||
void *vOut; | |||
} ALIGN16_ATTRIB; | |||
struct cell_command_render | |||
{ | |||
uint opcode; /**< CELL_CMD_RENDER */ |
@@ -20,7 +20,11 @@ SOURCES = \ | |||
spu_render.c \ | |||
spu_texture.c \ | |||
spu_tile.c \ | |||
spu_tri.c | |||
spu_tri.c \ | |||
spu_exec.c \ | |||
spu_util.c \ | |||
spu_vertex_fetch.c \ | |||
spu_vertex_shader.c | |||
SPU_OBJECTS = $(SOURCES:.c=.o) \ | |||
@@ -0,0 +1,171 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
#if !defined SPU_EXEC_H | |||
#define SPU_EXEC_H | |||
#include "pipe/p_compiler.h" | |||
#include "pipe/tgsi/exec/tgsi_exec.h" | |||
#if defined __cplusplus | |||
extern "C" { | |||
#endif | |||
/** | |||
* Registers may be treated as float, signed int or unsigned int. | |||
*/ | |||
union spu_exec_channel | |||
{ | |||
float f[QUAD_SIZE]; | |||
int i[QUAD_SIZE]; | |||
unsigned u[QUAD_SIZE]; | |||
}; | |||
/** | |||
* A vector[RGBA] of channels[4 pixels] | |||
*/ | |||
struct spu_exec_vector | |||
{ | |||
union spu_exec_channel xyzw[NUM_CHANNELS]; | |||
}; | |||
/** | |||
* For fragment programs, information for computing fragment input | |||
* values from plane equation of the triangle/line. | |||
*/ | |||
struct spu_interp_coef | |||
{ | |||
float a0[NUM_CHANNELS]; /* in an xyzw layout */ | |||
float dadx[NUM_CHANNELS]; | |||
float dady[NUM_CHANNELS]; | |||
}; | |||
struct softpipe_tile_cache; /**< Opaque to TGSI */ | |||
/** | |||
* Information for sampling textures, which must be implemented | |||
* by code outside the TGSI executor. | |||
*/ | |||
struct spu_sampler | |||
{ | |||
const struct pipe_sampler_state *state; | |||
struct pipe_texture *texture; | |||
/** Get samples for four fragments in a quad */ | |||
void (*get_samples)(struct spu_sampler *sampler, | |||
const float s[QUAD_SIZE], | |||
const float t[QUAD_SIZE], | |||
const float p[QUAD_SIZE], | |||
float lodbias, | |||
float rgba[NUM_CHANNELS][QUAD_SIZE]); | |||
void *pipe; /*XXX temporary*/ | |||
struct softpipe_tile_cache *cache; | |||
}; | |||
/** | |||
* Run-time virtual machine state for executing TGSI shader. | |||
*/ | |||
struct spu_exec_machine | |||
{ | |||
/* | |||
* 32 program temporaries | |||
* 4 internal temporaries | |||
* 1 address | |||
*/ | |||
struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS | |||
+ TGSI_EXEC_NUM_ADDRS + 1] | |||
ALIGN16_ATTRIB; | |||
struct spu_exec_vector *Addrs; | |||
struct spu_sampler *Samplers; | |||
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; | |||
unsigned ImmLimit; | |||
float (*Consts)[4]; | |||
struct spu_exec_vector *Inputs; | |||
struct spu_exec_vector *Outputs; | |||
unsigned Processor; | |||
/* GEOMETRY processor only. */ | |||
unsigned *Primitives; | |||
/* FRAGMENT processor only. */ | |||
const struct spu_interp_coef *InterpCoefs; | |||
struct spu_exec_vector QuadPos; | |||
/* Conditional execution masks */ | |||
uint CondMask; /**< For IF/ELSE/ENDIF */ | |||
uint LoopMask; /**< For BGNLOOP/ENDLOOP */ | |||
uint ContMask; /**< For loop CONT statements */ | |||
uint FuncMask; /**< For function calls */ | |||
uint ExecMask; /**< = CondMask & LoopMask */ | |||
/** Condition mask stack (for nested conditionals) */ | |||
uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; | |||
int CondStackTop; | |||
/** Loop mask stack (for nested loops) */ | |||
uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; | |||
int LoopStackTop; | |||
/** Loop continue mask stack (see comments in tgsi_exec.c) */ | |||
uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; | |||
int ContStackTop; | |||
/** Function execution mask stack (for executing subroutine code) */ | |||
uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; | |||
int FuncStackTop; | |||
/** Function call stack for saving/restoring the program counter */ | |||
uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; | |||
int CallStackTop; | |||
struct tgsi_full_instruction *Instructions; | |||
uint NumInstructions; | |||
struct tgsi_full_declaration *Declarations; | |||
uint NumDeclarations; | |||
}; | |||
extern void | |||
spu_exec_machine_init(struct spu_exec_machine *mach, | |||
uint numSamplers, | |||
struct spu_sampler *samplers, | |||
unsigned processor); | |||
extern uint | |||
spu_exec_machine_run( struct spu_exec_machine *mach ); | |||
#if defined __cplusplus | |||
} /* extern "C" */ | |||
#endif | |||
#endif /* SPU_EXEC_H */ |
@@ -36,6 +36,7 @@ | |||
#include "spu_render.h" | |||
#include "spu_texture.h" | |||
#include "spu_tile.h" | |||
#include "spu_vertex_shader.h" | |||
#include "pipe/cell/common.h" | |||
#include "pipe/p_defines.h" | |||
@@ -50,6 +51,7 @@ boolean Debug = FALSE; | |||
struct spu_global spu; | |||
struct spu_vs_context draw; | |||
/** | |||
* Tell the PPU that this SPU has finished copying a buffer to | |||
@@ -264,6 +266,18 @@ cmd_state_vertex_info(const struct vertex_info *vinfo) | |||
} | |||
static void | |||
cmd_state_vs_array_info(const struct cell_array_info *vs_info) | |||
{ | |||
const unsigned attr = vs_info->attr; | |||
ASSERT(attr < PIPE_ATTRIB_MAX); | |||
draw.vertex_fetch.src_ptr[attr] = vs_info->base; | |||
draw.vertex_fetch.pitch[attr] = vs_info->pitch; | |||
draw.vertex_fetch.format[attr] = vs_info->format; | |||
draw.vertex_fetch.dirty = 1; | |||
} | |||
static void | |||
cmd_finish(void) | |||
@@ -374,6 +388,20 @@ cmd_batch(uint opcode) | |||
cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); | |||
pos += (1 + sizeof(struct vertex_info) / 4); | |||
break; | |||
case CELL_CMD_STATE_VIEWPORT: | |||
(void) memcpy(& draw.viewport, &buffer[pos+1], | |||
sizeof(struct pipe_viewport_state)); | |||
pos += (1 + sizeof(struct pipe_viewport_state) / 4); | |||
break; | |||
case CELL_CMD_STATE_VS_ARRAY_INFO: | |||
cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); | |||
pos += (1 + sizeof(struct cell_array_info) / 4); | |||
break; | |||
case CELL_CMD_VS_EXECUTE: | |||
spu_execute_vertex_shader(&draw, | |||
(struct cell_command_vs *) &buffer[pos+1]); | |||
pos += (1 + sizeof(struct cell_command_vs) / 4); | |||
break; | |||
default: | |||
printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, buffer[pos]); | |||
ASSERT(0); |
@@ -0,0 +1,165 @@ | |||
#include "pipe/p_util.h" | |||
#include "pipe/p_shader_tokens.h" | |||
#include "pipe/tgsi/util/tgsi_parse.h" | |||
//#include "tgsi_build.h" | |||
#include "pipe/tgsi/util/tgsi_util.h" | |||
unsigned | |||
tgsi_util_get_src_register_swizzle( | |||
const struct tgsi_src_register *reg, | |||
unsigned component ) | |||
{ | |||
switch( component ) { | |||
case 0: | |||
return reg->SwizzleX; | |||
case 1: | |||
return reg->SwizzleY; | |||
case 2: | |||
return reg->SwizzleZ; | |||
case 3: | |||
return reg->SwizzleW; | |||
default: | |||
assert( 0 ); | |||
} | |||
return 0; | |||
} | |||
unsigned | |||
tgsi_util_get_src_register_extswizzle( | |||
const struct tgsi_src_register_ext_swz *reg, | |||
unsigned component ) | |||
{ | |||
switch( component ) { | |||
case 0: | |||
return reg->ExtSwizzleX; | |||
case 1: | |||
return reg->ExtSwizzleY; | |||
case 2: | |||
return reg->ExtSwizzleZ; | |||
case 3: | |||
return reg->ExtSwizzleW; | |||
default: | |||
assert( 0 ); | |||
} | |||
return 0; | |||
} | |||
unsigned | |||
tgsi_util_get_full_src_register_extswizzle( | |||
const struct tgsi_full_src_register *reg, | |||
unsigned component ) | |||
{ | |||
unsigned swizzle; | |||
/* | |||
* First, calculate the extended swizzle for a given channel. This will give | |||
* us either a channel index into the simple swizzle or a constant 1 or 0. | |||
*/ | |||
swizzle = tgsi_util_get_src_register_extswizzle( | |||
®->SrcRegisterExtSwz, | |||
component ); | |||
assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); | |||
assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); | |||
assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); | |||
assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); | |||
assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); | |||
assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); | |||
/* | |||
* Second, calculate the simple swizzle for the unswizzled channel index. | |||
* Leave the constants intact, they are not affected by the simple swizzle. | |||
*/ | |||
if( swizzle <= TGSI_SWIZZLE_W ) { | |||
swizzle = tgsi_util_get_src_register_swizzle( | |||
®->SrcRegister, | |||
component ); | |||
} | |||
return swizzle; | |||
} | |||
unsigned | |||
tgsi_util_get_src_register_extnegate( | |||
const struct tgsi_src_register_ext_swz *reg, | |||
unsigned component ) | |||
{ | |||
switch( component ) { | |||
case 0: | |||
return reg->NegateX; | |||
case 1: | |||
return reg->NegateY; | |||
case 2: | |||
return reg->NegateZ; | |||
case 3: | |||
return reg->NegateW; | |||
default: | |||
assert( 0 ); | |||
} | |||
return 0; | |||
} | |||
void | |||
tgsi_util_set_src_register_extnegate( | |||
struct tgsi_src_register_ext_swz *reg, | |||
unsigned negate, | |||
unsigned component ) | |||
{ | |||
switch( component ) { | |||
case 0: | |||
reg->NegateX = negate; | |||
break; | |||
case 1: | |||
reg->NegateY = negate; | |||
break; | |||
case 2: | |||
reg->NegateZ = negate; | |||
break; | |||
case 3: | |||
reg->NegateW = negate; | |||
break; | |||
default: | |||
assert( 0 ); | |||
} | |||
} | |||
unsigned | |||
tgsi_util_get_full_src_register_sign_mode( | |||
const struct tgsi_full_src_register *reg, | |||
unsigned component ) | |||
{ | |||
unsigned sign_mode; | |||
if( reg->SrcRegisterExtMod.Absolute ) { | |||
/* Consider only the post-abs negation. */ | |||
if( reg->SrcRegisterExtMod.Negate ) { | |||
sign_mode = TGSI_UTIL_SIGN_SET; | |||
} | |||
else { | |||
sign_mode = TGSI_UTIL_SIGN_CLEAR; | |||
} | |||
} | |||
else { | |||
/* Accumulate the three negations. */ | |||
unsigned negate; | |||
negate = reg->SrcRegister.Negate; | |||
if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { | |||
negate = !negate; | |||
} | |||
if( reg->SrcRegisterExtMod.Negate ) { | |||
negate = !negate; | |||
} | |||
if( negate ) { | |||
sign_mode = TGSI_UTIL_SIGN_TOGGLE; | |||
} | |||
else { | |||
sign_mode = TGSI_UTIL_SIGN_KEEP; | |||
} | |||
} | |||
return sign_mode; | |||
} |
@@ -0,0 +1,493 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/* | |||
* Authors: | |||
* Keith Whitwell <keith@tungstengraphics.com> | |||
*/ | |||
#include "pipe/p_util.h" | |||
#include "pipe/p_state.h" | |||
#include "pipe/p_shader_tokens.h" | |||
#include "spu_exec.h" | |||
#include "spu_vertex_shader.h" | |||
#define DRAW_DBG 0 | |||
/** | |||
* Fetch a float[4] vertex attribute from memory, doing format/type | |||
* conversion as needed. | |||
* | |||
* This is probably needed/dupliocated elsewhere, eg format | |||
* conversion, texture sampling etc. | |||
*/ | |||
#define FETCH_ATTRIB( NAME, SZ, CVT ) \ | |||
static void \ | |||
fetch_##NAME(const void *ptr, float *attrib) \ | |||
{ \ | |||
static const float defaults[4] = { 0,0,0,1 }; \ | |||
int i; \ | |||
\ | |||
for (i = 0; i < SZ; i++) { \ | |||
attrib[i] = CVT; \ | |||
} \ | |||
\ | |||
for (; i < 4; i++) { \ | |||
attrib[i] = defaults[i]; \ | |||
} \ | |||
} | |||
#define CVT_64_FLOAT (float) ((double *) ptr)[i] | |||
#define CVT_32_FLOAT ((float *) ptr)[i] | |||
#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] | |||
#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] | |||
#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] | |||
#define CVT_8_SSCALED (float) ((char *) ptr)[i] | |||
#define CVT_16_SSCALED (float) ((short *) ptr)[i] | |||
#define CVT_32_SSCALED (float) ((int *) ptr)[i] | |||
#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f | |||
#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f | |||
#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f | |||
#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f | |||
#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f | |||
#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f | |||
FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) | |||
FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) | |||
FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) | |||
FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) | |||
FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) | |||
FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) | |||
FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) | |||
FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) | |||
FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) | |||
FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) | |||
FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) | |||
FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) | |||
FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) | |||
FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) | |||
FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) | |||
FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) | |||
FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) | |||
FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) | |||
FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) | |||
FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) | |||
FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) | |||
FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) | |||
FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) | |||
FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) | |||
FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) | |||
FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) | |||
FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) | |||
FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) | |||
FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) | |||
FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) | |||
FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) | |||
FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) | |||
FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) | |||
FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) | |||
FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) | |||
FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) | |||
FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) | |||
FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) | |||
FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) | |||
FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) | |||
FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) | |||
FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) | |||
FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) | |||
FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) | |||
FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) | |||
FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) | |||
FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) | |||
FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) | |||
FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) | |||
FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) | |||
FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) | |||
FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) | |||
FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) | |||
FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) | |||
FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) | |||
FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) | |||
FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) | |||
//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) | |||
static spu_fetch_func get_fetch_func( enum pipe_format format ) | |||
{ | |||
#if 0 | |||
{ | |||
char tmp[80]; | |||
pf_sprint_name(tmp, format); | |||
_mesa_printf("%s: %s\n", __FUNCTION__, tmp); | |||
} | |||
#endif | |||
switch (format) { | |||
case PIPE_FORMAT_R64_FLOAT: | |||
return fetch_R64_FLOAT; | |||
case PIPE_FORMAT_R64G64_FLOAT: | |||
return fetch_R64G64_FLOAT; | |||
case PIPE_FORMAT_R64G64B64_FLOAT: | |||
return fetch_R64G64B64_FLOAT; | |||
case PIPE_FORMAT_R64G64B64A64_FLOAT: | |||
return fetch_R64G64B64A64_FLOAT; | |||
case PIPE_FORMAT_R32_FLOAT: | |||
return fetch_R32_FLOAT; | |||
case PIPE_FORMAT_R32G32_FLOAT: | |||
return fetch_R32G32_FLOAT; | |||
case PIPE_FORMAT_R32G32B32_FLOAT: | |||
return fetch_R32G32B32_FLOAT; | |||
case PIPE_FORMAT_R32G32B32A32_FLOAT: | |||
return fetch_R32G32B32A32_FLOAT; | |||
case PIPE_FORMAT_R32_UNORM: | |||
return fetch_R32_UNORM; | |||
case PIPE_FORMAT_R32G32_UNORM: | |||
return fetch_R32G32_UNORM; | |||
case PIPE_FORMAT_R32G32B32_UNORM: | |||
return fetch_R32G32B32_UNORM; | |||
case PIPE_FORMAT_R32G32B32A32_UNORM: | |||
return fetch_R32G32B32A32_UNORM; | |||
case PIPE_FORMAT_R32_USCALED: | |||
return fetch_R32_USCALED; | |||
case PIPE_FORMAT_R32G32_USCALED: | |||
return fetch_R32G32_USCALED; | |||
case PIPE_FORMAT_R32G32B32_USCALED: | |||
return fetch_R32G32B32_USCALED; | |||
case PIPE_FORMAT_R32G32B32A32_USCALED: | |||
return fetch_R32G32B32A32_USCALED; | |||
case PIPE_FORMAT_R32_SNORM: | |||
return fetch_R32_SNORM; | |||
case PIPE_FORMAT_R32G32_SNORM: | |||
return fetch_R32G32_SNORM; | |||
case PIPE_FORMAT_R32G32B32_SNORM: | |||
return fetch_R32G32B32_SNORM; | |||
case PIPE_FORMAT_R32G32B32A32_SNORM: | |||
return fetch_R32G32B32A32_SNORM; | |||
case PIPE_FORMAT_R32_SSCALED: | |||
return fetch_R32_SSCALED; | |||
case PIPE_FORMAT_R32G32_SSCALED: | |||
return fetch_R32G32_SSCALED; | |||
case PIPE_FORMAT_R32G32B32_SSCALED: | |||
return fetch_R32G32B32_SSCALED; | |||
case PIPE_FORMAT_R32G32B32A32_SSCALED: | |||
return fetch_R32G32B32A32_SSCALED; | |||
case PIPE_FORMAT_R16_UNORM: | |||
return fetch_R16_UNORM; | |||
case PIPE_FORMAT_R16G16_UNORM: | |||
return fetch_R16G16_UNORM; | |||
case PIPE_FORMAT_R16G16B16_UNORM: | |||
return fetch_R16G16B16_UNORM; | |||
case PIPE_FORMAT_R16G16B16A16_UNORM: | |||
return fetch_R16G16B16A16_UNORM; | |||
case PIPE_FORMAT_R16_USCALED: | |||
return fetch_R16_USCALED; | |||
case PIPE_FORMAT_R16G16_USCALED: | |||
return fetch_R16G16_USCALED; | |||
case PIPE_FORMAT_R16G16B16_USCALED: | |||
return fetch_R16G16B16_USCALED; | |||
case PIPE_FORMAT_R16G16B16A16_USCALED: | |||
return fetch_R16G16B16A16_USCALED; | |||
case PIPE_FORMAT_R16_SNORM: | |||
return fetch_R16_SNORM; | |||
case PIPE_FORMAT_R16G16_SNORM: | |||
return fetch_R16G16_SNORM; | |||
case PIPE_FORMAT_R16G16B16_SNORM: | |||
return fetch_R16G16B16_SNORM; | |||
case PIPE_FORMAT_R16G16B16A16_SNORM: | |||
return fetch_R16G16B16A16_SNORM; | |||
case PIPE_FORMAT_R16_SSCALED: | |||
return fetch_R16_SSCALED; | |||
case PIPE_FORMAT_R16G16_SSCALED: | |||
return fetch_R16G16_SSCALED; | |||
case PIPE_FORMAT_R16G16B16_SSCALED: | |||
return fetch_R16G16B16_SSCALED; | |||
case PIPE_FORMAT_R16G16B16A16_SSCALED: | |||
return fetch_R16G16B16A16_SSCALED; | |||
case PIPE_FORMAT_R8_UNORM: | |||
return fetch_R8_UNORM; | |||
case PIPE_FORMAT_R8G8_UNORM: | |||
return fetch_R8G8_UNORM; | |||
case PIPE_FORMAT_R8G8B8_UNORM: | |||
return fetch_R8G8B8_UNORM; | |||
case PIPE_FORMAT_R8G8B8A8_UNORM: | |||
return fetch_R8G8B8A8_UNORM; | |||
case PIPE_FORMAT_R8_USCALED: | |||
return fetch_R8_USCALED; | |||
case PIPE_FORMAT_R8G8_USCALED: | |||
return fetch_R8G8_USCALED; | |||
case PIPE_FORMAT_R8G8B8_USCALED: | |||
return fetch_R8G8B8_USCALED; | |||
case PIPE_FORMAT_R8G8B8A8_USCALED: | |||
return fetch_R8G8B8A8_USCALED; | |||
case PIPE_FORMAT_R8_SNORM: | |||
return fetch_R8_SNORM; | |||
case PIPE_FORMAT_R8G8_SNORM: | |||
return fetch_R8G8_SNORM; | |||
case PIPE_FORMAT_R8G8B8_SNORM: | |||
return fetch_R8G8B8_SNORM; | |||
case PIPE_FORMAT_R8G8B8A8_SNORM: | |||
return fetch_R8G8B8A8_SNORM; | |||
case PIPE_FORMAT_R8_SSCALED: | |||
return fetch_R8_SSCALED; | |||
case PIPE_FORMAT_R8G8_SSCALED: | |||
return fetch_R8G8_SSCALED; | |||
case PIPE_FORMAT_R8G8B8_SSCALED: | |||
return fetch_R8G8B8_SSCALED; | |||
case PIPE_FORMAT_R8G8B8A8_SSCALED: | |||
return fetch_R8G8B8A8_SSCALED; | |||
case PIPE_FORMAT_A8R8G8B8_UNORM: | |||
return fetch_A8R8G8B8_UNORM; | |||
case 0: | |||
return NULL; /* not sure why this is needed */ | |||
default: | |||
assert(0); | |||
return NULL; | |||
} | |||
} | |||
static void | |||
transpose_4x4( float *out, const float *in ) | |||
{ | |||
/* This can be achieved in 12 sse instructions, plus the final | |||
* stores I guess. This is probably a bit more than that - maybe | |||
* 32 or so? | |||
*/ | |||
out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12]; | |||
out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13]; | |||
out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14]; | |||
out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15]; | |||
} | |||
static void fetch_xyz_rgb( struct spu_vs_context *draw, | |||
struct spu_exec_machine *machine, | |||
const unsigned *elts, | |||
unsigned count ) | |||
{ | |||
assert(count <= 4); | |||
// _mesa_printf("%s\n", __FUNCTION__); | |||
/* loop over vertex attributes (vertex shader inputs) | |||
*/ | |||
const unsigned *pitch = draw->vertex_fetch.pitch; | |||
const ubyte **src = draw->vertex_fetch.src_ptr; | |||
int i; | |||
for (i = 0; i < 4; i++) { | |||
{ | |||
const float *in = (const float *)(src[0] + elts[i] * pitch[0]); | |||
float *out = &machine->Inputs[0].xyzw[0].f[i]; | |||
out[0] = in[0]; | |||
out[4] = in[1]; | |||
out[8] = in[2]; | |||
out[12] = 1.0f; | |||
} | |||
{ | |||
const float *in = (const float *)(src[1] + elts[i] * pitch[1]); | |||
float *out = &machine->Inputs[1].xyzw[0].f[i]; | |||
out[0] = in[0]; | |||
out[4] = in[1]; | |||
out[8] = in[2]; | |||
out[12] = 1.0f; | |||
} | |||
} | |||
} | |||
static void fetch_xyz_rgb_st( struct spu_vs_context *draw, | |||
struct spu_exec_machine *machine, | |||
const unsigned *elts, | |||
unsigned count ) | |||
{ | |||
assert(count <= 4); | |||
/* loop over vertex attributes (vertex shader inputs) | |||
*/ | |||
const unsigned *pitch = draw->vertex_fetch.pitch; | |||
const ubyte **src = draw->vertex_fetch.src_ptr; | |||
int i; | |||
for (i = 0; i < 4; i++) { | |||
{ | |||
const float *in = (const float *)(src[0] + elts[i] * pitch[0]); | |||
float *out = &machine->Inputs[0].xyzw[0].f[i]; | |||
out[0] = in[0]; | |||
out[4] = in[1]; | |||
out[8] = in[2]; | |||
out[12] = 1.0f; | |||
} | |||
{ | |||
const float *in = (const float *)(src[1] + elts[i] * pitch[1]); | |||
float *out = &machine->Inputs[1].xyzw[0].f[i]; | |||
out[0] = in[0]; | |||
out[4] = in[1]; | |||
out[8] = in[2]; | |||
out[12] = 1.0f; | |||
} | |||
{ | |||
const float *in = (const float *)(src[2] + elts[i] * pitch[2]); | |||
float *out = &machine->Inputs[1].xyzw[0].f[i]; | |||
out[0] = in[0]; | |||
out[4] = in[1]; | |||
out[8] = 0.0f; | |||
out[12] = 1.0f; | |||
} | |||
} | |||
} | |||
/** | |||
* Fetch vertex attributes for 'count' vertices. | |||
*/ | |||
static void generic_vertex_fetch( struct spu_vs_context *draw, | |||
struct spu_exec_machine *machine, | |||
const unsigned *elts, | |||
unsigned count ) | |||
{ | |||
unsigned nr_attrs = draw->vertex_fetch.nr_attrs; | |||
unsigned attr; | |||
assert(count <= 4); | |||
// _mesa_printf("%s %d\n", __FUNCTION__, count); | |||
/* loop over vertex attributes (vertex shader inputs) | |||
*/ | |||
for (attr = 0; attr < nr_attrs; attr++) { | |||
const unsigned pitch = draw->vertex_fetch.pitch[attr]; | |||
const ubyte *src = draw->vertex_fetch.src_ptr[attr]; | |||
const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; | |||
unsigned i; | |||
float p[4][4]; | |||
/* Fetch four attributes for four vertices. | |||
* | |||
* Could fetch directly into AOS format, but this is meant to be | |||
* a prototype for an sse implementation, which would have | |||
* difficulties doing that. | |||
*/ | |||
for (i = 0; i < count; i++) | |||
fetch( src + elts[i] * pitch, p[i] ); | |||
/* Be nice and zero out any missing vertices: | |||
*/ | |||
for (/* empty */; i < 4; i++) | |||
p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0; | |||
/* Transpose/swizzle into sse-friendly format. Currently | |||
* assuming that all vertex shader inputs are float[4], but this | |||
* isn't true -- if the vertex shader only wants tex0.xy, we | |||
* could optimize for that. | |||
* | |||
* To do so fully without codegen would probably require an | |||
* excessive number of fetch functions, but we could at least | |||
* minimize the transpose step: | |||
*/ | |||
transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p ); | |||
} | |||
} | |||
void spu_update_vertex_fetch( struct spu_vs_context *draw ) | |||
{ | |||
unsigned i; | |||
for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { | |||
draw->vertex_fetch.fetch[i] = | |||
get_fetch_func(draw->vertex_fetch.format[i]); | |||
} | |||
draw->vertex_fetch.fetch_func = generic_vertex_fetch; | |||
switch (draw->vertex_fetch.nr_attrs) { | |||
case 2: | |||
if (draw->vertex_fetch.format[0] == PIPE_FORMAT_R32G32B32_FLOAT && | |||
draw->vertex_fetch.format[1] == PIPE_FORMAT_R32G32B32_FLOAT) | |||
draw->vertex_fetch.fetch_func = fetch_xyz_rgb; | |||
break; | |||
case 3: | |||
if (draw->vertex_fetch.format[0] == PIPE_FORMAT_R32G32B32_FLOAT && | |||
draw->vertex_fetch.format[1] == PIPE_FORMAT_R32G32B32_FLOAT && | |||
draw->vertex_fetch.format[2] == PIPE_FORMAT_R32G32_FLOAT) | |||
draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st; | |||
break; | |||
default: | |||
break; | |||
} | |||
} |
@@ -0,0 +1,224 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/* | |||
* Authors: | |||
* Keith Whitwell <keith@tungstengraphics.com> | |||
* Brian Paul | |||
* Ian Romanick <idr@us.ibm.com> | |||
*/ | |||
#include "pipe/p_util.h" | |||
#include "pipe/p_state.h" | |||
#include "pipe/p_shader_tokens.h" | |||
#include "spu_vertex_shader.h" | |||
#include "spu_exec.h" | |||
#include "pipe/draw/draw_private.h" | |||
#include "pipe/draw/draw_context.h" | |||
#include "pipe/cell/common.h" | |||
#define DBG_VS 0 | |||
static INLINE unsigned | |||
compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) | |||
{ | |||
unsigned mask = 0; | |||
unsigned i; | |||
/* Do the hardwired planes first: | |||
*/ | |||
if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; | |||
if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; | |||
if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; | |||
if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; | |||
if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; | |||
if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; | |||
/* Followed by any remaining ones: | |||
*/ | |||
for (i = 6; i < nr; i++) { | |||
if (dot4(clip, plane[i]) < 0) | |||
mask |= (1<<i); | |||
} | |||
return mask; | |||
} | |||
/**
 * Transform vertices with the current vertex program/shader.
 * Up to four vertices can be shaded at a time (the exec machine's output
 * channels each hold one float per vertex, indexed by \c j below).
 * \param draw   vertex shader context (exec machine, viewport, clip planes)
 * \param elts   indexes of up to four input vertices
 * \param count  number of vertices to shade [1..4]
 * \param vOut   array of pointers to four output vertices
 */
static void
run_vertex_program(struct spu_vs_context *draw,
                   unsigned elts[4], unsigned count,
                   struct vertex_header *vOut[])
{
   struct spu_exec_machine *machine = &draw->machine;
   unsigned int j;

   /* 16-byte-aligned backing storage for the machine's I/O vectors */
   ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX);
   ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX);

   const float *scale = draw->viewport.scale;
   const float *trans = draw->viewport.translate;

   assert(count <= 4);

   /* Consts does not require 16 byte alignment.
    *
    * NOTE(review): the comment above and the assertion below disagree —
    * ASSERT_ALIGN16 demands 16-byte alignment of the constant buffer;
    * confirm which is intended.
    */
   ASSERT_ALIGN16(draw->constants);
   machine->Consts = (float (*)[4]) draw->constants;

   machine->Inputs = ALIGN16_ASSIGN(inputs);
   machine->Outputs = ALIGN16_ASSIGN(outputs);

   /* Pull this batch of input vertices into machine->Inputs. */
   spu_vertex_fetch( draw, machine, elts, count );

   /* run shader */
   spu_exec_machine_run( machine );

   /* store machine results */
   for (j = 0; j < count; j++) {
      unsigned slot;
      float x, y, z, w;

      /* Handle attr[0] (position) specially:
       *
       * XXX: Computing the clipmask should be done in the vertex
       * program as a set of DP4 instructions appended to the
       * user-provided code.
       */
      x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
      y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
      z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
      w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];

      vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane,
                                           draw->nr_planes);
      vOut[j]->edgeflag = 1;

      /* divide by w */
      w = 1.0f / w;
      x *= w;
      y *= w;
      z *= w;

      /* Viewport mapping to window coordinates.  data[0][3] holds the
       * reciprocal w computed just above, not the clip-space w.
       */
      vOut[j]->data[0][0] = x * scale[0] + trans[0];
      vOut[j]->data[0][1] = y * scale[1] + trans[1];
      vOut[j]->data[0][2] = z * scale[2] + trans[2];
      vOut[j]->data[0][3] = w;

#if DBG_VS
      printf("output[%d]win: %f %f %f %f\n", j,
             vOut[j]->data[0][0],
             vOut[j]->data[0][1],
             vOut[j]->data[0][2],
             vOut[j]->data[0][3]);
#endif

      /* Remaining attributes are packed into sequential post-transform
       * vertex attrib slots.
       */
      for (slot = 1; slot < draw->num_vs_outputs; slot++) {
         vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
         vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
         vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
         vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
#if DBG_VS
         printf("output[%d][%d]: %f %f %f %f\n", j, slot,
                vOut[j]->data[slot][0],
                vOut[j]->data[slot][1],
                vOut[j]->data[slot][2],
                vOut[j]->data[slot][3]);
#endif
      }
   } /* loop over vertices */
}
static void | |||
spu_bind_vertex_shader(struct spu_vs_context *draw, | |||
void *uniforms, | |||
void *planes, | |||
unsigned nr_planes, | |||
unsigned num_outputs | |||
) | |||
{ | |||
draw->constants = (float (*)[4]) uniforms; | |||
(void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); | |||
draw->nr_planes = nr_planes; | |||
draw->num_vs_outputs = num_outputs; | |||
/* specify the shader to interpret/execute */ | |||
spu_exec_machine_init(&draw->machine, | |||
PIPE_MAX_SAMPLERS, | |||
NULL /*samplers*/, | |||
PIPE_SHADER_VERTEX); | |||
} | |||
void | |||
spu_execute_vertex_shader(struct spu_vs_context *draw, | |||
const struct cell_command_vs *vs) | |||
{ | |||
unsigned i; | |||
unsigned j; | |||
draw->machine.Instructions = (struct tgsi_full_instruction *) | |||
vs->shader.instructions; | |||
draw->machine.NumInstructions = vs->shader.num_instructions; | |||
draw->machine.Declarations = (struct tgsi_full_declaration *) | |||
vs->shader.declarations; | |||
draw->machine.NumDeclarations = vs->shader.num_declarations; | |||
spu_bind_vertex_shader(draw, vs->shader.uniforms, | |||
NULL, -1, | |||
vs->shader.num_outputs); | |||
for (i = 0; i < vs->num_elts; i += 4) { | |||
const unsigned batch_size = MIN2(vs->num_elts - i, 4); | |||
unsigned elts[4]; | |||
for (j = 0; j < batch_size; j++) { | |||
switch (vs->bytes_per_elt) { | |||
case 1: elts[j] = ((unsigned char *) vs->elts)[i + j]; break; | |||
case 2: elts[j] = ((unsigned short *)vs->elts)[i + j]; break; | |||
case 4: elts[j] = ((unsigned int *) vs->elts)[i + j]; break; | |||
} | |||
} | |||
run_vertex_program(draw, elts, batch_size, | |||
(struct vertex_header (*)[]) vs->vOut); | |||
} | |||
} |
/* ------------------------- spu_vertex_shader.h ------------------------- */
#ifndef SPU_VERTEX_SHADER_H | |||
#define SPU_VERTEX_SHADER_H | |||
#include "pipe/p_format.h" | |||
#include "spu_exec.h" | |||
struct spu_vs_context; | |||
/** Decode a single vertex-attribute element at \c ptr into floats at
 *  \c attrib (one fetch function per attribute format).
 */
typedef void (*spu_fetch_func)(const void *ptr, float *attrib);

/** Fetch all attributes for \c count vertices, indexed by \c elts, into
 *  the exec machine — presumably its input vectors; confirm against
 *  spu_vertex_fetch.c.
 */
typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
                                     struct spu_exec_machine *machine,
                                     const unsigned *elts,
                                     unsigned count );
/** All SPU-side state needed to run the vertex shader interpreter. */
struct spu_vs_context {
   struct pipe_viewport_state viewport;  /**< scale/translate for window mapping */

   /** Vertex puller state; fetch functions are regenerated when \c dirty
    *  is set (see spu_vertex_fetch in this header).
    */
   struct {
      const ubyte *src_ptr[PIPE_ATTRIB_MAX];    /**< base address per attribute */
      unsigned pitch[PIPE_ATTRIB_MAX];          /**< byte stride per attribute */
      enum pipe_format format[PIPE_ATTRIB_MAX]; /**< element format per attribute */
      unsigned nr_attrs;                        /**< number of active attributes */
      boolean dirty;                            /**< layout changed; rebuild fetch funcs */

      spu_fetch_func fetch[PIPE_ATTRIB_MAX];    /**< per-attribute element decoder */
      spu_full_fetch_func fetch_func;           /**< fetches a whole vertex batch */
   } vertex_fetch;

   /* Clip derived state:
    */
   float plane[12][4];    /**< 6 frustum planes + up to 6 user planes */
   unsigned nr_planes;

   struct spu_exec_machine machine;      /**< TGSI interpreter state */
   const float (*constants)[4];          /**< shader constant buffer */

   unsigned num_vs_outputs;              /**< post-transform attrib slot count */
};
extern void spu_update_vertex_fetch(struct spu_vs_context *draw); | |||
static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, | |||
struct spu_exec_machine *machine, | |||
const unsigned *elts, | |||
unsigned count) | |||
{ | |||
if (draw->vertex_fetch.dirty) { | |||
spu_update_vertex_fetch(draw); | |||
draw->vertex_fetch.dirty = 0; | |||
} | |||
(*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); | |||
} | |||
struct cell_command_vs; | |||
extern void | |||
spu_execute_vertex_shader(struct spu_vs_context *draw, | |||
const struct cell_command_vs *vs); | |||
#endif /* SPU_VERTEX_SHADER_H */ |