Interpolate FS attributes in the shader. Do not copy WPOS in FS. (tags/mesa_20090313)
| @@ -31,6 +31,7 @@ | |||
| #ifndef SP_HEADERS_H | |||
| #define SP_HEADERS_H | |||
| #include "../tgsi/core/tgsi_core.h" | |||
| #define PRIM_POINT 1 | |||
| #define PRIM_LINE 2 | |||
| @@ -44,7 +45,6 @@ | |||
| #define QUAD_BOTTOM_RIGHT 1 | |||
| #define QUAD_TOP_LEFT 2 | |||
| #define QUAD_TOP_RIGHT 3 | |||
| #define QUAD_SIZE (2*2) | |||
| #define MASK_BOTTOM_LEFT 0x1 | |||
| #define MASK_BOTTOM_RIGHT 0x2 | |||
| @@ -53,17 +53,6 @@ | |||
| #define MASK_ALL 0xf | |||
| #define NUM_CHANNELS 4 /* avoid confusion between 4 pixels and 4 channels */ | |||
| struct setup_coefficient { /* coefficients consumed by cinterp/linterp/pinterp */ | |||
| float a0[NUM_CHANNELS]; /* constant term, in an xyzw layout */ | |||
| float dadx[NUM_CHANNELS]; /* multiplied by the pixel x position */ | |||
| float dady[NUM_CHANNELS]; /* multiplied by the pixel y position */ | |||
| }; | |||
| /** | |||
| * Encodes everything we need to know about a 2x2 pixel block. Uses | |||
| * "Channel-Serial" or "SoA" layout. | |||
| @@ -76,17 +65,13 @@ struct quad_header { | |||
| unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ | |||
| struct { | |||
| float color[4][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ | |||
| float color[NUM_CHANNELS][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ | |||
| float depth[QUAD_SIZE]; | |||
| } outputs; | |||
| float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ | |||
| const struct setup_coefficient *coef; | |||
| const enum interp_mode *interp; /* XXX: this information should be | |||
| * encoded in fragment program DECL | |||
| * statements. */ | |||
| const struct tgsi_interp_coef *coef; | |||
| unsigned nr_attrs; | |||
| }; | |||
| @@ -80,7 +80,7 @@ struct setup_stage { | |||
| float oneoverarea; | |||
| struct setup_coefficient coef[FRAG_ATTRIB_MAX]; | |||
| struct tgsi_interp_coef coef[FRAG_ATTRIB_MAX]; | |||
| struct quad_header quad; | |||
| struct { | |||
| @@ -33,15 +33,12 @@ | |||
| */ | |||
| #include "pipe/p_util.h" | |||
| #include "tgsi/core/tgsi_core.h" | |||
| #include "sp_context.h" | |||
| #include "sp_headers.h" | |||
| #include "sp_quad.h" | |||
| #include "sp_tex_sample.h" | |||
| #include "main/mtypes.h" | |||
| #if defined __GNUC__ | |||
| #define USE_ALIGNED_ATTRIBS 1 | |||
| @@ -66,157 +63,33 @@ quad_shade_stage(struct quad_stage *qs) | |||
| return (struct quad_shade_stage *) qs; | |||
| } | |||
| struct exec_machine { /* scratch state used while interpolating one quad's inputs */ | |||
| const struct setup_coefficient *coef; /**< will point to quad->coef */ | |||
| float attr[PIPE_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE] ALIGN16_SUFFIX; /**< indexed [attribute][channel][pixel of the quad] */ | |||
| }; | |||
| /** | |||
| * Compute quad's attributes values, as constants (GL_FLAT shading). | |||
| */ | |||
| static INLINE void cinterp( struct exec_machine *exec, | |||
| unsigned attrib, | |||
| unsigned i ) | |||
| { | |||
| unsigned j; | |||
| for (j = 0; j < QUAD_SIZE; j++) { | |||
| exec->attr[attrib][i][j] = exec->coef[attrib].a0[i]; | |||
| } | |||
| } | |||
| /** | |||
| * Compute quad's attribute values by linear interpolation. | |||
| * | |||
| * Push into the fp: | |||
| * | |||
| * INPUT[attr] = MAD COEF_A0[attr], COEF_DADX[attr], INPUT_WPOS.xxxx | |||
| * INPUT[attr] = MAD INPUT[attr], COEF_DADY[attr], INPUT_WPOS.yyyy | |||
| */ | |||
| static INLINE void linterp( struct exec_machine *exec, | |||
| unsigned attrib, | |||
| unsigned i ) | |||
| { | |||
| unsigned j; | |||
| for (j = 0; j < QUAD_SIZE; j++) { | |||
| const float x = exec->attr[FRAG_ATTRIB_WPOS][0][j]; | |||
| const float y = exec->attr[FRAG_ATTRIB_WPOS][1][j]; | |||
| exec->attr[attrib][i][j] = (exec->coef[attrib].a0[i] + | |||
| exec->coef[attrib].dadx[i] * x + | |||
| exec->coef[attrib].dady[i] * y); | |||
| } | |||
| } | |||
| /** | |||
| * Compute quad's attribute values by linear interpolation with | |||
| * perspective correction. | |||
| * | |||
| * Push into the fp: | |||
| * | |||
| * INPUT[attr] = MAD COEF_DADX[attr], INPUT_WPOS.xxxx, COEF_A0[attr] | |||
| * INPUT[attr] = MAD COEF_DADY[attr], INPUT_WPOS.yyyy, INPUT[attr] | |||
| * TMP = RCP INPUT_WPOS.w | |||
| * INPUT[attr] = MUL INPUT[attr], TMP.xxxx | |||
| * | |||
| */ | |||
| static INLINE void pinterp( struct exec_machine *exec, | |||
| unsigned attrib, | |||
| unsigned i ) | |||
| { | |||
| unsigned j; | |||
| for (j = 0; j < QUAD_SIZE; j++) { | |||
| const float x = exec->attr[FRAG_ATTRIB_WPOS][0][j]; | |||
| const float y = exec->attr[FRAG_ATTRIB_WPOS][1][j]; | |||
| /* FRAG_ATTRIB_WPOS.w here is really 1/w */ | |||
| const float w = 1.0 / exec->attr[FRAG_ATTRIB_WPOS][3][j]; | |||
| exec->attr[attrib][i][j] = ((exec->coef[attrib].a0[i] + | |||
| exec->coef[attrib].dadx[i] * x + | |||
| exec->coef[attrib].dady[i] * y) * w); | |||
| } | |||
| } | |||
| /* This should be done by the fragment shader execution unit (code | |||
| * generated from the decl instructions). Do it here for now. | |||
| */ | |||
| static void | |||
| shade_quad( struct quad_stage *qs, struct quad_header *quad ) | |||
| shade_quad( | |||
| struct quad_stage *qs, | |||
| struct quad_header *quad ) | |||
| { | |||
| struct quad_shade_stage *qss = quad_shade_stage(qs); | |||
| struct quad_shade_stage *qss = quad_shade_stage( qs ); | |||
| struct softpipe_context *softpipe = qs->softpipe; | |||
| struct exec_machine exec; | |||
| const float fx = quad->x0; | |||
| const float fy = quad->y0; | |||
| unsigned attr, i; | |||
| struct tgsi_exec_machine machine; | |||
| #if USE_ALIGNED_ATTRIBS | |||
| struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX] ALIGN16_SUFFIX; | |||
| struct tgsi_exec_vector inputs[PIPE_ATTRIB_MAX] ALIGN16_SUFFIX; | |||
| struct tgsi_exec_vector outputs[PIPE_ATTRIB_MAX] ALIGN16_SUFFIX; | |||
| #else | |||
| struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1]; | |||
| struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX + 1]; | |||
| struct tgsi_exec_vector inputs[PIPE_ATTRIB_MAX + 1]; | |||
| struct tgsi_exec_vector outputs[PIPE_ATTRIB_MAX + 1]; | |||
| #endif | |||
| exec.coef = quad->coef; | |||
| /* Position: | |||
| */ | |||
| exec.attr[FRAG_ATTRIB_WPOS][0][0] = fx; | |||
| exec.attr[FRAG_ATTRIB_WPOS][0][1] = fx + 1.0; | |||
| exec.attr[FRAG_ATTRIB_WPOS][0][2] = fx; | |||
| exec.attr[FRAG_ATTRIB_WPOS][0][3] = fx + 1.0; | |||
| exec.attr[FRAG_ATTRIB_WPOS][1][0] = fy; | |||
| exec.attr[FRAG_ATTRIB_WPOS][1][1] = fy; | |||
| exec.attr[FRAG_ATTRIB_WPOS][1][2] = fy + 1.0; | |||
| exec.attr[FRAG_ATTRIB_WPOS][1][3] = fy + 1.0; | |||
| /* Z and W are done by linear interpolation */ | |||
| if (softpipe->need_z) { | |||
| linterp(&exec, 0, 2); /* attr[0].z */ | |||
| } | |||
| if (softpipe->need_w) { | |||
| linterp(&exec, 0, 3); /* attr[0].w */ | |||
| /*invert(&exec, 0, 3);*/ | |||
| } | |||
| /* Interpolate all the remaining attributes. This will get pushed | |||
| * into the fragment program's responsibilities at some point. | |||
| * Start at 1 to skip fragment position attribute (computed above). | |||
| */ | |||
| for (attr = 1; attr < quad->nr_attrs; attr++) { | |||
| switch (softpipe->interp[attr]) { | |||
| case INTERP_CONSTANT: | |||
| for (i = 0; i < NUM_CHANNELS; i++) | |||
| cinterp(&exec, attr, i); | |||
| break; | |||
| case INTERP_LINEAR: | |||
| for (i = 0; i < NUM_CHANNELS; i++) | |||
| linterp(&exec, attr, i); | |||
| break; | |||
| case INTERP_PERSPECTIVE: | |||
| for (i = 0; i < NUM_CHANNELS; i++) | |||
| pinterp(&exec, attr, i); | |||
| break; | |||
| } | |||
| } | |||
| #ifdef DEBUG | |||
| memset( &machine, 0, sizeof( machine ) ); | |||
| #endif | |||
| assert( sizeof( struct tgsi_exec_vector ) == sizeof( exec.attr[0] ) ); | |||
| /* init machine state */ | |||
| tgsi_exec_machine_init( | |||
| &machine, | |||
| @@ -228,33 +101,40 @@ shade_quad( struct quad_stage *qs, struct quad_header *quad ) | |||
| machine.Consts = softpipe->fs.constants->constant; | |||
| #if USE_ALIGNED_ATTRIBS | |||
| machine.Inputs = (struct tgsi_exec_vector *) exec.attr; | |||
| machine.Inputs = inputs; | |||
| machine.Outputs = outputs; | |||
| #else | |||
| machine.Inputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs ); | |||
| machine.Outputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs ); | |||
| memcpy( | |||
| machine.Inputs, | |||
| exec.attr, | |||
| softpipe->nr_attrs * sizeof( struct tgsi_exec_vector ) ); | |||
| #endif | |||
| machine.InterpCoefs = quad->coef; | |||
| machine.Inputs[0].xyzw[0].f[0] = fx; | |||
| machine.Inputs[0].xyzw[0].f[1] = fx + 1.0; | |||
| machine.Inputs[0].xyzw[0].f[2] = fx; | |||
| machine.Inputs[0].xyzw[0].f[3] = fx + 1.0; | |||
| machine.Inputs[0].xyzw[1].f[0] = fy; | |||
| machine.Inputs[0].xyzw[1].f[1] = fy; | |||
| machine.Inputs[0].xyzw[1].f[2] = fy + 1.0; | |||
| machine.Inputs[0].xyzw[1].f[3] = fy + 1.0; | |||
| /* run shader */ | |||
| tgsi_exec_machine_run( &machine ); | |||
| /* store result color */ | |||
| memcpy( | |||
| quad->outputs.color, | |||
| &machine.Outputs[FRAG_ATTRIB_COL0].xyzw[0].f[0], | |||
| &machine.Outputs[1].xyzw[0].f[0], | |||
| sizeof( quad->outputs.color ) ); | |||
| if( softpipe->need_z ) { | |||
| /* XXX temporary */ | |||
| quad->outputs.depth[0] = exec.attr[0][2][0]; | |||
| quad->outputs.depth[1] = exec.attr[0][2][1]; | |||
| quad->outputs.depth[2] = exec.attr[0][2][2]; | |||
| quad->outputs.depth[3] = exec.attr[0][2][3]; | |||
| memcpy( | |||
| quad->outputs.depth, | |||
| &machine.Outputs[0].xyzw[2], | |||
| sizeof( quad->outputs.depth ) ); | |||
| } | |||
| /* shader may cull fragments */ | |||
| @@ -87,7 +87,7 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) | |||
| * fragment position (XYZW). | |||
| */ | |||
| if (softpipe->depth_test.enabled || | |||
| (inputsRead & FRAG_ATTRIB_WPOS)) | |||
| (inputsRead & (1 << FRAG_ATTRIB_WPOS))) | |||
| softpipe->need_z = TRUE; | |||
| else | |||
| softpipe->need_z = FALSE; | |||
| @@ -95,7 +95,7 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) | |||
| /* Need W if we do any perspective-corrected interpolation or the | |||
| * fragment program uses the fragment position. | |||
| */ | |||
| if (inputsRead & FRAG_ATTRIB_WPOS) | |||
| if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) | |||
| softpipe->need_w = TRUE; | |||
| else | |||
| softpipe->need_w = FALSE; | |||
| @@ -88,6 +88,7 @@ tgsi_default_declaration( void ) | |||
| declaration.Size = 1; | |||
| declaration.File = TGSI_FILE_NULL; | |||
| declaration.Declare = TGSI_DECLARE_RANGE; | |||
| declaration.UsageMask = TGSI_WRITEMASK_XYZW; | |||
| declaration.Interpolate = 0; | |||
| declaration.Semantic = 0; | |||
| declaration.Padding = 0; | |||
| @@ -100,6 +101,7 @@ struct tgsi_declaration | |||
| tgsi_build_declaration( | |||
| unsigned file, | |||
| unsigned declare, | |||
| unsigned usage_mask, | |||
| unsigned interpolate, | |||
| unsigned semantic, | |||
| struct tgsi_header *header ) | |||
| @@ -112,6 +114,7 @@ tgsi_build_declaration( | |||
| declaration = tgsi_default_declaration(); | |||
| declaration.File = file; | |||
| declaration.Declare = declare; | |||
| declaration.UsageMask = usage_mask; | |||
| declaration.Interpolate = interpolate; | |||
| declaration.Semantic = semantic; | |||
| @@ -162,6 +165,7 @@ tgsi_build_full_declaration( | |||
| *declaration = tgsi_build_declaration( | |||
| full_decl->Declaration.File, | |||
| full_decl->Declaration.Declare, | |||
| full_decl->Declaration.UsageMask, | |||
| full_decl->Declaration.Interpolate, | |||
| full_decl->Declaration.Semantic, | |||
| header ); | |||
| @@ -38,6 +38,7 @@ struct tgsi_declaration | |||
| tgsi_build_declaration( | |||
| unsigned file, | |||
| unsigned declare, | |||
| unsigned usage_mask, | |||
| unsigned interpolate, | |||
| unsigned semantic, | |||
| struct tgsi_header *header ); | |||
| @@ -633,6 +633,22 @@ dump_declaration_short( | |||
| assert( 0 ); | |||
| } | |||
| if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) { | |||
| CHR( '.' ); | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { | |||
| CHR( 'x' ); | |||
| } | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { | |||
| CHR( 'y' ); | |||
| } | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { | |||
| CHR( 'z' ); | |||
| } | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { | |||
| CHR( 'w' ); | |||
| } | |||
| } | |||
| if( decl->Declaration.Interpolate ) { | |||
| TXT( ", " ); | |||
| ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); | |||
| @@ -659,6 +675,21 @@ dump_declaration_verbose( | |||
| ENM( decl->Declaration.File, TGSI_FILES ); | |||
| TXT( "\nDeclare : " ); | |||
| ENM( decl->Declaration.Declare, TGSI_DECLARES ); | |||
| if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { | |||
| TXT( "\nUsageMask : " ); | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { | |||
| CHR( 'X' ); | |||
| } | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { | |||
| CHR( 'Y' ); | |||
| } | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { | |||
| CHR( 'Z' ); | |||
| } | |||
| if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { | |||
| CHR( 'W' ); | |||
| } | |||
| } | |||
| if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { | |||
| TXT( "\nInterpolate: " ); | |||
| UID( decl->Declaration.Interpolate ); | |||
| @@ -62,7 +62,7 @@ | |||
| void | |||
| tgsi_exec_machine_init( | |||
| struct tgsi_exec_machine *mach, | |||
| struct tgsi_token *tokens, | |||
| const struct tgsi_token *tokens, | |||
| GLuint numSamplers, | |||
| struct tgsi_sampler *samplers) | |||
| { | |||
| @@ -1063,7 +1063,131 @@ fetch_texel( struct tgsi_sampler *sampler, | |||
| } | |||
| } | |||
| static void | |||
| constant_interpolation( | |||
| struct tgsi_exec_machine *mach, | |||
| unsigned attrib, | |||
| unsigned chan ) | |||
| { | |||
| unsigned i; | |||
| for( i = 0; i < QUAD_SIZE; i++ ) { | |||
| mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; | |||
| } | |||
| } | |||
| static void | |||
| linear_interpolation( | |||
| struct tgsi_exec_machine *mach, | |||
| unsigned attrib, | |||
| unsigned chan ) | |||
| { | |||
| unsigned i; | |||
| for( i = 0; i < QUAD_SIZE; i++ ) { | |||
| const float x = mach->Inputs[0].xyzw[0].f[i]; | |||
| const float y = mach->Inputs[0].xyzw[1].f[i]; | |||
| mach->Inputs[attrib].xyzw[chan].f[i] = | |||
| mach->InterpCoefs[attrib].a0[chan] + | |||
| mach->InterpCoefs[attrib].dadx[chan] * x + | |||
| mach->InterpCoefs[attrib].dady[chan] * y; | |||
| } | |||
| } | |||
| static void | |||
| perspective_interpolation( | |||
| struct tgsi_exec_machine *mach, | |||
| unsigned attrib, | |||
| unsigned chan ) | |||
| { | |||
| unsigned i; | |||
| for( i = 0; i < QUAD_SIZE; i++ ) { | |||
| const float x = mach->Inputs[0].xyzw[0].f[i]; | |||
| const float y = mach->Inputs[0].xyzw[1].f[i]; | |||
| // WPOS.w here is really 1/w | |||
| const float w = 1.0f / mach->Inputs[0].xyzw[3].f[i]; | |||
| mach->Inputs[attrib].xyzw[chan].f[i] = | |||
| (mach->InterpCoefs[attrib].a0[chan] + | |||
| mach->InterpCoefs[attrib].dadx[chan] * x + | |||
| mach->InterpCoefs[attrib].dady[chan] * y) * w; | |||
| } | |||
| } | |||
| typedef void (* interpolation_func)( | |||
| struct tgsi_exec_machine *mach, | |||
| unsigned attrib, | |||
| unsigned chan ); | |||
| static void | |||
| exec_declaration( | |||
| struct tgsi_exec_machine *mach, | |||
| const struct tgsi_full_declaration *decl ) | |||
| { | |||
| if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { | |||
| if( decl->Declaration.File == TGSI_FILE_INPUT ) { | |||
| unsigned first, last, mask, i, j; | |||
| interpolation_func interp; | |||
| assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); | |||
| first = decl->u.DeclarationRange.First; | |||
| last = decl->u.DeclarationRange.Last; | |||
| mask = decl->Declaration.UsageMask; | |||
| /* Do not touch WPOS.xy */ | |||
| if( first == 0 ) { | |||
| mask &= ~TGSI_WRITEMASK_XY; | |||
| if( mask == TGSI_WRITEMASK_NONE ) { | |||
| first++; | |||
| if( first > last ) { | |||
| return; | |||
| } | |||
| } | |||
| } | |||
| switch( decl->Interpolation.Interpolate ) { | |||
| case TGSI_INTERPOLATE_CONSTANT: | |||
| interp = constant_interpolation; | |||
| break; | |||
| case TGSI_INTERPOLATE_LINEAR: | |||
| interp = linear_interpolation; | |||
| break; | |||
| case TGSI_INTERPOLATE_PERSPECTIVE: | |||
| interp = perspective_interpolation; | |||
| break; | |||
| default: | |||
| assert( 0 ); | |||
| } | |||
| if( mask == TGSI_WRITEMASK_XYZW ) { | |||
| unsigned i, j; | |||
| for( i = first; i <= last; i++ ) { | |||
| for( j = 0; j < NUM_CHANNELS; j++ ) { | |||
| interp( mach, i, j ); | |||
| } | |||
| } | |||
| } | |||
| else { | |||
| unsigned i, j; | |||
| for( j = 0; j < NUM_CHANNELS; j++ ) { | |||
| if( mask & (1 << j) ) { | |||
| for( i = first; i <= last; i++ ) { | |||
| interp( mach, i, j ); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| static void | |||
| exec_instruction( | |||
| @@ -2161,6 +2285,7 @@ tgsi_exec_machine_run2( | |||
| tgsi_parse_token( &parse ); | |||
| switch( parse.FullToken.Token.Type ) { | |||
| case TGSI_TOKEN_TYPE_DECLARATION: | |||
| exec_declaration( mach, &parse.FullToken.FullDeclaration ); | |||
| break; | |||
| case TGSI_TOKEN_TYPE_IMMEDIATE: | |||
| break; | |||
| @@ -11,23 +11,27 @@ | |||
| extern "C" { | |||
| #endif // defined __cplusplus | |||
| #define NUM_CHANNELS 4 /* R,G,B,A */ | |||
| #define QUAD_SIZE 4 /* 4 pixel/quad */ | |||
| union tgsi_exec_channel | |||
| { | |||
| float f[4]; | |||
| int i[4]; | |||
| unsigned u[4]; | |||
| float f[QUAD_SIZE]; | |||
| int i[QUAD_SIZE]; | |||
| unsigned u[QUAD_SIZE]; | |||
| }; | |||
| struct tgsi_exec_vector | |||
| { | |||
| union tgsi_exec_channel xyzw[4]; | |||
| union tgsi_exec_channel xyzw[NUM_CHANNELS]; | |||
| }; | |||
| #define NUM_CHANNELS 4 /* R,G,B,A */ | |||
| #ifndef QUAD_SIZE | |||
| #define QUAD_SIZE 4 /* 4 pixel/quad */ | |||
| #endif | |||
| struct tgsi_interp_coef /* per-input coefficients read by the fragment input interpolators */ | |||
| { | |||
| float a0[NUM_CHANNELS]; /* constant term, in an xyzw layout */ | |||
| float dadx[NUM_CHANNELS]; /* multiplied by the pixel x position */ | |||
| float dady[NUM_CHANNELS]; /* multiplied by the pixel y position */ | |||
| }; | |||
| #define TEX_CACHE_TILE_SIZE 8 | |||
| #define TEX_CACHE_NUM_ENTRIES 8 | |||
| @@ -55,8 +59,8 @@ struct tgsi_sampler | |||
| struct tgsi_exec_labels | |||
| { | |||
| unsigned labels[128][2]; | |||
| unsigned count; | |||
| unsigned labels[128][2]; | |||
| unsigned count; | |||
| }; | |||
| #define TGSI_EXEC_TEMP_00000000_I 32 | |||
| @@ -109,15 +113,15 @@ struct tgsi_exec_cond_state | |||
| { | |||
| struct tgsi_exec_cond_regs IfPortion; | |||
| struct tgsi_exec_cond_regs ElsePortion; | |||
| unsigned Condition; | |||
| boolean WasElse; | |||
| unsigned Condition; | |||
| boolean WasElse; | |||
| }; | |||
| /* XXX: This is temporary */ | |||
| struct tgsi_exec_cond_stack | |||
| { | |||
| struct tgsi_exec_cond_state States[8]; | |||
| unsigned Index; /* into States[] */ | |||
| unsigned Index; /* into States[] */ | |||
| }; | |||
| struct tgsi_exec_machine | |||
| @@ -138,15 +142,19 @@ struct tgsi_exec_machine | |||
| struct tgsi_sampler *Samplers; | |||
| float Imms[256][4]; | |||
| unsigned ImmLimit; | |||
| float (*Consts)[4]; | |||
| const struct tgsi_exec_vector *Inputs; | |||
| float Imms[256][4]; | |||
| unsigned ImmLimit; | |||
| float (*Consts)[4]; | |||
| struct tgsi_exec_vector *Inputs; | |||
| struct tgsi_exec_vector *Outputs; | |||
| struct tgsi_token *Tokens; | |||
| unsigned Processor; | |||
| const struct tgsi_token *Tokens; | |||
| unsigned Processor; | |||
| /* GEOMETRY processor only. */ | |||
| unsigned *Primitives; | |||
| unsigned *Primitives; | |||
| /* FRAGMENT processor only. */ | |||
| const struct tgsi_interp_coef *InterpCoefs; | |||
| struct tgsi_exec_cond_stack CondStack; | |||
| #if XXX_SSE | |||
| @@ -157,7 +165,7 @@ struct tgsi_exec_machine | |||
| void | |||
| tgsi_exec_machine_init( | |||
| struct tgsi_exec_machine *mach, | |||
| struct tgsi_token *tokens, | |||
| const struct tgsi_token *tokens, | |||
| unsigned numSamplers, | |||
| struct tgsi_sampler *samplers); | |||
| @@ -52,15 +52,33 @@ struct tgsi_token | |||
| #define TGSI_DECLARE_RANGE 0 | |||
| #define TGSI_DECLARE_MASK 1 | |||
| #define TGSI_WRITEMASK_NONE 0x00 | |||
| #define TGSI_WRITEMASK_X 0x01 | |||
| #define TGSI_WRITEMASK_Y 0x02 | |||
| #define TGSI_WRITEMASK_XY 0x03 | |||
| #define TGSI_WRITEMASK_Z 0x04 | |||
| #define TGSI_WRITEMASK_XZ 0x05 | |||
| #define TGSI_WRITEMASK_YZ 0x06 | |||
| #define TGSI_WRITEMASK_XYZ 0x07 | |||
| #define TGSI_WRITEMASK_W 0x08 | |||
| #define TGSI_WRITEMASK_XW 0x09 | |||
| #define TGSI_WRITEMASK_YW 0x0A | |||
| #define TGSI_WRITEMASK_XYW 0x0B | |||
| #define TGSI_WRITEMASK_ZW 0x0C | |||
| #define TGSI_WRITEMASK_XZW 0x0D | |||
| #define TGSI_WRITEMASK_YZW 0x0E | |||
| #define TGSI_WRITEMASK_XYZW 0x0F | |||
| struct tgsi_declaration | |||
| { | |||
| unsigned Type : 4; /* TGSI_TOKEN_TYPE_DECLARATION */ | |||
| unsigned Size : 8; /* UINT */ | |||
| unsigned File : 4; /* TGSI_FILE_ */ | |||
| unsigned Declare : 4; /* TGSI_DECLARE_ */ | |||
| unsigned UsageMask : 4; /* TGSI_WRITEMASK_ */ | |||
| unsigned Interpolate : 1; /* BOOL */ | |||
| unsigned Semantic : 1; /* BOOL */ | |||
| unsigned Padding : 9; | |||
| unsigned Padding : 5; | |||
| unsigned Extended : 1; /* BOOL */ | |||
| }; | |||
| @@ -1226,23 +1244,6 @@ struct tgsi_instruction_ext_texture | |||
| unsigned Extended : 1; /* BOOL */ | |||
| }; | |||
| #define TGSI_WRITEMASK_NONE 0x00 | |||
| #define TGSI_WRITEMASK_X 0x01 | |||
| #define TGSI_WRITEMASK_Y 0x02 | |||
| #define TGSI_WRITEMASK_XY 0x03 | |||
| #define TGSI_WRITEMASK_Z 0x04 | |||
| #define TGSI_WRITEMASK_XZ 0x05 | |||
| #define TGSI_WRITEMASK_YZ 0x06 | |||
| #define TGSI_WRITEMASK_XYZ 0x07 | |||
| #define TGSI_WRITEMASK_W 0x08 | |||
| #define TGSI_WRITEMASK_XW 0x09 | |||
| #define TGSI_WRITEMASK_YW 0x0A | |||
| #define TGSI_WRITEMASK_XYW 0x0B | |||
| #define TGSI_WRITEMASK_ZW 0x0C | |||
| #define TGSI_WRITEMASK_XZW 0x0D | |||
| #define TGSI_WRITEMASK_YZW 0x0E | |||
| #define TGSI_WRITEMASK_XYZW 0x0F | |||
| struct tgsi_instruction_ext_predicate | |||
| { | |||
| unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_PREDICATE */ | |||
| @@ -467,13 +467,15 @@ static struct tgsi_full_declaration | |||
| make_frag_input_decl( | |||
| GLuint first, | |||
| GLuint last, | |||
| GLuint interpolate ) | |||
| GLuint interpolate, | |||
| GLuint usage_mask ) | |||
| { | |||
| struct tgsi_full_declaration decl; | |||
| decl = tgsi_default_full_declaration(); | |||
| decl.Declaration.File = TGSI_FILE_INPUT; | |||
| decl.Declaration.Declare = TGSI_DECLARE_RANGE; | |||
| decl.Declaration.UsageMask = usage_mask; | |||
| decl.Declaration.Interpolate = 1; | |||
| decl.u.DeclarationRange.First = first; | |||
| decl.u.DeclarationRange.Last = last; | |||
| @@ -485,13 +487,15 @@ make_frag_input_decl( | |||
| static struct tgsi_full_declaration | |||
| make_frag_output_decl( | |||
| GLuint index, | |||
| GLuint semantic_name ) | |||
| GLuint semantic_name, | |||
| GLuint usage_mask ) | |||
| { | |||
| struct tgsi_full_declaration decl; | |||
| decl = tgsi_default_full_declaration(); | |||
| decl.Declaration.File = TGSI_FILE_OUTPUT; | |||
| decl.Declaration.Declare = TGSI_DECLARE_RANGE; | |||
| decl.Declaration.UsageMask = usage_mask; | |||
| decl.Declaration.Semantic = 1; | |||
| decl.u.DeclarationRange.First = index; | |||
| decl.u.DeclarationRange.Last = index; | |||
| @@ -514,6 +518,7 @@ tgsi_mesa_compile_fp_program( | |||
| struct tgsi_full_dst_register *fulldst; | |||
| struct tgsi_full_src_register *fullsrc; | |||
| GLuint inputs_read; | |||
| GLboolean reads_wpos; | |||
| GLuint preamble_size = 0; | |||
| *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); | |||
| @@ -523,19 +528,33 @@ tgsi_mesa_compile_fp_program( | |||
| ti = 2; | |||
| /* | |||
| * Input 0 is always read, at least implicitly by the MOV instruction generated | |||
| * below, so mark it as used. | |||
| */ | |||
| inputs_read = program->Base.InputsRead | 1; | |||
| reads_wpos = program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS); | |||
| inputs_read = program->Base.InputsRead | (1 << FRAG_ATTRIB_WPOS); | |||
| /* | |||
| * Declare input attributes. Note that we do not interpolate fragment position. | |||
| */ | |||
| /* Fragment position. */ | |||
| if( reads_wpos ) { | |||
| fulldecl = make_frag_input_decl( | |||
| 0, | |||
| 0, | |||
| TGSI_INTERPOLATE_CONSTANT, | |||
| TGSI_WRITEMASK_XY ); | |||
| ti += tgsi_build_full_declaration( | |||
| &fulldecl, | |||
| &tokens[ti], | |||
| header, | |||
| maxTokens - ti ); | |||
| } | |||
| /* Fragment zw. */ | |||
| fulldecl = make_frag_input_decl( | |||
| 0, | |||
| 0, | |||
| TGSI_INTERPOLATE_CONSTANT ); | |||
| TGSI_INTERPOLATE_LINEAR, | |||
| reads_wpos ? TGSI_WRITEMASK_ZW : TGSI_WRITEMASK_Z ); | |||
| ti += tgsi_build_full_declaration( | |||
| &fulldecl, | |||
| &tokens[ti], | |||
| @@ -552,7 +571,8 @@ tgsi_mesa_compile_fp_program( | |||
| fulldecl = make_frag_input_decl( | |||
| 1, | |||
| 1 + count - 1, | |||
| TGSI_INTERPOLATE_LINEAR ); | |||
| TGSI_INTERPOLATE_LINEAR, | |||
| TGSI_WRITEMASK_XYZW ); | |||
| ti += tgsi_build_full_declaration( | |||
| &fulldecl, | |||
| &tokens[ti], | |||
| @@ -569,7 +589,8 @@ tgsi_mesa_compile_fp_program( | |||
| fulldecl = make_frag_output_decl( | |||
| 0, | |||
| TGSI_SEMANTIC_DEPTH ); | |||
| TGSI_SEMANTIC_DEPTH, | |||
| TGSI_WRITEMASK_Z ); | |||
| ti += tgsi_build_full_declaration( | |||
| &fulldecl, | |||
| &tokens[ti], | |||
| @@ -579,7 +600,8 @@ tgsi_mesa_compile_fp_program( | |||
| if( program->Base.OutputsWritten & (1 << FRAG_RESULT_COLR) ) { | |||
| fulldecl = make_frag_output_decl( | |||
| 1, | |||
| TGSI_SEMANTIC_COLOR ); | |||
| TGSI_SEMANTIC_COLOR, | |||
| TGSI_WRITEMASK_XYZW ); | |||
| ti += tgsi_build_full_declaration( | |||
| &fulldecl, | |||
| &tokens[ti], | |||
| @@ -587,38 +609,6 @@ tgsi_mesa_compile_fp_program( | |||
| maxTokens - ti ); | |||
| } | |||
| /* | |||
| * Copy input fragment xyz to output xyz. | |||
| * If the shader writes depth, do not copy the z component. | |||
| */ | |||
| fullinst = tgsi_default_full_instruction(); | |||
| fullinst.Instruction.Opcode = TGSI_OPCODE_MOV; | |||
| fullinst.Instruction.NumDstRegs = 1; | |||
| fullinst.Instruction.NumSrcRegs = 1; | |||
| fulldst = &fullinst.FullDstRegisters[0]; | |||
| fulldst->DstRegister.File = TGSI_FILE_OUTPUT; | |||
| fulldst->DstRegister.Index = 0; | |||
| if( program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR) ) { | |||
| fulldst->DstRegister.WriteMask = TGSI_WRITEMASK_XY; | |||
| } | |||
| else { | |||
| fulldst->DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; | |||
| } | |||
| fullsrc = &fullinst.FullSrcRegisters[0]; | |||
| fullsrc->SrcRegister.File = TGSI_FILE_INPUT; | |||
| fullsrc->SrcRegister.Index = 0; | |||
| ti += tgsi_build_full_instruction( | |||
| &fullinst, | |||
| &tokens[ti], | |||
| header, | |||
| maxTokens - ti ); | |||
| preamble_size++; | |||
| for( i = 0; i < program->Base.NumInstructions; i++ ) { | |||
| if( compile_instruction( | |||
| &program->Base.Instructions[i], | |||