ソースを参照

gallium: call tgsi_set_exec_mask() and use exec mask in SSE ARL code

This prevents vertex shaders from referencing invalid memory locations when
the shader is operating on fewer than four vertices or fragments.
tags/mesa_20090313
Brian Paul 17年前
コミット
f0debbb0bb

+ 6
- 0
src/gallium/auxiliary/draw/draw_vs_exec.c ファイルの表示

@@ -120,6 +120,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}

tgsi_set_exec_mask(machine,
1,
max_vertices > 1,
max_vertices > 2,
max_vertices > 3);

/* run interpreter */
tgsi_exec_machine_run( machine );


+ 14
- 0
src/gallium/auxiliary/draw/draw_vs_sse.c ファイルの表示

@@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
struct tgsi_exec_machine *machine = shader->machine;
unsigned int i;

/* By default, execute all channels. XXX move this inside the loop
* below when we support shader conditionals/loops.
*/
tgsi_set_exec_mask(machine, 1, 1, 1, 1);

for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);

if (max_vertices < 4) {
/* disable the unused execution channels */
tgsi_set_exec_mask(machine,
1,
max_vertices > 1,
max_vertices > 2,
0);
}

/* run compiled shader
*/
shader->func(machine->Inputs,

+ 32
- 3
src/gallium/auxiliary/tgsi/tgsi_sse2.c ファイルの表示

@@ -69,6 +69,9 @@

/*
 * Short local aliases for TGSI_EXEC_* names, passed to get_temp() as
 * (index, channel) pairs by the code generator below:
 *   TEMP_R0          - temp slot used as scratch storage for fetched values
 *   TEMP_ADDR        - temp slot holding the address (offset) register
 *   TEMP_EXEC_MASK_I - index of the temp slot holding the execution mask
 *   TEMP_EXEC_MASK_C - channel of that slot holding the execution mask
 */
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I
#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C


/**
* X86 utility functions.
@@ -230,6 +233,9 @@ emit_const(
int indirectIndex )
{
if (indirect) {
/* 'vec' is the offset from the address register's value.
* We're loading CONST[ADDR+vec] into an xmm register.
*/
struct x86_reg r0 = get_input_base();
struct x86_reg r1 = get_output_base();
uint i;
@@ -240,18 +246,40 @@ emit_const(
x86_push( func, r0 );
x86_push( func, r1 );

/*
* Loop over the four pixels or vertices in the quad.
* Get the value of the address (offset) register for pixel/vertex[i],
* add it to the src offset and index into the constant buffer.
* Note that we're working on SOA data.
* If any of the pixel/vertex execution channels are unused their
* values will be garbage. It's very important that we don't use
* those garbage values as indexes into the constant buffer since
* that'll cause segfaults.
* The solution is to bitwise-AND the offset with the execution mask
* register whose values are either 0 or ~0.
* The caller must set up the execution mask register to indicate
* which channels are valid/alive before running the shader.
* The execution mask will also figure into loops and conditionals
* someday.
*/
for (i = 0; i < QUAD_SIZE; i++) {
x86_lea( func, r0, get_const( vec, chan ) );
/* r1 = address register[i] */
x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
/* r0 = execution mask[i] */
x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) );
/* r1 = r1 & r0 */
x86_and( func, r1, r0 );
/* r0 = 'vec', the offset */
x86_lea( func, r0, get_const( vec, chan ) );

/* Quick hack to multiply by 16 -- need to add SHL to rtasm.
/* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm.
*/
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );

x86_add( func, r0, r1 );
x86_add( func, r0, r1 ); /* r0 = r0 + r1 */
x86_mov( func, r1, x86_deref( r0 ) );
x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
}
@@ -265,6 +293,7 @@ emit_const(
get_temp( TEMP_R0, CHAN_X ) );
}
else {
/* 'vec' is the index into the src register file, such as TEMP[vec] */
assert( vec >= 0 );

sse_movss(

+ 2
- 1
src/gallium/drivers/softpipe/sp_fs_sse.c ファイルの表示

@@ -92,7 +92,8 @@ fs_sse_run( const struct sp_fragment_shader *base,
machine->Temps);

/* init kill mask */
machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0;
tgsi_set_kill_mask(machine, 0x0);
tgsi_set_exec_mask(machine, 1, 1, 1, 1);

shader->func( machine->Inputs,
machine->Outputs,

読み込み中…
キャンセル
保存