Browse Source

draw: enable FSE by default

tags/mesa_20090313
Keith Whitwell 17 years ago
parent
commit
728d1f7f43

+ 4
- 7
src/gallium/auxiliary/draw/draw_pt.c View File

@@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw,

if (opt == 0)
middle = draw->pt.middle.fetch_emit;
else if (opt == PT_SHADE && draw->pt.test_fse)
else if (opt == PT_SHADE)
middle = draw->pt.middle.fetch_shade_emit;
else
middle = draw->pt.middle.general;
@@ -118,12 +118,9 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;

if (draw->pt.test_fse) {
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
}

draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;

draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)

+ 4
- 0
src/gallium/auxiliary/draw/draw_vs.h View File

@@ -123,6 +123,10 @@ struct draw_vertex_shader {

struct tgsi_shader_info info;

/* Extracted from shader:
*/
const float (*immediates)[4];

/*
*/
struct draw_vs_varient *varient[16];

+ 79
- 26
src/gallium/auxiliary/draw/draw_vs_aos.c View File

@@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a,
a.disp == b.disp);
}
/**
 * Return the scratch register (cp->temp_EBP) loaded with the aos_machine
 * pointer selected by `value` (X86_IMMEDIATES, X86_CONSTANTS or
 * X86_ATTRIBS).
 *
 * cp->ebp records which pointer was requested last.  A load from the
 * aos_machine (addressed through cp->machine_EDX) is emitted only when
 * `value` differs from that record; otherwise the register is returned
 * without emitting anything.
 */
struct x86_reg aos_get_x86( struct aos_compilation *cp,
                            unsigned value )
{
   unsigned field_offset = 0;

   /* Requested pointer is the one recorded in cp->ebp: nothing to emit. */
   if (cp->ebp == value)
      return cp->temp_EBP;

   /* Locate the pointer field inside struct aos_machine. */
   if (value == X86_IMMEDIATES) {
      field_offset = Offset(struct aos_machine, immediates);
   }
   else if (value == X86_CONSTANTS) {
      field_offset = Offset(struct aos_machine, constants);
   }
   else if (value == X86_ATTRIBS) {
      field_offset = Offset(struct aos_machine, attrib);
   }
   else {
      /* Unknown selector: trap in debug builds, fall back to offset 0. */
      assert(0);
      field_offset = 0;
   }

   /* temp_EBP = *(machine + field_offset) */
   x86_mov(cp->func, cp->temp_EBP,
           x86_make_disp(cp->machine_EDX, field_offset));

   cp->ebp = value;

   return cp->temp_EBP;
}


static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
unsigned file,
@@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
case TGSI_FILE_TEMPORARY:
return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));

case TGSI_FILE_IMMEDIATE:
return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));

case TGSI_FILE_CONSTANT:
return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));

case AOS_FILE_INTERNAL:
return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));

case TGSI_FILE_IMMEDIATE:
return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float));

case TGSI_FILE_CONSTANT:
return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float));

default:
ERROR(cp, "unknown reg file");
return x86_make_reg(0,0);
@@ -1865,6 +1896,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp )
}


#if 0
static boolean note_immediate( struct aos_compilation *cp,
struct tgsi_full_immediate *imm )
{
@@ -1877,6 +1909,7 @@ static boolean note_immediate( struct aos_compilation *cp,

return TRUE;
}
#endif



@@ -1939,6 +1972,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);

x86_init_func(cp.func);

@@ -1946,6 +1980,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,

x86_push(cp.func, cp.idx_EBX);
x86_push(cp.func, cp.count_ESI);
x86_push(cp.func, cp.temp_EBP);


/* Load arguments into regs:
@@ -1988,8 +2023,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,

switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
#if 0
if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
goto fail;
#endif
break;

case TGSI_TOKEN_TYPE_INSTRUCTION:
@@ -2072,6 +2109,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
if (cp.func->need_emms)
mmx_emms(cp.func);

x86_pop(cp.func, cp.temp_EBP);
x86_pop(cp.func, cp.count_ESI);
x86_pop(cp.func, cp.idx_EBX);

@@ -2098,26 +2136,14 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,

for (i = 0; i < vaos->base.key.nr_inputs; i++) {
if (vaos->base.key.element[i].in.buffer == buf) {
vaos->machine->attrib[i].input_ptr = ((char *)ptr +
vaos->base.key.element[i].in.offset);
vaos->machine->attrib[i].input_stride = stride;
vaos->attrib[i].input_ptr = ((char *)ptr +
vaos->base.key.element[i].in.offset);
vaos->attrib[i].input_stride = stride;
}
}
}


/* Destroy an aos_sse varient.
 *
 * Frees the aos_machine if one was allocated, releases both x86
 * functions (func[0] and func[1]) and finally frees the varient itself.
 */
static void vaos_destroy( struct draw_vs_varient *varient )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;

/* machine may be unset, so only free it when present */
if (vaos->machine)
align_free( vaos->machine );

x86_release_func( &vaos->func[0] );
x86_release_func( &vaos->func[1] );

FREE(vaos);
}

static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
@@ -2127,6 +2153,10 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;

vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
vaos->machine->constants = vaos->draw->pt.user.constants;
vaos->machine->immediates = vaos->base.vs->immediates;
vaos->machine->attrib = vaos->attrib;

vaos->gen_run_elts( varient,
elts,
count,
@@ -2141,6 +2171,10 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;

vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
vaos->machine->constants = vaos->draw->pt.user.constants;
vaos->machine->immediates = vaos->base.vs->immediates;
vaos->machine->attrib = vaos->attrib;

vaos->gen_run_linear( varient,
start,
count,
@@ -2153,10 +2187,6 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;

memcpy(vaos->machine->constant,
constants,
(vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));

#if 0
unsigned i;
for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
@@ -2187,6 +2217,21 @@ static void vaos_set_viewport( struct draw_vs_varient *varient,
memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
}

/**
 * Destroy an aos_sse varient.
 *
 * Releases, in order: the aos_machine (when one was allocated), the
 * attrib array, both x86 functions, and the varient object itself.
 */
static void vaos_destroy( struct draw_vs_varient *varient )
{
   struct draw_vs_varient_aos_sse *self =
      (struct draw_vs_varient_aos_sse *)varient;
   unsigned i;

   if (self->machine)
      align_free( self->machine );

   FREE( self->attrib );

   /* func[0] and func[1] */
   for (i = 0; i < 2; i++)
      x86_release_func( &self->func[i] );

   FREE( self );
}



static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
@@ -2207,6 +2252,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.run_elts = vaos_run_elts;

vaos->draw = vs->draw;

vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
if (!vaos->attrib)
goto fail;

vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
if (!vaos->machine)
goto fail;
@@ -2233,7 +2283,10 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
return &vaos->base;

fail:
if (vaos->machine)
if (vaos && vaos->attrib)
FREE(vaos->attrib);

if (vaos && vaos->machine)
align_free( vaos->machine );

if (vaos)

+ 20
- 14
src/gallium/auxiliary/draw/draw_vs_aos.h View File

@@ -78,6 +78,14 @@ struct lit_info {
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16

/* Fetch description for one vertex attribute.  The generated code
 * computes an element's address as input_stride * element + input_ptr.
 */
struct aos_attrib {
const void *input_ptr;   /* buffer pointer plus the element's offset within it */
unsigned input_stride;   /* multiplied by the element index before adding input_ptr */
};




/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
@@ -86,8 +94,6 @@ struct aos_machine {
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];

float scale[4]; /* viewport */
@@ -105,12 +111,10 @@ struct aos_machine {
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */

struct {
const void *input_ptr;
unsigned input_stride;
const float (*immediates)[4]; /* points to shader data */
const float (*constants)[4]; /* points to draw data */

unsigned output_offset;
} attrib[PIPE_MAX_ATTRIBS];
const struct aos_attrib *attrib; /* points to ? */
};


@@ -132,6 +136,7 @@ struct aos_compilation {
unsigned last_used;
} xmm[8];

unsigned ebp; /* one of X86_* */

boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
@@ -148,6 +153,7 @@ struct aos_compilation {
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
struct x86_reg temp_EBP;
};

struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
@@ -192,20 +198,20 @@ do { \
} while (0)


/* Selectors for the aos_machine pointer that aos_get_x86() loads into
 * the temp_EBP register; aos_compilation.ebp holds one of these values.
 * NOTE(review): X86_NULL presumably means "no pointer loaded" -- confirm.
 */
#define X86_NULL 0
#define X86_IMMEDIATES 1
#define X86_CONSTANTS 2
#define X86_ATTRIBS 3


/* Returns cp->temp_EBP, first emitting a load of the pointer selected by
 * `value` when cp->ebp does not already equal `value`.
 */
struct x86_reg aos_get_x86( struct aos_compilation *cp,
unsigned value );


struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;

#if 0
struct {
const void *ptr;
unsigned stride;
} attrib[PIPE_MAX_ATTRIBS];
#endif
struct aos_attrib *attrib;

struct aos_machine *machine; /* XXX: temporarily unshared */


+ 12
- 13
src/gallium/auxiliary/draw/draw_vs_aos_io.c View File

@@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,



/* Emit code that leaves the address of attribute `a` for element `elt`
 * in `src`, computed as input_stride * elt + input_ptr.
 */
static void get_src_ptr( struct x86_function *func,
static void get_src_ptr( struct aos_compilation *cp,
struct x86_reg src,
struct x86_reg machine,
struct x86_reg elt,
unsigned a )
{
struct x86_reg input_ptr =
x86_make_disp(machine,
Offset(struct aos_machine, attrib[a].input_ptr));
struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ),
a * sizeof(struct aos_attrib));

struct x86_reg input_stride =
x86_make_disp(machine,
Offset(struct aos_machine, attrib[a].input_stride));
struct x86_reg input_ptr = x86_make_disp(attrib,
Offset(struct aos_attrib, input_ptr));

struct x86_reg input_stride = x86_make_disp(attrib,
Offset(struct aos_attrib, input_stride));

/* Calculate pointer to current attrib:
*/
x86_mov(func, src, input_stride);
x86_imul(func, src, elt);
x86_add(func, src, input_ptr);
x86_mov(cp->func, src, input_stride);
x86_imul(cp->func, src, elt);
x86_add(cp->func, src, input_ptr);
}


@@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp,

/* Figure out source pointer address:
*/
get_src_ptr(cp->func,
get_src_ptr(cp,
src,
cp->machine_EDX,
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
idx);


+ 10
- 4
src/gallium/auxiliary/draw/draw_vs_sse.c View File

@@ -68,8 +68,6 @@ struct draw_sse_vertex_shader {
codegen_function func;
struct tgsi_exec_machine *machine;

float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};


@@ -107,7 +105,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Outputs,
(float (*)[4])constants,
machine->Temps,
shader->immediates,
(float (*)[4])shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@@ -130,6 +128,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
x86_release_func( &shader->sse2_program );

align_free(shader->base.immediates);

FREE( (void*) shader->base.state.tokens );
FREE( shader );
}
@@ -161,12 +161,18 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
sizeof(float), 16);

vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );

if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
&vs->sse2_program, vs->immediates, TRUE ))
&vs->sse2_program,
(float (*)[4])vs->base.immediates,
TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );

Loading…
Cancel
Save