Browse Source

draw: add streamlined paths for fetching linear verts

tags/mesa_20090313
Keith Whitwell 17 years ago
parent
commit
dd7e5a4980

+ 21
- 23
src/gallium/auxiliary/draw/draw_vs_aos.c View File

@@ -92,9 +92,9 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp,
assert(which_reg == 1);
offset = Offset(struct aos_machine, constants);
break;
case X86_ATTRIBS:
case X86_BUFFERS:
assert(which_reg == 0);
offset = Offset(struct aos_machine, attrib);
offset = Offset(struct aos_machine, buffer);
break;
default:
assert(0);
@@ -1939,6 +1939,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
save_fpu_state( &cp );
set_fpu_round_nearest( &cp );

aos_init_inputs( &cp, linear );

/* Note address for loop jump
*/
label = x86_get_label(cp.func);
@@ -2018,13 +2020,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,

/* Incr index
*/
if (linear) {
x86_inc(cp.func, cp.idx_EBX);
}
else {
x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4));
}

aos_incr_inputs( &cp, linear );
}
/* decr count, loop if not zero
*/
@@ -2065,14 +2061,10 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
unsigned stride )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
unsigned i;

for (i = 0; i < vaos->base.key.nr_inputs; i++) {
if (vaos->base.key.element[i].in.buffer == buf) {
vaos->attrib[i].input_ptr = ((char *)ptr +
vaos->base.key.element[i].in.offset);
vaos->attrib[i].input_stride = stride;
}
if (buf < vaos->nr_vb) {
vaos->buffer[buf].base_ptr = (char *)ptr;
vaos->buffer[buf].stride = stride;
}
}

@@ -2089,7 +2081,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
machine->attrib = vaos->attrib;
machine->buffer = vaos->buffer;

vaos->gen_run_elts( machine,
elts,
@@ -2108,7 +2100,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
machine->attrib = vaos->attrib;
machine->buffer = vaos->buffer;

vaos->gen_run_linear( machine,
start,
@@ -2127,7 +2119,7 @@ static void vaos_destroy( struct draw_vs_varient *varient )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;

FREE( vaos->attrib );
FREE( vaos->buffer );

x86_release_func( &vaos->func[0] );
x86_release_func( &vaos->func[1] );
@@ -2140,6 +2132,7 @@ static void vaos_destroy( struct draw_vs_varient *varient )
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key )
{
unsigned i;
struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);

if (!vaos)
@@ -2154,10 +2147,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,

vaos->draw = vs->draw;

vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
if (!vaos->attrib)
for (i = 0; i < key->nr_inputs; i++)
vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 );

vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) );
if (!vaos->buffer)
goto fail;

debug_printf("nr_vb: %d\n", vaos->nr_vb);

#if 0
tgsi_dump(vs->state.tokens, 0);
#endif
@@ -2179,8 +2177,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
return &vaos->base;

fail:
if (vaos && vaos->attrib)
FREE(vaos->attrib);
if (vaos && vaos->buffer)
FREE(vaos->buffer);

if (vaos)
x86_release_func( &vaos->func[0] );

+ 11
- 8
src/gallium/auxiliary/draw/draw_vs_aos.h View File

@@ -87,9 +87,10 @@ struct lit_info {
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16

struct aos_attrib {
const void *input_ptr;
unsigned input_stride;
struct aos_buffer {
const void *base_ptr;
unsigned stride;
void *ptr; /* updated per vertex */
};


@@ -123,7 +124,7 @@ struct aos_machine {
const float (*immediates)[4]; /* points to shader data */
const float (*constants)[4]; /* points to draw data */

const struct aos_attrib *attrib; /* points to ? */
const struct aos_buffer *buffer; /* points to ? */
};


@@ -179,8 +180,9 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
unsigned file,
unsigned idx );

boolean aos_fetch_inputs( struct aos_compilation *cp,
boolean linear );
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear );
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear );
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear );

boolean aos_emit_outputs( struct aos_compilation *cp );

@@ -210,7 +212,7 @@ do { \
#define X86_NULL 0
#define X86_IMMEDIATES 1
#define X86_CONSTANTS 2
#define X86_ATTRIBS 3
#define X86_BUFFERS 3

struct x86_reg aos_get_x86( struct aos_compilation *cp,
unsigned which_reg,
@@ -232,7 +234,8 @@ struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;

struct aos_attrib *attrib;
struct aos_buffer *buffer;
unsigned nr_vb;

vaos_run_linear_func gen_run_linear;
vaos_run_elts_func gen_run_elts;

+ 102
- 35
src/gallium/auxiliary/draw/draw_vs_aos_io.c View File

@@ -95,28 +95,6 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,



static void get_src_ptr( struct aos_compilation *cp,
struct x86_reg src,
struct x86_reg elt,
unsigned a )
{
struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ),
a * sizeof(struct aos_attrib));

struct x86_reg input_ptr = x86_make_disp(attrib,
Offset(struct aos_attrib, input_ptr));

struct x86_reg input_stride = x86_make_disp(attrib,
Offset(struct aos_attrib, input_stride));

/* Calculate pointer to current attrib:
*/
x86_mov(cp->func, src, input_stride);
x86_imul(cp->func, src, elt);
x86_add(cp->func, src, input_ptr);
}


/* Extended swizzles? Maybe later.
*/
static void emit_swizzle( struct aos_compilation *cp,
@@ -128,22 +106,44 @@ static void emit_swizzle( struct aos_compilation *cp,
}



static boolean get_buffer_ptr( struct aos_compilation *cp,
unsigned buf_idx,
struct x86_reg elt,
struct x86_reg ptr)
{
struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
buf_idx * sizeof(struct aos_buffer));

struct x86_reg buf_base_ptr = x86_make_disp(buf,
Offset(struct aos_buffer, base_ptr));

struct x86_reg buf_stride = x86_make_disp(buf,
Offset(struct aos_buffer, stride));

/* Calculate pointer to current attrib:
*/
x86_mov(cp->func, ptr, buf_stride);
x86_imul(cp->func, ptr, elt);
x86_add(cp->func, ptr, buf_base_ptr);

return TRUE;
}




static boolean load_input( struct aos_compilation *cp,
unsigned idx,
boolean linear )
struct x86_reg bufptr )
{
unsigned format = cp->vaos->base.key.element[idx].in.format;
struct x86_reg src = cp->tmp_EAX;
unsigned offset = cp->vaos->base.key.element[idx].in.offset;
struct x86_reg dataXMM = aos_get_xmm_reg(cp);

/* Figure out source pointer address:
*/
get_src_ptr(cp,
src,
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
idx);

src = x86_deref(src);
struct x86_reg src = x86_make_disp(bufptr, offset);

aos_adopt_xmm_reg( cp,
dataXMM,
@@ -179,20 +179,87 @@ static boolean load_input( struct aos_compilation *cp,
return TRUE;
}


boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
static boolean load_inputs( struct aos_compilation *cp,
unsigned buffer,
struct x86_reg ptr )
{
unsigned i;
for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
if (!load_input( cp, i, linear ))
if (cp->vaos->base.key.element[i].in.buffer == buffer) {

if (!load_input( cp, i, ptr ))
return FALSE;

cp->insn_counter++;
}
}
return TRUE;
}

/**
 * Emit any one-time input setup that must precede the vertex loop.
 *
 * Only the linear, single-vertex-buffer case needs setup: the address of
 * the first vertex in buffer 0 is computed and written back into idx_EBX,
 * so from then on that register holds a buffer pointer rather than an
 * index number.  All other cases emit nothing.
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) variant
 * \return FALSE if the buffer-pointer computation fails, TRUE otherwise
 */
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
   struct x86_reg index_reg;
   struct x86_reg scratch;

   /* Nothing to set up outside the streamlined path. */
   if (!linear || cp->vaos->nr_vb != 1)
      return TRUE;

   index_reg = cp->idx_EBX;
   scratch = cp->tmp_EAX;

   if (!get_buffer_ptr( cp, 0, index_reg, scratch ))
      return FALSE;
   cp->insn_counter++;

   /* Replace the index number with the buffer pointer just computed. */
   x86_mov( cp->func, index_reg, scratch );

   return TRUE;
}

/**
 * Emit code that loads all vertex shader inputs for the current vertex.
 *
 * In the linear, single-buffer case idx_EBX already holds the address of
 * the current vertex (see aos_init_inputs), so every input is loaded
 * directly relative to it.  Otherwise the per-buffer address is
 * recomputed into tmp_EAX for each vertex buffer, using either the index
 * in idx_EBX (linear) or the index it points to (elts), and the inputs
 * sourced from that buffer are loaded relative to it.
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) variant
 * \return FALSE as soon as any address computation or load fails,
 *         TRUE otherwise
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      /* Propagate failure here too, matching the general path below. */
      if (!load_inputs( cp, 0, cp->idx_EBX ))
         return FALSE;
   }
   else {
      struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
      unsigned j;

      for (j = 0; j < cp->vaos->nr_vb; j++) {
         struct x86_reg ptr = cp->tmp_EAX;

         if (!get_buffer_ptr( cp, j, elt, ptr ))
            return FALSE;

         cp->insn_counter++;

         if (!load_inputs( cp, j, ptr ))
            return FALSE;
      }
   }

   return TRUE;
}

/**
 * Emit code that advances idx_EBX to the next vertex at the end of each
 * loop iteration.
 *
 *  - linear, single buffer: idx_EBX holds a vertex pointer, so add
 *    buffer 0's stride to it.
 *  - linear, otherwise:     idx_EBX holds an index number; increment it.
 *  - elts:                  idx_EBX points into the element list; step it
 *    forward by 4 bytes (presumably the size of one index; confirm
 *    against the elts type).
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) variant
 * \return always TRUE
 */
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                            (0 * sizeof(struct aos_buffer) +
                                             Offset(struct aos_buffer, stride)));

      x86_add(cp->func, cp->idx_EBX, stride);
   }
   else if (linear) {
      x86_inc(cp->func, cp->idx_EBX);
   }
   else {
      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
   }

   /* The function is declared boolean; every path must return a value. */
   return TRUE;
}




Loading…
Cancel
Save