@@ -92,9 +92,9 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, | |||
assert(which_reg == 1); | |||
offset = Offset(struct aos_machine, constants); | |||
break; | |||
case X86_ATTRIBS: | |||
case X86_BUFFERS: | |||
assert(which_reg == 0); | |||
offset = Offset(struct aos_machine, attrib); | |||
offset = Offset(struct aos_machine, buffer); | |||
break; | |||
default: | |||
assert(0); | |||
@@ -1939,6 +1939,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, | |||
save_fpu_state( &cp ); | |||
set_fpu_round_nearest( &cp ); | |||
aos_init_inputs( &cp, linear ); | |||
/* Note address for loop jump | |||
*/ | |||
label = x86_get_label(cp.func); | |||
@@ -2018,13 +2020,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, | |||
/* Incr index | |||
*/ | |||
if (linear) { | |||
x86_inc(cp.func, cp.idx_EBX); | |||
} | |||
else { | |||
x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4)); | |||
} | |||
aos_incr_inputs( &cp, linear ); | |||
} | |||
/* decr count, loop if not zero | |||
*/ | |||
@@ -2065,14 +2061,10 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, | |||
unsigned stride ) | |||
{ | |||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; | |||
unsigned i; | |||
for (i = 0; i < vaos->base.key.nr_inputs; i++) { | |||
if (vaos->base.key.element[i].in.buffer == buf) { | |||
vaos->attrib[i].input_ptr = ((char *)ptr + | |||
vaos->base.key.element[i].in.offset); | |||
vaos->attrib[i].input_stride = stride; | |||
} | |||
if (buf < vaos->nr_vb) { | |||
vaos->buffer[buf].base_ptr = (char *)ptr; | |||
vaos->buffer[buf].stride = stride; | |||
} | |||
} | |||
@@ -2089,7 +2081,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, | |||
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; | |||
machine->constants = vaos->draw->vs.aligned_constants; | |||
machine->immediates = vaos->base.vs->immediates; | |||
machine->attrib = vaos->attrib; | |||
machine->buffer = vaos->buffer; | |||
vaos->gen_run_elts( machine, | |||
elts, | |||
@@ -2108,7 +2100,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, | |||
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; | |||
machine->constants = vaos->draw->vs.aligned_constants; | |||
machine->immediates = vaos->base.vs->immediates; | |||
machine->attrib = vaos->attrib; | |||
machine->buffer = vaos->buffer; | |||
vaos->gen_run_linear( machine, | |||
start, | |||
@@ -2127,7 +2119,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) | |||
{ | |||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; | |||
FREE( vaos->attrib ); | |||
FREE( vaos->buffer ); | |||
x86_release_func( &vaos->func[0] ); | |||
x86_release_func( &vaos->func[1] ); | |||
@@ -2140,6 +2132,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) | |||
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, | |||
const struct draw_vs_varient_key *key ) | |||
{ | |||
unsigned i; | |||
struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); | |||
if (!vaos) | |||
@@ -2154,10 +2147,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, | |||
vaos->draw = vs->draw; | |||
vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); | |||
if (!vaos->attrib) | |||
for (i = 0; i < key->nr_inputs; i++) | |||
vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 ); | |||
vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) ); | |||
if (!vaos->buffer) | |||
goto fail; | |||
debug_printf("nr_vb: %d\n", vaos->nr_vb); | |||
#if 0 | |||
tgsi_dump(vs->state.tokens, 0); | |||
#endif | |||
@@ -2179,8 +2177,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, | |||
return &vaos->base; | |||
fail: | |||
if (vaos && vaos->attrib) | |||
FREE(vaos->attrib); | |||
if (vaos && vaos->buffer) | |||
FREE(vaos->buffer); | |||
if (vaos) | |||
x86_release_func( &vaos->func[0] ); |
@@ -87,9 +87,10 @@ struct lit_info { | |||
#define MAX_SHINE_TAB 4 | |||
#define MAX_LIT_INFO 16 | |||
struct aos_attrib { | |||
const void *input_ptr; | |||
unsigned input_stride; | |||
/* Per-vertex-buffer fetch state.  One entry exists for each vertex
 * buffer referenced by the varient's input elements; the generated
 * code locates entries through aos_machine::buffer.
 */
struct aos_buffer { | |||
/* Start of the buffer's data, as supplied to vaos_set_buffer(). */
const void *base_ptr; | |||
/* Byte step between consecutive elements; generated code computes
 * base_ptr + stride * element to address an element.
 */
unsigned stride; | |||
/* NOTE(review): nothing in the visible code reads or writes this
 * field -- confirm who maintains it.
 */
void *ptr; /* updated per vertex */ | |||
}; | |||
@@ -123,7 +124,7 @@ struct aos_machine { | |||
const float (*immediates)[4]; /* points to shader data */ | |||
const float (*constants)[4]; /* points to draw data */ | |||
const struct aos_attrib *attrib; /* points to ? */ | |||
const struct aos_buffer *buffer; /* points to ? */ | |||
}; | |||
@@ -179,8 +180,9 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, | |||
unsigned file, | |||
unsigned idx ); | |||
boolean aos_fetch_inputs( struct aos_compilation *cp, | |||
boolean linear ); | |||
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ); | |||
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ); | |||
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ); | |||
boolean aos_emit_outputs( struct aos_compilation *cp ); | |||
@@ -210,7 +212,7 @@ do { \ | |||
#define X86_NULL 0 | |||
#define X86_IMMEDIATES 1 | |||
#define X86_CONSTANTS 2 | |||
#define X86_ATTRIBS 3 | |||
#define X86_BUFFERS 3 | |||
struct x86_reg aos_get_x86( struct aos_compilation *cp, | |||
unsigned which_reg, | |||
@@ -232,7 +234,8 @@ struct draw_vs_varient_aos_sse { | |||
struct draw_vs_varient base; | |||
struct draw_context *draw; | |||
struct aos_attrib *attrib; | |||
struct aos_buffer *buffer; | |||
unsigned nr_vb; | |||
vaos_run_linear_func gen_run_linear; | |||
vaos_run_elts_func gen_run_elts; |
@@ -95,28 +95,6 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, | |||
static void get_src_ptr( struct aos_compilation *cp, | |||
struct x86_reg src, | |||
struct x86_reg elt, | |||
unsigned a ) | |||
{ | |||
struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), | |||
a * sizeof(struct aos_attrib)); | |||
struct x86_reg input_ptr = x86_make_disp(attrib, | |||
Offset(struct aos_attrib, input_ptr)); | |||
struct x86_reg input_stride = x86_make_disp(attrib, | |||
Offset(struct aos_attrib, input_stride)); | |||
/* Calculate pointer to current attrib: | |||
*/ | |||
x86_mov(cp->func, src, input_stride); | |||
x86_imul(cp->func, src, elt); | |||
x86_add(cp->func, src, input_ptr); | |||
} | |||
/* Extended swizzles? Maybe later. | |||
*/ | |||
static void emit_swizzle( struct aos_compilation *cp, | |||
@@ -128,22 +106,44 @@ static void emit_swizzle( struct aos_compilation *cp, | |||
} | |||
static boolean get_buffer_ptr( struct aos_compilation *cp, | |||
unsigned buf_idx, | |||
struct x86_reg elt, | |||
struct x86_reg ptr) | |||
{ | |||
struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), | |||
buf_idx * sizeof(struct aos_buffer)); | |||
struct x86_reg buf_base_ptr = x86_make_disp(buf, | |||
Offset(struct aos_buffer, base_ptr)); | |||
struct x86_reg buf_stride = x86_make_disp(buf, | |||
Offset(struct aos_buffer, stride)); | |||
/* Calculate pointer to current attrib: | |||
*/ | |||
x86_mov(cp->func, ptr, buf_stride); | |||
x86_imul(cp->func, ptr, elt); | |||
x86_add(cp->func, ptr, buf_base_ptr); | |||
return TRUE; | |||
} | |||
static boolean load_input( struct aos_compilation *cp, | |||
unsigned idx, | |||
boolean linear ) | |||
struct x86_reg bufptr ) | |||
{ | |||
unsigned format = cp->vaos->base.key.element[idx].in.format; | |||
struct x86_reg src = cp->tmp_EAX; | |||
unsigned offset = cp->vaos->base.key.element[idx].in.offset; | |||
struct x86_reg dataXMM = aos_get_xmm_reg(cp); | |||
/* Figure out source pointer address: | |||
*/ | |||
get_src_ptr(cp, | |||
src, | |||
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), | |||
idx); | |||
src = x86_deref(src); | |||
struct x86_reg src = x86_make_disp(bufptr, offset); | |||
aos_adopt_xmm_reg( cp, | |||
dataXMM, | |||
@@ -179,20 +179,87 @@ static boolean load_input( struct aos_compilation *cp, | |||
return TRUE; | |||
} | |||
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) | |||
static boolean load_inputs( struct aos_compilation *cp, | |||
unsigned buffer, | |||
struct x86_reg ptr ) | |||
{ | |||
unsigned i; | |||
for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { | |||
if (!load_input( cp, i, linear )) | |||
if (cp->vaos->base.key.element[i].in.buffer == buffer) { | |||
if (!load_input( cp, i, ptr )) | |||
return FALSE; | |||
cp->insn_counter++; | |||
} | |||
} | |||
return TRUE; | |||
} | |||
/* Emit one-time input setup code, run before the per-vertex loop.
 *
 * Only the linear, single-vertex-buffer case needs any setup: there the
 * index register (idx_EBX) is converted from an element number into a
 * pointer to that element in buffer 0, so the loop can step the pointer
 * directly instead of recomputing it for every vertex.
 *
 * Returns FALSE if the address computation could not be emitted.
 */
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
   struct x86_reg index_reg;
   struct x86_reg scratch;

   /* Indexed fetch or several buffers: nothing to prepare. */
   if (!linear || cp->vaos->nr_vb != 1)
      return TRUE;

   index_reg = cp->idx_EBX;
   scratch = cp->tmp_EAX;

   if (!get_buffer_ptr( cp, 0, index_reg, scratch ))
      return FALSE;

   cp->insn_counter++;

   /* From here on the index register holds the buffer pointer rather
    * than the index number.
    */
   x86_mov( cp->func, index_reg, scratch );

   return TRUE;
}
/* Emit the code that fetches all vertex shader inputs for the current
 * vertex.
 *
 * Linear, single-buffer case: idx_EBX already holds a pointer into
 * buffer 0 (see aos_init_inputs), so elements are loaded relative to it.
 *
 * Otherwise: for each vertex buffer in turn, compute the address of the
 * current element into tmp_EAX and load the elements that come from
 * that buffer.  The element number is idx_EBX itself when 'linear', or
 * the value idx_EBX points at when fetching through an index list.
 *
 * Returns FALSE if any address computation or load could not be
 * emitted, TRUE on success.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      /* Propagate failure, same as the multi-buffer path below. */
      if (!load_inputs( cp, 0, cp->idx_EBX ))
         return FALSE;
   }
   else {
      struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
      unsigned j;

      for (j = 0; j < cp->vaos->nr_vb; j++) {
         /* The scratch register is reused for every buffer. */
         struct x86_reg ptr = cp->tmp_EAX;

         if (!get_buffer_ptr( cp, j, elt, ptr ))
            return FALSE;

         cp->insn_counter++;

         if (!load_inputs( cp, j, ptr ))
            return FALSE;
      }
   }

   return TRUE;
}
/* Emit the code that advances the input cursor (idx_EBX) to the next
 * vertex at the bottom of the per-vertex loop.
 *
 *  - linear, single buffer: idx_EBX is a pointer into buffer 0, so add
 *    that buffer's stride to it.
 *  - linear, several buffers: idx_EBX is an element number; increment it.
 *  - otherwise: step idx_EBX forward by 4 bytes (presumably to the next
 *    entry of the index list -- confirm the element size with the caller).
 *
 * Always returns TRUE.  The explicit return matters: the function is
 * declared boolean, and using the result of a call that falls off the
 * end of a non-void function is undefined behaviour.
 */
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      /* Memory operand for buffer[0].stride in the machine state. */
      struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                            (0 * sizeof(struct aos_buffer) +
                                             Offset(struct aos_buffer, stride)));

      x86_add(cp->func, cp->idx_EBX, stride);
   }
   else if (linear) {
      x86_inc(cp->func, cp->idx_EBX);
   }
   else {
      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
   }

   return TRUE;
}