assert(which_reg == 1); | assert(which_reg == 1); | ||||
offset = Offset(struct aos_machine, constants); | offset = Offset(struct aos_machine, constants); | ||||
break; | break; | ||||
case X86_ATTRIBS: | |||||
case X86_BUFFERS: | |||||
assert(which_reg == 0); | assert(which_reg == 0); | ||||
offset = Offset(struct aos_machine, attrib); | |||||
offset = Offset(struct aos_machine, buffer); | |||||
break; | break; | ||||
default: | default: | ||||
assert(0); | assert(0); | ||||
save_fpu_state( &cp ); | save_fpu_state( &cp ); | ||||
set_fpu_round_nearest( &cp ); | set_fpu_round_nearest( &cp ); | ||||
aos_init_inputs( &cp, linear ); | |||||
/* Note address for loop jump | /* Note address for loop jump | ||||
*/ | */ | ||||
label = x86_get_label(cp.func); | label = x86_get_label(cp.func); | ||||
/* Incr index | /* Incr index | ||||
*/ | */ | ||||
if (linear) { | |||||
x86_inc(cp.func, cp.idx_EBX); | |||||
} | |||||
else { | |||||
x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4)); | |||||
} | |||||
aos_incr_inputs( &cp, linear ); | |||||
} | } | ||||
/* decr count, loop if not zero | /* decr count, loop if not zero | ||||
*/ | */ | ||||
unsigned stride ) | unsigned stride ) | ||||
{ | { | ||||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; | struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; | ||||
unsigned i; | |||||
for (i = 0; i < vaos->base.key.nr_inputs; i++) { | |||||
if (vaos->base.key.element[i].in.buffer == buf) { | |||||
vaos->attrib[i].input_ptr = ((char *)ptr + | |||||
vaos->base.key.element[i].in.offset); | |||||
vaos->attrib[i].input_stride = stride; | |||||
} | |||||
if (buf < vaos->nr_vb) { | |||||
vaos->buffer[buf].base_ptr = (char *)ptr; | |||||
vaos->buffer[buf].stride = stride; | |||||
} | } | ||||
} | } | ||||
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; | machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; | ||||
machine->constants = vaos->draw->vs.aligned_constants; | machine->constants = vaos->draw->vs.aligned_constants; | ||||
machine->immediates = vaos->base.vs->immediates; | machine->immediates = vaos->base.vs->immediates; | ||||
machine->attrib = vaos->attrib; | |||||
machine->buffer = vaos->buffer; | |||||
vaos->gen_run_elts( machine, | vaos->gen_run_elts( machine, | ||||
elts, | elts, | ||||
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; | machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; | ||||
machine->constants = vaos->draw->vs.aligned_constants; | machine->constants = vaos->draw->vs.aligned_constants; | ||||
machine->immediates = vaos->base.vs->immediates; | machine->immediates = vaos->base.vs->immediates; | ||||
machine->attrib = vaos->attrib; | |||||
machine->buffer = vaos->buffer; | |||||
vaos->gen_run_linear( machine, | vaos->gen_run_linear( machine, | ||||
start, | start, | ||||
{ | { | ||||
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; | struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; | ||||
FREE( vaos->attrib ); | |||||
FREE( vaos->buffer ); | |||||
x86_release_func( &vaos->func[0] ); | x86_release_func( &vaos->func[0] ); | ||||
x86_release_func( &vaos->func[1] ); | x86_release_func( &vaos->func[1] ); | ||||
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, | static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, | ||||
const struct draw_vs_varient_key *key ) | const struct draw_vs_varient_key *key ) | ||||
{ | { | ||||
unsigned i; | |||||
struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); | struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); | ||||
if (!vaos) | if (!vaos) | ||||
vaos->draw = vs->draw; | vaos->draw = vs->draw; | ||||
vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); | |||||
if (!vaos->attrib) | |||||
for (i = 0; i < key->nr_inputs; i++) | |||||
vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 ); | |||||
vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) ); | |||||
if (!vaos->buffer) | |||||
goto fail; | goto fail; | ||||
debug_printf("nr_vb: %d\n", vaos->nr_vb); | |||||
#if 0 | #if 0 | ||||
tgsi_dump(vs->state.tokens, 0); | tgsi_dump(vs->state.tokens, 0); | ||||
#endif | #endif | ||||
return &vaos->base; | return &vaos->base; | ||||
fail: | fail: | ||||
if (vaos && vaos->attrib) | |||||
FREE(vaos->attrib); | |||||
if (vaos && vaos->buffer) | |||||
FREE(vaos->buffer); | |||||
if (vaos) | if (vaos) | ||||
x86_release_func( &vaos->func[0] ); | x86_release_func( &vaos->func[0] ); |
#define MAX_SHINE_TAB 4 | #define MAX_SHINE_TAB 4 | ||||
#define MAX_LIT_INFO 16 | #define MAX_LIT_INFO 16 | ||||
struct aos_attrib { | |||||
const void *input_ptr; | |||||
unsigned input_stride; | |||||
struct aos_buffer { | |||||
const void *base_ptr; | |||||
unsigned stride; | |||||
void *ptr; /* updated per vertex */ | |||||
}; | }; | ||||
const float (*immediates)[4]; /* points to shader data */ | const float (*immediates)[4]; /* points to shader data */ | ||||
const float (*constants)[4]; /* points to draw data */ | const float (*constants)[4]; /* points to draw data */ | ||||
const struct aos_attrib *attrib; /* points to ? */ | |||||
const struct aos_buffer *buffer; /* points to ? */ | |||||
}; | }; | ||||
unsigned file, | unsigned file, | ||||
unsigned idx ); | unsigned idx ); | ||||
boolean aos_fetch_inputs( struct aos_compilation *cp, | |||||
boolean linear ); | |||||
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ); | |||||
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ); | |||||
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ); | |||||
boolean aos_emit_outputs( struct aos_compilation *cp ); | boolean aos_emit_outputs( struct aos_compilation *cp ); | ||||
#define X86_NULL 0 | #define X86_NULL 0 | ||||
#define X86_IMMEDIATES 1 | #define X86_IMMEDIATES 1 | ||||
#define X86_CONSTANTS 2 | #define X86_CONSTANTS 2 | ||||
#define X86_ATTRIBS 3 | |||||
#define X86_BUFFERS 3 | |||||
struct x86_reg aos_get_x86( struct aos_compilation *cp, | struct x86_reg aos_get_x86( struct aos_compilation *cp, | ||||
unsigned which_reg, | unsigned which_reg, | ||||
struct draw_vs_varient base; | struct draw_vs_varient base; | ||||
struct draw_context *draw; | struct draw_context *draw; | ||||
struct aos_attrib *attrib; | |||||
struct aos_buffer *buffer; | |||||
unsigned nr_vb; | |||||
vaos_run_linear_func gen_run_linear; | vaos_run_linear_func gen_run_linear; | ||||
vaos_run_elts_func gen_run_elts; | vaos_run_elts_func gen_run_elts; |
static void get_src_ptr( struct aos_compilation *cp, | |||||
struct x86_reg src, | |||||
struct x86_reg elt, | |||||
unsigned a ) | |||||
{ | |||||
struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), | |||||
a * sizeof(struct aos_attrib)); | |||||
struct x86_reg input_ptr = x86_make_disp(attrib, | |||||
Offset(struct aos_attrib, input_ptr)); | |||||
struct x86_reg input_stride = x86_make_disp(attrib, | |||||
Offset(struct aos_attrib, input_stride)); | |||||
/* Calculate pointer to current attrib: | |||||
*/ | |||||
x86_mov(cp->func, src, input_stride); | |||||
x86_imul(cp->func, src, elt); | |||||
x86_add(cp->func, src, input_ptr); | |||||
} | |||||
/* Extended swizzles? Maybe later. | /* Extended swizzles? Maybe later. | ||||
*/ | */ | ||||
static void emit_swizzle( struct aos_compilation *cp, | static void emit_swizzle( struct aos_compilation *cp, | ||||
} | } | ||||
static boolean get_buffer_ptr( struct aos_compilation *cp, | |||||
unsigned buf_idx, | |||||
struct x86_reg elt, | |||||
struct x86_reg ptr) | |||||
{ | |||||
struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), | |||||
buf_idx * sizeof(struct aos_buffer)); | |||||
struct x86_reg buf_base_ptr = x86_make_disp(buf, | |||||
Offset(struct aos_buffer, base_ptr)); | |||||
struct x86_reg buf_stride = x86_make_disp(buf, | |||||
Offset(struct aos_buffer, stride)); | |||||
/* Calculate pointer to current attrib: | |||||
*/ | |||||
x86_mov(cp->func, ptr, buf_stride); | |||||
x86_imul(cp->func, ptr, elt); | |||||
x86_add(cp->func, ptr, buf_base_ptr); | |||||
return TRUE; | |||||
} | |||||
static boolean load_input( struct aos_compilation *cp, | static boolean load_input( struct aos_compilation *cp, | ||||
unsigned idx, | unsigned idx, | ||||
boolean linear ) | |||||
struct x86_reg bufptr ) | |||||
{ | { | ||||
unsigned format = cp->vaos->base.key.element[idx].in.format; | unsigned format = cp->vaos->base.key.element[idx].in.format; | ||||
struct x86_reg src = cp->tmp_EAX; | |||||
unsigned offset = cp->vaos->base.key.element[idx].in.offset; | |||||
struct x86_reg dataXMM = aos_get_xmm_reg(cp); | struct x86_reg dataXMM = aos_get_xmm_reg(cp); | ||||
/* Figure out source pointer address: | /* Figure out source pointer address: | ||||
*/ | */ | ||||
get_src_ptr(cp, | |||||
src, | |||||
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), | |||||
idx); | |||||
src = x86_deref(src); | |||||
struct x86_reg src = x86_make_disp(bufptr, offset); | |||||
aos_adopt_xmm_reg( cp, | aos_adopt_xmm_reg( cp, | ||||
dataXMM, | dataXMM, | ||||
return TRUE; | return TRUE; | ||||
} | } | ||||
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) | |||||
static boolean load_inputs( struct aos_compilation *cp, | |||||
unsigned buffer, | |||||
struct x86_reg ptr ) | |||||
{ | { | ||||
unsigned i; | unsigned i; | ||||
for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { | for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { | ||||
if (!load_input( cp, i, linear )) | |||||
if (cp->vaos->base.key.element[i].in.buffer == buffer) { | |||||
if (!load_input( cp, i, ptr )) | |||||
return FALSE; | |||||
cp->insn_counter++; | |||||
} | |||||
} | |||||
return TRUE; | |||||
} | |||||
/**
 * Emit any per-run input setup needed before the vertex loop.
 *
 * Only the linear, single-vertex-buffer case needs setup: the address of
 * the starting element in buffer 0 is computed and then stored in idx_EBX,
 * replacing the index number it held.  Every other case emits nothing.
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) run function
 * \return FALSE if the buffer pointer could not be computed, TRUE otherwise
 */
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
   struct x86_reg index_reg;
   struct x86_reg addr_reg;

   /* Nothing to set up unless this is the linear single-buffer case. */
   if (!linear || cp->vaos->nr_vb != 1)
      return TRUE;

   index_reg = cp->idx_EBX;
   addr_reg = cp->tmp_EAX;

   if (!get_buffer_ptr( cp, 0, index_reg, addr_reg ))
      return FALSE;

   cp->insn_counter++;

   /* In the linear, single buffer case, keep the buffer pointer
    * instead of the index number.
    */
   x86_mov( cp->func, index_reg, addr_reg );

   return TRUE;
}
/**
 * Emit code that loads all shader inputs for the current vertex.
 *
 * In the linear, single-vertex-buffer case idx_EBX is passed straight to
 * load_inputs() as the buffer pointer (aos_init_inputs() stores the buffer
 * pointer there for that case).  Otherwise the element pointer of each
 * vertex buffer is computed in turn into tmp_EAX and the inputs sourced
 * from that buffer are loaded from it.
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) run function;
 *                otherwise the element index is read through idx_EBX
 * \return FALSE as soon as any pointer computation or input load fails,
 *         TRUE otherwise
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      /* Propagate failure here too, as the multi-buffer path below does. */
      if (!load_inputs( cp, 0, cp->idx_EBX ))
         return FALSE;
   }
   else {
      struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
      unsigned j;

      for (j = 0; j < cp->vaos->nr_vb; j++) {
         struct x86_reg ptr = cp->tmp_EAX;

         if (!get_buffer_ptr( cp, j, elt, ptr ))
            return FALSE;

         cp->insn_counter++;

         if (!load_inputs( cp, j, ptr ))
            return FALSE;
      }
   }

   return TRUE;
}
/**
 * Emit code that advances idx_EBX to the next vertex.
 *
 *  - linear, single vertex buffer: add the stride of buffer 0 to idx_EBX
 *    (aos_init_inputs() stores a buffer pointer there for that case);
 *  - linear, otherwise: increment idx_EBX by one;
 *  - non-linear: advance idx_EBX by 4 (presumably one 32-bit element
 *    index -- confirm against the element array type).
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) run function
 * \return always TRUE; the original fell off the end of this non-void
 *         function without returning a value
 */
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                            (0 * sizeof(struct aos_buffer) +
                                             Offset(struct aos_buffer, stride)));

      x86_add(cp->func, cp->idx_EBX, stride);
   }
   else if (linear) {
      x86_inc(cp->func, cp->idx_EBX);
   }
   else {
      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
   }

   return TRUE;
}