Conflicts: src/gallium/auxiliary/draw/draw_pt_varray.c tags/mesa_20090313
@@ -230,7 +230,7 @@ int main( int argc, char *argv[] ) | |||
glutInitWindowPosition( 0, 0 ); | |||
glutInitWindowSize( 250, 250 ); | |||
glutInitDisplayMode( GLUT_RGB | GLUT_SINGLE | GLUT_DEPTH ); | |||
glutCreateWindow(argv[0]); | |||
glutCreateWindow(argv[argc-1]); | |||
glutReshapeFunc( Reshape ); | |||
glutKeyboardFunc( Key ); | |||
glutDisplayFunc( Display ); |
@@ -26,12 +26,17 @@ C_SOURCES = \ | |||
draw_pt_emit.c \ | |||
draw_pt_fetch.c \ | |||
draw_pt_fetch_emit.c \ | |||
draw_pt_fetch_shade_emit.c \ | |||
draw_pt_fetch_shade_pipeline.c \ | |||
draw_pt_post_vs.c \ | |||
draw_pt_util.c \ | |||
draw_pt_varray.c \ | |||
draw_pt_vcache.c \ | |||
draw_vertex.c \ | |||
draw_vs.c \ | |||
draw_vs_varient.c \ | |||
draw_vs_aos.c \ | |||
draw_vs_aos_io.c \ | |||
draw_vs_exec.c \ | |||
draw_vs_llvm.c \ | |||
draw_vs_sse.c |
@@ -56,12 +56,6 @@ struct draw_context *draw_create( void ) | |||
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ | |||
tgsi_exec_machine_init(&draw->machine); | |||
/* FIXME: give this machine thing a proper constructor: | |||
*/ | |||
draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
if (!draw_pipeline_init( draw )) | |||
goto fail; | |||
@@ -69,6 +63,9 @@ struct draw_context *draw_create( void ) | |||
if (!draw_pt_init( draw )) | |||
goto fail; | |||
if (!draw_vs_init( draw )) | |||
goto fail; | |||
return draw; | |||
fail: | |||
@@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw ) | |||
return; | |||
if (draw->machine.Inputs) | |||
align_free(draw->machine.Inputs); | |||
if (draw->machine.Outputs) | |||
align_free(draw->machine.Outputs); | |||
tgsi_exec_machine_free_data(&draw->machine); | |||
/* Not so fast -- we're just borrowing this at the moment. | |||
* | |||
@@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw ) | |||
draw_pipeline_destroy( draw ); | |||
draw_pt_destroy( draw ); | |||
draw_vs_destroy( draw ); | |||
FREE( draw ); | |||
} | |||
@@ -295,7 +286,7 @@ int | |||
draw_find_vs_output(struct draw_context *draw, | |||
uint semantic_name, uint semantic_index) | |||
{ | |||
const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
uint i; | |||
for (i = 0; i < vs->info.num_outputs; i++) { | |||
if (vs->info.output_semantic_name[i] == semantic_name && | |||
@@ -320,7 +311,7 @@ draw_find_vs_output(struct draw_context *draw, | |||
uint | |||
draw_num_vs_outputs(struct draw_context *draw) | |||
{ | |||
uint count = draw->vertex_shader->info.num_outputs; | |||
uint count = draw->vs.vertex_shader->info.num_outputs; | |||
if (draw->extra_vp_outputs.slot > 0) | |||
count++; | |||
return count; |
@@ -212,6 +212,71 @@ void draw_pipeline_run( struct draw_context *draw, | |||
draw->pipeline.vertex_count = 0; | |||
} | |||
/*
 * Primitive-emission macros consumed by the decomposition template.
 *
 * Each macro turns vertex indices into byte addresses inside `verts`
 * (`stride` bytes per vertex) and hands them to the matching do_*()
 * pipeline entry point.  `draw`, `verts` and `stride` are picked up
 * from the expansion site.  The first index of QUAD/TRIANGLE/LINE has
 * DRAW_PIPE_FLAG_MASK bits stripped before it is used as an index.
 */

/* A quad is emitted as two triangles: (i0,i1,i3) and (i1,i2,i3). */
#define QUAD(i0,i1,i2,i3)                                        \
   do {                                                          \
      do_triangle( draw,                                         \
                   ( DRAW_PIPE_RESET_STIPPLE |                   \
                     DRAW_PIPE_EDGE_FLAG_0 |                     \
                     DRAW_PIPE_EDGE_FLAG_2 ),                    \
                   verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
                   verts + stride * (i1),                        \
                   verts + stride * (i3));                       \
      do_triangle( draw,                                         \
                   ( DRAW_PIPE_EDGE_FLAG_0 |                     \
                     DRAW_PIPE_EDGE_FLAG_1 ),                    \
                   verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
                   verts + stride * (i2),                        \
                   verts + stride * (i3));                       \
   } while (0)

#define TRIANGLE(flags,i0,i1,i2)                                 \
   do_triangle( draw,                                            \
                flags,  /* flags */                              \
                verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),  \
                verts + stride * (i1),                           \
                verts + stride * (i2))

/* The second endpoint must come from the i1 argument; hard-coding
 * (i+1) here picks the wrong vertex for line strips and for the
 * closing segment of a line loop.
 */
#define LINE(flags,i0,i1)                                        \
   do_line( draw,                                                \
            flags,                                               \
            verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),      \
            verts + stride * (i1))

#define POINT(i0)                               \
   do_point( draw,                              \
             verts + stride * (i0) )
/* Configuration for the included decomposition template: FUNC names the
 * generated function, ARGS supplies its leading parameters, LOCAL_VARS
 * declares the locals the emission macros and the template body rely on,
 * and FLUSH is the statement run at the end (empty here).
 */
#define FUNC pipe_run_linear | |||
/* Leading parameters of the generated function. */
#define ARGS \ | |||
struct draw_context *draw, \ | |||
unsigned prim, \ | |||
struct vertex_header *vertices, \ | |||
unsigned stride | |||
/* Locals: byte pointer to the vertices, the flatshade-first flag read
 * from the rasterizer state, and the loop index / per-primitive flags.
 */
#define LOCAL_VARS \ | |||
char *verts = (char *)vertices; \ | |||
boolean flatfirst = (draw->rasterizer->flatshade && \ | |||
draw->rasterizer->flatshade_first); \ | |||
unsigned i, flags | |||
/* Nothing to flush for this instantiation. */
#define FLUSH | |||
/* Presumably expands to the definition of pipe_run_linear() using the
 * macros above -- confirm against draw_pt_decompose.h.
 */
#include "draw_pt_decompose.h" | |||
void draw_pipeline_run_linear( struct draw_context *draw, | |||
unsigned prim, | |||
struct vertex_header *vertices, | |||
unsigned count, | |||
unsigned stride ) | |||
{ | |||
char *verts = (char *)vertices; | |||
draw->pipeline.verts = verts; | |||
draw->pipeline.vertex_stride = stride; | |||
draw->pipeline.vertex_count = count; | |||
pipe_run_linear(draw, prim, vertices, stride, count); | |||
draw->pipeline.verts = NULL; | |||
draw->pipeline.vertex_count = 0; | |||
} | |||
void draw_pipeline_flush( struct draw_context *draw, |
@@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage, | |||
{ | |||
struct vertex_header *tmp = stage->tmp[idx]; | |||
const uint vsize = sizeof(struct vertex_header) | |||
+ stage->draw->num_vs_outputs * 4 * sizeof(float); | |||
+ stage->draw->vs.num_vs_outputs * 4 * sizeof(float); | |||
memcpy(tmp, vert, vsize); | |||
tmp->vertex_id = UNDEFINED_VERTEX_ID; | |||
return tmp; |
@@ -653,7 +653,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) | |||
} | |||
/* update vertex attrib info */ | |||
aaline->tex_slot = draw->num_vs_outputs; | |||
aaline->tex_slot = draw->vs.num_vs_outputs; | |||
assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ | |||
/* advertise the extra post-transformed vertex attribute */ |
@@ -681,7 +681,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) | |||
bind_aapoint_fragment_shader(aapoint); | |||
/* update vertex attrib info */ | |||
aapoint->tex_slot = draw->num_vs_outputs; | |||
aapoint->tex_slot = draw->vs.num_vs_outputs; | |||
assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ | |||
draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; | |||
@@ -692,7 +692,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) | |||
aapoint->psize_slot = -1; | |||
if (draw->rasterizer->point_size_per_vertex) { | |||
/* find PSIZ vertex output */ | |||
const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
uint i; | |||
for (i = 0; i < vs->info.num_outputs; i++) { | |||
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { |
@@ -112,7 +112,7 @@ static void interp( const struct clipper *clip, | |||
const struct vertex_header *out, | |||
const struct vertex_header *in ) | |||
{ | |||
const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; | |||
const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; | |||
unsigned j; | |||
/* Vertex header. | |||
@@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, | |||
header.flags |= edge_last; | |||
if (0) { | |||
const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
uint j, k; | |||
debug_printf("Clipped tri:\n"); | |||
for (j = 0; j < 3; j++) { | |||
@@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage ) | |||
clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; | |||
if (clipper->flat) { | |||
const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
uint i; | |||
clipper->num_color_attribs = 0; |
@@ -159,7 +159,7 @@ static void flatshade_line_1( struct draw_stage *stage, | |||
static void flatshade_init_state( struct draw_stage *stage ) | |||
{ | |||
struct flat_stage *flat = flat_stage(stage); | |||
const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
uint i; | |||
/* Find which vertex shader outputs are colors, make a list */ |
@@ -71,7 +71,7 @@ screen_interp( struct draw_context *draw, | |||
const struct vertex_header *v1 ) | |||
{ | |||
uint attr; | |||
for (attr = 0; attr < draw->num_vs_outputs; attr++) { | |||
for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { | |||
const float *val0 = v0->data[attr]; | |||
const float *val1 = v1->data[attr]; | |||
float *newv = dst->data[attr]; |
@@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage, | |||
struct prim_header *header ) | |||
{ | |||
struct twoside_stage *twoside = twoside_stage(stage); | |||
const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
uint i; | |||
twoside->attrib_front0 = 0; |
@@ -197,7 +197,7 @@ static void widepoint_first_point( struct draw_stage *stage, | |||
if (draw->rasterizer->point_sprite) { | |||
/* find vertex shader texcoord outputs */ | |||
const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
uint i, j = 0; | |||
for (i = 0; i < vs->info.num_outputs; i++) { | |||
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { | |||
@@ -212,7 +212,7 @@ static void widepoint_first_point( struct draw_stage *stage, | |||
wide->psize_slot = -1; | |||
if (draw->rasterizer->point_size_per_vertex) { | |||
/* find PSIZ vertex output */ | |||
const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
uint i; | |||
for (i = 0; i < vs->info.num_outputs; i++) { | |||
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { |
@@ -124,6 +124,7 @@ struct draw_context | |||
struct { | |||
struct { | |||
struct draw_pt_middle_end *fetch_emit; | |||
struct draw_pt_middle_end *fetch_shade_emit; | |||
struct draw_pt_middle_end *general; | |||
} middle; | |||
@@ -154,6 +155,7 @@ struct draw_context | |||
const void *constants; | |||
} user; | |||
boolean test_fse; | |||
} pt; | |||
struct { | |||
@@ -167,13 +169,26 @@ struct draw_context | |||
/* pipe state that we need: */ | |||
const struct pipe_rasterizer_state *rasterizer; | |||
struct pipe_viewport_state viewport; | |||
boolean identity_viewport; | |||
struct draw_vertex_shader *vertex_shader; | |||
struct { | |||
struct draw_vertex_shader *vertex_shader; | |||
uint num_vs_outputs; /**< convenience, from vertex_shader */ | |||
boolean identity_viewport; | |||
uint num_vs_outputs; /**< convenience, from vertex_shader */ | |||
/** TGSI program interpreter runtime state */ | |||
struct tgsi_exec_machine machine; | |||
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. | |||
*/ | |||
struct gallivm_cpu_engine *engine; | |||
struct translate *fetch; | |||
struct translate_cache *fetch_cache; | |||
struct translate *emit; | |||
struct translate_cache *emit_cache; | |||
} vs; | |||
/* Clip derived state: | |||
*/ | |||
@@ -190,16 +205,15 @@ struct draw_context | |||
unsigned reduced_prim; | |||
/** TGSI program interpreter runtime state */ | |||
struct tgsi_exec_machine machine; | |||
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. | |||
*/ | |||
struct gallivm_cpu_engine *engine; | |||
void *driver_private; | |||
}; | |||
/******************************************************************************* | |||
* Vertex shader code: | |||
*/ | |||
boolean draw_vs_init( struct draw_context *draw ); | |||
void draw_vs_destroy( struct draw_context *draw ); | |||
@@ -247,6 +261,12 @@ void draw_pipeline_run( struct draw_context *draw, | |||
const ushort *elts, | |||
unsigned count ); | |||
void draw_pipeline_run_linear( struct draw_context *draw, | |||
unsigned prim, | |||
struct vertex_header *vertices, | |||
unsigned count, | |||
unsigned stride ); | |||
void draw_pipeline_flush( struct draw_context *draw, |
@@ -64,7 +64,7 @@ draw_pt_arrays(struct draw_context *draw, | |||
opt |= PT_PIPELINE; | |||
} | |||
if (!draw->bypass_clipping) { | |||
if (!draw->bypass_clipping && !draw->pt.test_fse) { | |||
opt |= PT_CLIPTEST; | |||
} | |||
@@ -72,16 +72,18 @@ draw_pt_arrays(struct draw_context *draw, | |||
opt |= PT_SHADE; | |||
} | |||
if (opt) | |||
middle = draw->pt.middle.general; | |||
else | |||
if (opt == 0) | |||
middle = draw->pt.middle.fetch_emit; | |||
else if (opt == PT_SHADE && draw->pt.test_fse) | |||
middle = draw->pt.middle.fetch_shade_emit; | |||
else | |||
middle = draw->pt.middle.general; | |||
/* Pick the right frontend | |||
*/ | |||
if (draw->pt.user.elts || | |||
count >= 256) { | |||
if (draw->pt.user.elts || (opt & PT_PIPELINE)) { | |||
frontend = draw->pt.front.vcache; | |||
} else { | |||
frontend = draw->pt.front.varray; | |||
@@ -102,6 +104,8 @@ draw_pt_arrays(struct draw_context *draw, | |||
boolean draw_pt_init( struct draw_context *draw ) | |||
{ | |||
draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; | |||
draw->pt.front.vcache = draw_pt_vcache( draw ); | |||
if (!draw->pt.front.vcache) | |||
return FALSE; | |||
@@ -114,6 +118,13 @@ boolean draw_pt_init( struct draw_context *draw ) | |||
if (!draw->pt.middle.fetch_emit) | |||
return FALSE; | |||
if (draw->pt.test_fse) { | |||
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); | |||
if (!draw->pt.middle.fetch_shade_emit) | |||
return FALSE; | |||
} | |||
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); | |||
if (!draw->pt.middle.general) | |||
return FALSE; | |||
@@ -134,6 +145,11 @@ void draw_pt_destroy( struct draw_context *draw ) | |||
draw->pt.middle.fetch_emit = NULL; | |||
} | |||
if (draw->pt.middle.fetch_shade_emit) { | |||
draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit ); | |||
draw->pt.middle.fetch_shade_emit = NULL; | |||
} | |||
if (draw->pt.front.vcache) { | |||
draw->pt.front.vcache->destroy( draw->pt.front.vcache ); | |||
draw->pt.front.vcache = NULL; | |||
@@ -147,19 +163,6 @@ void draw_pt_destroy( struct draw_context *draw ) | |||
static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { | |||
PIPE_PRIM_POINTS, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES | |||
}; | |||
/** | |||
* Draw vertex arrays | |||
@@ -172,9 +175,10 @@ void | |||
draw_arrays(struct draw_context *draw, unsigned prim, | |||
unsigned start, unsigned count) | |||
{ | |||
if (reduced_prim[prim] != draw->reduced_prim) { | |||
unsigned reduced_prim = draw_pt_reduced_prim(prim); | |||
if (reduced_prim != draw->reduced_prim) { | |||
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); | |||
draw->reduced_prim = reduced_prim[prim]; | |||
draw->reduced_prim = reduced_prim; | |||
} | |||
/* drawing done here: */ |
@@ -92,6 +92,10 @@ struct draw_pt_middle_end { | |||
const ushort *draw_elts, | |||
unsigned draw_count ); | |||
void (*run_linear)(struct draw_pt_middle_end *, | |||
unsigned start, | |||
unsigned count); | |||
void (*finish)( struct draw_pt_middle_end * ); | |||
void (*destroy)( struct draw_pt_middle_end * ); | |||
}; | |||
@@ -117,6 +121,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, | |||
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); | |||
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); | |||
/* Middle-ends: | |||
* | |||
* Currently one general-purpose case which can do all possibilities, | |||
@@ -128,6 +133,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); | |||
* vertex_elements. | |||
*/ | |||
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); | |||
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); | |||
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); | |||
@@ -152,6 +158,13 @@ void draw_pt_emit( struct pt_emit *emit, | |||
const ushort *elts, | |||
unsigned count ); | |||
void draw_pt_emit_linear( struct pt_emit *emit, | |||
const float (*vertex_data)[4], | |||
unsigned vertex_count, | |||
unsigned stride, | |||
unsigned start, | |||
unsigned count ); | |||
void draw_pt_emit_destroy( struct pt_emit *emit ); | |||
struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); | |||
@@ -170,6 +183,11 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, | |||
unsigned count, | |||
char *verts ); | |||
void draw_pt_fetch_run_linear( struct pt_fetch *fetch, | |||
unsigned start, | |||
unsigned count, | |||
char *verts ); | |||
void draw_pt_fetch_destroy( struct pt_fetch *fetch ); | |||
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); | |||
@@ -194,4 +212,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); | |||
void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); | |||
/******************************************************************************* | |||
* Utils: | |||
*/ | |||
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr); | |||
unsigned draw_pt_reduced_prim(unsigned prim); | |||
#endif |
@@ -0,0 +1,153 @@ | |||
/* Primitive decomposition template.
 *
 * The includer defines FUNC (function name), ARGS (leading parameters),
 * LOCAL_VARS (local declarations; the body uses `i`, `flags` and
 * `flatfirst`), FLUSH (statement run at the end) and the emission macros
 * POINT / LINE / TRIANGLE / QUAD.  The generated function walks `count`
 * consecutive vertex indices and emits the points, lines or triangles
 * that make up primitive type `prim`.  Incomplete trailing primitives are
 * dropped by the loop bounds.
 */
static void FUNC( ARGS, | |||
unsigned count ) | |||
{ | |||
LOCAL_VARS; | |||
switch (prim) { | |||
case PIPE_PRIM_POINTS: | |||
/* One point per vertex. */
for (i = 0; i < count; i ++) { | |||
POINT( (i + 0) ); | |||
} | |||
break; | |||
case PIPE_PRIM_LINES: | |||
/* Independent vertex pairs; every line carries RESET_STIPPLE. */
for (i = 0; i+1 < count; i += 2) { | |||
LINE( DRAW_PIPE_RESET_STIPPLE, | |||
(i + 0), | |||
(i + 1)); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_LOOP: | |||
/* Same walk as a strip, followed by one closing segment from the
 * last vertex back to vertex 0.  Only the first segment carries
 * RESET_STIPPLE: flags is zeroed in the loop increment.
 */
if (count >= 2) { | |||
flags = DRAW_PIPE_RESET_STIPPLE; | |||
for (i = 1; i < count; i++, flags = 0) { | |||
LINE( flags, | |||
(i - 1), | |||
(i )); | |||
} | |||
LINE( flags, | |||
(i - 1), | |||
(0 )); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_STRIP: | |||
/* Consecutive segments (i-1, i); RESET_STIPPLE on the first only. */
flags = DRAW_PIPE_RESET_STIPPLE; | |||
for (i = 1; i < count; i++, flags = 0) { | |||
LINE( flags, | |||
(i - 1), | |||
(i )); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLES: | |||
/* Independent vertex triples. */
for (i = 0; i+2 < count; i += 3) { | |||
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
(i + 0), | |||
(i + 1), | |||
(i + 2 )); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
/* The (i&1) terms swap two of the three indices on odd triangles.
 * With flatfirst, vertex i stays in the first slot; otherwise vertex
 * i+2 stays in the last slot.
 */
if (flatfirst) { | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
(i + 0), | |||
(i + 1 + (i&1)), | |||
(i + 2 - (i&1))); | |||
} | |||
} | |||
else { | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
(i + 0 + (i&1)), | |||
(i + 1 - (i&1)), | |||
(i + 2 )); | |||
} | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_FAN: | |||
/* Every triangle shares vertex 0; flatfirst only changes the order
 * in which the three vertices are submitted.
 */
if (count >= 3) { | |||
if (flatfirst) { | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
(i + 1), | |||
(i + 2), | |||
(0 )); | |||
} | |||
} | |||
else { | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
(0), | |||
(i + 1), | |||
(i + 2 )); | |||
} | |||
} | |||
} | |||
break; | |||
case PIPE_PRIM_QUADS: | |||
/* Independent groups of four vertices. */
for (i = 0; i+3 < count; i += 4) { | |||
QUAD( (i + 0), | |||
(i + 1), | |||
(i + 2), | |||
(i + 3)); | |||
} | |||
break; | |||
case PIPE_PRIM_QUAD_STRIP: | |||
/* Advance two vertices per quad; indices are passed as
 * (i+2, i, i+1, i+3).
 */
for (i = 0; i+3 < count; i += 2) { | |||
QUAD( (i + 2), | |||
(i + 0), | |||
(i + 1), | |||
(i + 3)); | |||
} | |||
break; | |||
case PIPE_PRIM_POLYGON: | |||
{ | |||
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/ | |||
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; | |||
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; | |||
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; | |||
/* The first triangle also gets RESET_STIPPLE and edge_first; later
 * ones start from edge_middle (reset in the loop increment), and
 * the final triangle additionally gets edge_last.
 */
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; | |||
for (i = 0; i+2 < count; i++, flags = edge_middle) { | |||
if (i + 3 == count) | |||
flags |= edge_last; | |||
TRIANGLE( flags, | |||
(i + 1), | |||
(i + 2), | |||
(0)); | |||
} | |||
} | |||
break; | |||
default: | |||
/* Unknown primitive type. */
assert(0); | |||
break; | |||
} | |||
FLUSH; | |||
} | |||
#undef TRIANGLE | |||
#undef QUAD | |||
#undef POINT | |||
#undef LINE | |||
#undef FUNC |
@@ -40,6 +40,9 @@ struct pt_emit { | |||
struct translate *translate; | |||
struct translate_cache *cache; | |||
unsigned prim; | |||
const struct vertex_info *vinfo; | |||
}; | |||
void draw_pt_emit_prepare( struct pt_emit *emit, | |||
@@ -51,8 +54,18 @@ void draw_pt_emit_prepare( struct pt_emit *emit, | |||
struct translate_key hw_key; | |||
unsigned i; | |||
boolean ok; | |||
/* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
*/ | |||
draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
ok = draw->render->set_primitive(draw->render, prim); | |||
/* XXX: may need to defensively reset this later on as clipping can | |||
* clobber this state in the render backend. | |||
*/ | |||
emit->prim = prim; | |||
ok = draw->render->set_primitive(draw->render, emit->prim); | |||
if (!ok) { | |||
assert(0); | |||
return; | |||
@@ -60,7 +73,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, | |||
/* Must do this after set_primitive() above: | |||
*/ | |||
vinfo = draw->render->get_vertex_info(draw->render); | |||
emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render); | |||
/* Translate from pipeline vertices to hw vertices. | |||
@@ -100,6 +113,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, | |||
case EMIT_4UB: | |||
output_format = PIPE_FORMAT_B8G8R8A8_UNORM; | |||
emit_sz = 4 * sizeof(ubyte); | |||
break; | |||
default: | |||
assert(0); | |||
output_format = PIPE_FORMAT_NONE; | |||
@@ -144,6 +158,14 @@ void draw_pt_emit( struct pt_emit *emit, | |||
*/ | |||
draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
/* XXX: and work out some way to coordinate the render primitive | |||
* between vbuf.c and here... | |||
*/ | |||
if (!draw->render->set_primitive(draw->render, emit->prim)) { | |||
assert(0); | |||
return; | |||
} | |||
hw_verts = render->allocate_vertices(render, | |||
(ushort)translate->key.output_stride, | |||
(ushort)vertex_count); | |||
@@ -178,6 +200,72 @@ void draw_pt_emit( struct pt_emit *emit, | |||
} | |||
void draw_pt_emit_linear(struct pt_emit *emit, | |||
const float (*vertex_data)[4], | |||
unsigned vertex_count, | |||
unsigned stride, | |||
unsigned start, | |||
unsigned count) | |||
{ | |||
struct draw_context *draw = emit->draw; | |||
struct translate *translate = emit->translate; | |||
struct vbuf_render *render = draw->render; | |||
void *hw_verts; | |||
#if 0 | |||
debug_printf("Linear emit\n"); | |||
#endif | |||
/* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
*/ | |||
draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
/* XXX: and work out some way to coordinate the render primitive | |||
* between vbuf.c and here... | |||
*/ | |||
if (!draw->render->set_primitive(draw->render, emit->prim)) { | |||
assert(0); | |||
return; | |||
} | |||
hw_verts = render->allocate_vertices(render, | |||
(ushort)translate->key.output_stride, | |||
(ushort)count); | |||
if (!hw_verts) { | |||
assert(0); | |||
return; | |||
} | |||
translate->set_buffer(translate, 0, | |||
vertex_data, stride); | |||
translate->set_buffer(translate, 1, | |||
&draw->rasterizer->point_size, | |||
0); | |||
translate->run(translate, | |||
0, | |||
vertex_count, | |||
hw_verts); | |||
if (0) { | |||
unsigned i; | |||
for (i = 0; i < vertex_count; i++) { | |||
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); | |||
draw_dump_emitted_vertex( emit->vinfo, | |||
(const uint8_t *)hw_verts + | |||
translate->key.output_stride * i ); | |||
} | |||
} | |||
render->draw_arrays(render, start, count); | |||
render->release_vertices(render, | |||
hw_verts, | |||
translate->key.output_stride, | |||
vertex_count); | |||
} | |||
struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) | |||
{ | |||
struct pt_emit *emit = CALLOC_STRUCT(pt_emit); |
@@ -166,6 +166,42 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, | |||
} | |||
void draw_pt_fetch_run_linear( struct pt_fetch *fetch, | |||
unsigned start, | |||
unsigned count, | |||
char *verts ) | |||
{ | |||
struct draw_context *draw = fetch->draw; | |||
struct translate *translate = fetch->translate; | |||
unsigned i; | |||
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { | |||
translate->set_buffer(translate, | |||
i, | |||
((char *)draw->pt.user.vbuffer[i] + | |||
draw->pt.vertex_buffer[i].buffer_offset), | |||
draw->pt.vertex_buffer[i].pitch ); | |||
} | |||
translate->run( translate, | |||
start, | |||
count, | |||
verts ); | |||
/* Edgeflags are hard to fit into a translate program, populate | |||
* them separately if required. In the setup above they are | |||
* defaulted to one, so only need this if there is reason to change | |||
* that default: | |||
*/ | |||
if (fetch->need_edgeflags) { | |||
for (i = 0; i < count; i++) { | |||
struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); | |||
vh->edgeflag = draw_pt_get_edgeflag( draw, start + i ); | |||
} | |||
} | |||
} | |||
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) | |||
{ | |||
struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); |
@@ -258,6 +258,59 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, | |||
} | |||
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, | |||
unsigned start, | |||
unsigned count ) | |||
{ | |||
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; | |||
struct draw_context *draw = feme->draw; | |||
void *hw_verts; | |||
/* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
*/ | |||
draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
hw_verts = draw->render->allocate_vertices( draw->render, | |||
(ushort)feme->translate->key.output_stride, | |||
(ushort)count ); | |||
if (!hw_verts) { | |||
assert(0); | |||
return; | |||
} | |||
/* Single routine to fetch vertices and emit HW verts. | |||
*/ | |||
feme->translate->run( feme->translate, | |||
start, | |||
count, | |||
hw_verts ); | |||
if (0) { | |||
unsigned i; | |||
for (i = 0; i < count; i++) { | |||
debug_printf("\n\nvertex %d:\n", i); | |||
draw_dump_emitted_vertex( feme->vinfo, | |||
(const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); | |||
} | |||
} | |||
/* XXX: Draw arrays path to avoid re-emitting index list again and | |||
* again. | |||
*/ | |||
draw->render->draw_arrays( draw->render, | |||
0, /*start*/ | |||
count ); | |||
/* Done -- that was easy, wasn't it: | |||
*/ | |||
draw->render->release_vertices( draw->render, | |||
hw_verts, | |||
feme->translate->key.output_stride, | |||
count ); | |||
} | |||
static void fetch_emit_finish( struct draw_pt_middle_end *middle ) | |||
{ | |||
@@ -287,10 +340,11 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) | |||
return NULL; | |||
} | |||
fetch_emit->base.prepare = fetch_emit_prepare; | |||
fetch_emit->base.run = fetch_emit_run; | |||
fetch_emit->base.finish = fetch_emit_finish; | |||
fetch_emit->base.destroy = fetch_emit_destroy; | |||
fetch_emit->base.prepare = fetch_emit_prepare; | |||
fetch_emit->base.run = fetch_emit_run; | |||
fetch_emit->base.run_linear = fetch_emit_run_linear; | |||
fetch_emit->base.finish = fetch_emit_finish; | |||
fetch_emit->base.destroy = fetch_emit_destroy; | |||
fetch_emit->draw = draw; | |||
@@ -0,0 +1,344 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/* | |||
* Authors: | |||
* Keith Whitwell <keith@tungstengraphics.com> | |||
*/ | |||
#include "pipe/p_util.h" | |||
#include "draw/draw_context.h" | |||
#include "draw/draw_private.h" | |||
#include "draw/draw_vbuf.h" | |||
#include "draw/draw_vertex.h" | |||
#include "draw/draw_pt.h" | |||
#include "draw/draw_vs.h" | |||
#include "translate/translate.h" | |||
struct fetch_shade_emit; | |||
/* Prototype fetch, shade, emit-hw-verts all in one go.
 *
 * State for the fetch-shade-emit middle end.  fse_prepare() fills in
 * `vinfo` and `key`, then stores the looked-up shader variant in `active`.
*/ | |||
struct fetch_shade_emit { | |||
/* Middle-end interface; middle-end pointers are cast back to this struct. */
struct draw_pt_middle_end base; | |||
/* Owning draw context. */
struct draw_context *draw; | |||
/* Temporaries:
 * NOTE(review): constants, pitch, src and prim are not referenced by
 * the code visible here -- confirm whether they are still needed.
*/ | |||
const float *constants; | |||
unsigned pitch[PIPE_MAX_ATTRIBS]; | |||
const ubyte *src[PIPE_MAX_ATTRIBS]; | |||
unsigned prim; | |||
/* Key describing the inputs, outputs and state the variant must match. */
struct draw_vs_varient_key key; | |||
/* Variant returned by draw_vs_lookup_varient() for `key`. */
struct draw_vs_varient *active; | |||
/* Hardware vertex layout reported by the render backend. */
const struct vertex_info *vinfo; | |||
}; | |||
static void fse_prepare( struct draw_pt_middle_end *middle, | |||
unsigned prim, | |||
unsigned opt ) | |||
{ | |||
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; | |||
struct draw_context *draw = fse->draw; | |||
unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; | |||
const struct vertex_info *vinfo; | |||
unsigned i; | |||
if (!draw->render->set_primitive( draw->render, | |||
prim )) { | |||
assert(0); | |||
return; | |||
} | |||
/* Must do this after set_primitive() above: | |||
*/ | |||
fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render); | |||
fse->key.output_stride = vinfo->size * 4; | |||
fse->key.nr_outputs = vinfo->num_attribs; | |||
fse->key.nr_inputs = num_vs_inputs; | |||
fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */ | |||
fse->key.nr_inputs); /* inputs - fetch from api format */ | |||
fse->key.viewport = !draw->identity_viewport; | |||
fse->key.clip = !draw->bypass_clipping; | |||
fse->key.pad = 0; | |||
memset(fse->key.element, 0, | |||
fse->key.nr_elements * sizeof(fse->key.element[0])); | |||
for (i = 0; i < num_vs_inputs; i++) { | |||
const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; | |||
fse->key.element[i].in.format = src->src_format; | |||
/* Consider ignoring these, ie make generated programs | |||
* independent of this state: | |||
*/ | |||
fse->key.element[i].in.buffer = src->vertex_buffer_index; | |||
fse->key.element[i].in.offset = src->src_offset; | |||
} | |||
{ | |||
unsigned dst_offset = 0; | |||
for (i = 0; i < vinfo->num_attribs; i++) { | |||
unsigned emit_sz = 0; | |||
switch (vinfo->emit[i]) { | |||
case EMIT_4F: | |||
emit_sz = 4 * sizeof(float); | |||
break; | |||
case EMIT_3F: | |||
emit_sz = 3 * sizeof(float); | |||
break; | |||
case EMIT_2F: | |||
emit_sz = 2 * sizeof(float); | |||
break; | |||
case EMIT_1F: | |||
emit_sz = 1 * sizeof(float); | |||
break; | |||
case EMIT_1F_PSIZE: | |||
emit_sz = 1 * sizeof(float); | |||
break; | |||
case EMIT_4UB: | |||
emit_sz = 4 * sizeof(ubyte); | |||
break; | |||
default: | |||
assert(0); | |||
break; | |||
} | |||
/* The elements in the key correspond to vertex shader output | |||
* numbers, not to positions in the hw vertex description -- | |||
* that's handled by the output_offset field. | |||
*/ | |||
fse->key.element[i].out.format = vinfo->emit[i]; | |||
fse->key.element[i].out.vs_output = vinfo->src_index[i]; | |||
fse->key.element[i].out.offset = dst_offset; | |||
dst_offset += emit_sz; | |||
assert(fse->key.output_stride >= dst_offset); | |||
} | |||
} | |||
/* Would normally look up a vertex shader and peruse its list of | |||
* varients somehow. We omitted that step and put all the | |||
* hardcoded "shaders" into an array. We're just making the | |||
* assumption that this happens to be a matching shader... ie | |||
* you're running isosurf, aren't you? | |||
*/ | |||
fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, | |||
&fse->key ); | |||
if (!fse->active) { | |||
assert(0); | |||
return ; | |||
} | |||
/* Now set buffer pointers: | |||
*/ | |||
for (i = 0; i < num_vs_inputs; i++) { | |||
unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; | |||
fse->active->set_input( fse->active, | |||
i, | |||
((const ubyte *) draw->pt.user.vbuffer[buf] + | |||
draw->pt.vertex_buffer[buf].buffer_offset), | |||
draw->pt.vertex_buffer[buf].pitch ); | |||
} | |||
fse->active->set_constants( fse->active, | |||
(const float (*)[4])draw->pt.user.constants ); | |||
fse->active->set_viewport( fse->active, | |||
&draw->viewport ); | |||
//return TRUE; | |||
} | |||
static void fse_run_linear( struct draw_pt_middle_end *middle, | |||
unsigned start, | |||
unsigned count ) | |||
{ | |||
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; | |||
struct draw_context *draw = fse->draw; | |||
unsigned alloc_count = align(count, 4); | |||
char *hw_verts; | |||
/* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
*/ | |||
draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
hw_verts = draw->render->allocate_vertices( draw->render, | |||
(ushort)fse->key.output_stride, | |||
(ushort)alloc_count ); | |||
if (!hw_verts) { | |||
assert(0); | |||
return; | |||
} | |||
/* Single routine to fetch vertices, run shader and emit HW verts. | |||
* Clipping is done elsewhere -- either by the API or on hardware, | |||
* or for some other reason not required... | |||
*/ | |||
fse->active->run_linear( fse->active, | |||
start, count, | |||
hw_verts ); | |||
/* Draw arrays path to avoid re-emitting index list again and | |||
* again. | |||
*/ | |||
draw->render->draw_arrays( draw->render, | |||
0, | |||
count ); | |||
if (0) { | |||
unsigned i; | |||
for (i = 0; i < count; i++) { | |||
debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, i, | |||
fse->key.output_stride, | |||
fse->key.output_stride * i); | |||
draw_dump_emitted_vertex( fse->vinfo, | |||
(const uint8_t *)hw_verts + fse->key.output_stride * i ); | |||
} | |||
} | |||
draw->render->release_vertices( draw->render, | |||
hw_verts, | |||
fse->key.output_stride, | |||
count ); | |||
} | |||
static void | |||
fse_run(struct draw_pt_middle_end *middle, | |||
const unsigned *fetch_elts, | |||
unsigned fetch_count, | |||
const ushort *draw_elts, | |||
unsigned draw_count ) | |||
{ | |||
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; | |||
struct draw_context *draw = fse->draw; | |||
unsigned alloc_count = align(fetch_count, 4); | |||
void *hw_verts; | |||
/* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
*/ | |||
draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
hw_verts = draw->render->allocate_vertices( draw->render, | |||
(ushort)fse->key.output_stride, | |||
(ushort)alloc_count ); | |||
if (!hw_verts) { | |||
assert(0); | |||
return; | |||
} | |||
/* Single routine to fetch vertices, run shader and emit HW verts. | |||
*/ | |||
fse->active->run_elts( fse->active, | |||
fetch_elts, | |||
fetch_count, | |||
hw_verts ); | |||
draw->render->draw( draw->render, | |||
draw_elts, | |||
draw_count ); | |||
if (0) { | |||
unsigned i; | |||
for (i = 0; i < fetch_count; i++) { | |||
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); | |||
draw_dump_emitted_vertex( fse->vinfo, | |||
(const uint8_t *)hw_verts + | |||
fse->key.output_stride * i ); | |||
} | |||
} | |||
draw->render->release_vertices( draw->render, | |||
hw_verts, | |||
fse->key.output_stride, | |||
fetch_count ); | |||
} | |||
/* Nothing to tear down between runs; present only to fill the
 * middle-end vtable slot.
 */
static void fse_finish( struct draw_pt_middle_end *middle )
{
   (void) middle;
}
/* Release the middle end.  `base` is the first member of
 * fetch_shade_emit, so the incoming pointer is the allocation itself.
 */
static void
fse_destroy( struct draw_pt_middle_end *middle )
{
   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;

   FREE(fse);
}
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) | |||
{ | |||
struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); | |||
if (!fse) | |||
return NULL; | |||
fse->base.prepare = fse_prepare; | |||
fse->base.run = fse_run; | |||
fse->base.run_linear = fse_run_linear; | |||
fse->base.finish = fse_finish; | |||
fse->base.destroy = fse_destroy; | |||
fse->draw = draw; | |||
return &fse->base; | |||
} |
@@ -55,7 +55,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, | |||
{ | |||
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; | |||
struct draw_context *draw = fpme->draw; | |||
struct draw_vertex_shader *vs = draw->vertex_shader; | |||
struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
/* Add one to num_outputs because the pipeline occasionally tags on | |||
* an additional texcoord, eg for AA lines. | |||
@@ -107,7 +107,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, | |||
{ | |||
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; | |||
struct draw_context *draw = fpme->draw; | |||
struct draw_vertex_shader *shader = draw->vertex_shader; | |||
struct draw_vertex_shader *shader = draw->vs.vertex_shader; | |||
unsigned opt = fpme->opt; | |||
unsigned alloc_count = align_int( fetch_count, 4 ); | |||
@@ -162,7 +162,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, | |||
fpme->vertex_size, | |||
draw_elts, | |||
draw_count ); | |||
} | |||
} | |||
else { | |||
draw_pt_emit( fpme->emit, | |||
(const float (*)[4])pipeline_verts->data, | |||
@@ -177,6 +177,79 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, | |||
} | |||
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, | |||
unsigned start, | |||
unsigned count) | |||
{ | |||
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; | |||
struct draw_context *draw = fpme->draw; | |||
struct draw_vertex_shader *shader = draw->vs.vertex_shader; | |||
unsigned opt = fpme->opt; | |||
unsigned alloc_count = align_int( count, 4 ); | |||
struct vertex_header *pipeline_verts = | |||
(struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); | |||
if (!pipeline_verts) { | |||
/* Not much we can do here - just skip the rendering. | |||
*/ | |||
assert(0); | |||
return; | |||
} | |||
/* Fetch into our vertex buffer | |||
*/ | |||
draw_pt_fetch_run_linear( fpme->fetch, | |||
start, | |||
count, | |||
(char *)pipeline_verts ); | |||
/* Run the shader, note that this overwrites the data[] parts of | |||
* the pipeline verts. If there is no shader, ie a bypass shader, | |||
* then the inputs == outputs, and are already in the correct | |||
* place. | |||
*/ | |||
if (opt & PT_SHADE) | |||
{ | |||
shader->run_linear(shader, | |||
(const float (*)[4])pipeline_verts->data, | |||
( float (*)[4])pipeline_verts->data, | |||
(const float (*)[4])draw->pt.user.constants, | |||
count, | |||
fpme->vertex_size, | |||
fpme->vertex_size); | |||
} | |||
if (draw_pt_post_vs_run( fpme->post_vs, | |||
pipeline_verts, | |||
count, | |||
fpme->vertex_size )) | |||
{ | |||
opt |= PT_PIPELINE; | |||
} | |||
/* Do we need to run the pipeline? | |||
*/ | |||
if (opt & PT_PIPELINE) { | |||
draw_pipeline_run_linear( fpme->draw, | |||
fpme->prim, | |||
pipeline_verts, | |||
count, | |||
fpme->vertex_size); | |||
} | |||
else { | |||
draw_pt_emit_linear( fpme->emit, | |||
(const float (*)[4])pipeline_verts->data, | |||
count, | |||
fpme->vertex_size, | |||
0, /*start*/ | |||
count ); | |||
} | |||
FREE(pipeline_verts); | |||
} | |||
static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) | |||
{ | |||
@@ -206,10 +279,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * | |||
if (!fpme) | |||
goto fail; | |||
fpme->base.prepare = fetch_pipeline_prepare; | |||
fpme->base.run = fetch_pipeline_run; | |||
fpme->base.finish = fetch_pipeline_finish; | |||
fpme->base.destroy = fetch_pipeline_destroy; | |||
fpme->base.prepare = fetch_pipeline_prepare; | |||
fpme->base.run = fetch_pipeline_run; | |||
fpme->base.run_linear = fetch_pipeline_linear_run; | |||
fpme->base.finish = fetch_pipeline_finish; | |||
fpme->base.destroy = fetch_pipeline_destroy; | |||
fpme->draw = draw; | |||
@@ -0,0 +1,103 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/* | |||
* Authors: | |||
* Keith Whitwell <keith@tungstengraphics.com> | |||
*/ | |||
#include "pipe/p_util.h" | |||
#include "draw/draw_context.h" | |||
#include "draw/draw_private.h" | |||
#include "draw/draw_pt.h" | |||
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) | |||
{ | |||
switch (prim) { | |||
case PIPE_PRIM_POINTS: | |||
*first = 1; | |||
*incr = 1; | |||
break; | |||
case PIPE_PRIM_LINES: | |||
*first = 2; | |||
*incr = 2; | |||
break; | |||
case PIPE_PRIM_LINE_STRIP: | |||
case PIPE_PRIM_LINE_LOOP: | |||
*first = 2; | |||
*incr = 1; | |||
break; | |||
case PIPE_PRIM_TRIANGLES: | |||
*first = 3; | |||
*incr = 3; | |||
break; | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
case PIPE_PRIM_TRIANGLE_FAN: | |||
case PIPE_PRIM_POLYGON: | |||
*first = 3; | |||
*incr = 1; | |||
break; | |||
case PIPE_PRIM_QUADS: | |||
*first = 4; | |||
*incr = 4; | |||
break; | |||
case PIPE_PRIM_QUAD_STRIP: | |||
*first = 4; | |||
*incr = 2; | |||
break; | |||
default: | |||
assert(0); | |||
*first = 0; | |||
*incr = 1; /* set to one so that count % incr works */ | |||
break; | |||
} | |||
} | |||
unsigned draw_pt_reduced_prim(unsigned prim) | |||
{ | |||
switch (prim) { | |||
case PIPE_PRIM_POINTS: | |||
return PIPE_PRIM_POINTS; | |||
case PIPE_PRIM_LINES: | |||
case PIPE_PRIM_LINE_STRIP: | |||
case PIPE_PRIM_LINE_LOOP: | |||
return PIPE_PRIM_LINES; | |||
case PIPE_PRIM_TRIANGLES: | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
case PIPE_PRIM_TRIANGLE_FAN: | |||
case PIPE_PRIM_POLYGON: | |||
case PIPE_PRIM_QUADS: | |||
case PIPE_PRIM_QUAD_STRIP: | |||
return PIPE_PRIM_TRIANGLES; | |||
default: | |||
assert(0); | |||
return PIPE_PRIM_POINTS; | |||
} | |||
} | |||
@@ -43,6 +43,8 @@ struct varray_frontend { | |||
unsigned draw_count; | |||
unsigned fetch_count; | |||
unsigned fetch_start; | |||
struct draw_pt_middle_end *middle; | |||
unsigned input_prim; | |||
@@ -56,6 +58,11 @@ static void varray_flush(struct varray_frontend *varray) | |||
debug_printf("FLUSH fc = %d, dc = %d\n", | |||
varray->fetch_count, | |||
varray->draw_count); | |||
debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n", | |||
varray->fetch_elts[0], | |||
varray->fetch_elts[varray->fetch_count-1], | |||
varray->draw_elts[0], | |||
varray->draw_elts[varray->draw_count-1]); | |||
#endif | |||
varray->middle->run(varray->middle, | |||
varray->fetch_elts, | |||
@@ -68,20 +75,43 @@ static void varray_flush(struct varray_frontend *varray) | |||
varray->draw_count = 0; | |||
} | |||
#if 0 | |||
static void varray_check_flush(struct varray_frontend *varray) | |||
static void varray_flush_linear(struct varray_frontend *varray, | |||
unsigned start, unsigned count) | |||
{ | |||
if (varray->draw_count + 6 >= DRAW_MAX/* || | |||
varray->fetch_count + 4 >= FETCH_MAX*/) { | |||
varray_flush(varray); | |||
if (count) { | |||
#if 0 | |||
debug_printf("FLUSH LINEAR start = %d, count = %d\n", | |||
start, | |||
count); | |||
#endif | |||
assert(varray->middle->run_linear); | |||
varray->middle->run_linear(varray->middle, start, count); | |||
} | |||
} | |||
static INLINE void fetch_init(struct varray_frontend *varray, | |||
unsigned count) | |||
{ | |||
unsigned idx; | |||
#if 0 | |||
debug_printf("FETCH INIT c = %d, fs = %d\n", | |||
count, | |||
varray->fetch_start); | |||
#endif | |||
for (idx = 0; idx < count; ++idx) { | |||
varray->fetch_elts[idx] = varray->fetch_start + idx; | |||
} | |||
varray->fetch_start += idx; | |||
varray->fetch_count = idx; | |||
} | |||
static INLINE void add_draw_el(struct varray_frontend *varray, | |||
int idx, ushort flags) | |||
int idx) | |||
{ | |||
varray->draw_elts[varray->draw_count++] = idx | flags; | |||
varray->draw_elts[varray->draw_count++] = idx; | |||
} | |||
@@ -90,106 +120,52 @@ static INLINE void varray_triangle( struct varray_frontend *varray, | |||
unsigned i1, | |||
unsigned i2 ) | |||
{ | |||
add_draw_el(varray, i0, 0); | |||
add_draw_el(varray, i1, 0); | |||
add_draw_el(varray, i2, 0); | |||
} | |||
static INLINE void varray_triangle_flags( struct varray_frontend *varray, | |||
ushort flags, | |||
unsigned i0, | |||
unsigned i1, | |||
unsigned i2 ) | |||
{ | |||
add_draw_el(varray, i0, flags); | |||
add_draw_el(varray, i1, 0); | |||
add_draw_el(varray, i2, 0); | |||
add_draw_el(varray, i0); | |||
add_draw_el(varray, i1); | |||
add_draw_el(varray, i2); | |||
} | |||
static INLINE void varray_line( struct varray_frontend *varray, | |||
unsigned i0, | |||
unsigned i1 ) | |||
{ | |||
add_draw_el(varray, i0, 0); | |||
add_draw_el(varray, i1, 0); | |||
} | |||
static INLINE void varray_line_flags( struct varray_frontend *varray, | |||
ushort flags, | |||
unsigned i0, | |||
unsigned i1 ) | |||
{ | |||
add_draw_el(varray, i0, flags); | |||
add_draw_el(varray, i1, 0); | |||
add_draw_el(varray, i0); | |||
add_draw_el(varray, i1); | |||
} | |||
static INLINE void varray_point( struct varray_frontend *varray, | |||
unsigned i0 ) | |||
{ | |||
add_draw_el(varray, i0, 0); | |||
} | |||
static INLINE void varray_quad( struct varray_frontend *varray, | |||
unsigned i0, | |||
unsigned i1, | |||
unsigned i2, | |||
unsigned i3 ) | |||
{ | |||
varray_triangle( varray, i0, i1, i3 ); | |||
varray_triangle( varray, i1, i2, i3 ); | |||
} | |||
static INLINE void varray_ef_quad( struct varray_frontend *varray, | |||
unsigned i0, | |||
unsigned i1, | |||
unsigned i2, | |||
unsigned i3 ) | |||
{ | |||
const ushort omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; | |||
const ushort omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; | |||
varray_triangle_flags( varray, | |||
DRAW_PIPE_RESET_STIPPLE | omitEdge1, | |||
i0, i1, i3 ); | |||
varray_triangle_flags( varray, | |||
omitEdge2, | |||
i1, i2, i3 ); | |||
add_draw_el(varray, i0); | |||
} | |||
/* At least for now, we're back to using a template include file for | |||
* this. The two paths aren't too different though - it may be | |||
* possible to reunify them. | |||
*/ | |||
#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2) | |||
#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3) | |||
#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1) | |||
#define POINT(vc,i0) varray_point(vc,i0) | |||
#define FUNC varray_run_extras | |||
#include "draw_pt_varray_tmp.h" | |||
#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2) | |||
#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3) | |||
#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1) | |||
#if 0 | |||
#define TRIANGLE(flags,i0,i1,i2) varray_triangle(varray,i0,i1,i2) | |||
#define LINE(flags,i0,i1) varray_line(varray,i0,i1) | |||
#define POINT(i0) varray_point(varray,i0) | |||
#define FUNC varray_decompose | |||
#include "draw_pt_decompose.h" | |||
#else | |||
#define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2) | |||
#define LINE(vc,i0,i1) varray_line(vc,i0,i1) | |||
#define POINT(vc,i0) varray_point(vc,i0) | |||
#define FUNC varray_run | |||
#include "draw_pt_varray_tmp.h" | |||
#include "draw_pt_varray_tmp_linear.h" | |||
#endif | |||
static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { | |||
static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { | |||
PIPE_PRIM_POINTS, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, /* decomposed LINELOOP */ | |||
PIPE_PRIM_LINE_STRIP, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES | |||
PIPE_PRIM_TRIANGLE_STRIP, | |||
PIPE_PRIM_TRIANGLES, /* decomposed TRI_FAN */ | |||
PIPE_PRIM_QUADS, | |||
PIPE_PRIM_QUAD_STRIP, | |||
PIPE_PRIM_TRIANGLES /* decomposed POLYGON */ | |||
}; | |||
@@ -201,17 +177,10 @@ static void varray_prepare(struct draw_pt_front_end *frontend, | |||
{ | |||
struct varray_frontend *varray = (struct varray_frontend *)frontend; | |||
if (opt & PT_PIPELINE) | |||
{ | |||
varray->base.run = varray_run_extras; | |||
} | |||
else | |||
{ | |||
varray->base.run = varray_run; | |||
} | |||
varray->base.run = varray_run; | |||
varray->input_prim = prim; | |||
varray->output_prim = reduced_prim[prim]; | |||
varray->output_prim = decompose_prim[prim]; | |||
varray->middle = middle; | |||
middle->prepare(middle, varray->output_prim, opt); |
@@ -10,32 +10,44 @@ static void FUNC(struct draw_pt_front_end *frontend, | |||
boolean flatfirst = (draw->rasterizer->flatshade && | |||
draw->rasterizer->flatshade_first); | |||
unsigned i, flags; | |||
unsigned i, j, flags; | |||
unsigned first, incr; | |||
varray->fetch_start = start; | |||
draw_pt_split_prim(varray->input_prim, &first, &incr); | |||
#if 0 | |||
debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count); | |||
#endif | |||
#if 0 | |||
debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim, | |||
debug_printf("%s (%d) %d/%d\n", __FUNCTION__, | |||
varray->input_prim, | |||
start, count); | |||
#endif | |||
for (i = 0; i < count; ++i) { | |||
varray->fetch_elts[i] = start + i; | |||
} | |||
varray->fetch_count = count; | |||
switch (varray->input_prim) { | |||
case PIPE_PRIM_POINTS: | |||
for (i = 0; i < count; i ++) { | |||
POINT(varray, i + 0); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i < end; i++) { | |||
POINT(varray, i + 0); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
case PIPE_PRIM_LINES: | |||
for (i = 0; i+1 < count; i += 2) { | |||
LINE(varray, DRAW_PIPE_RESET_STIPPLE, | |||
i + 0, i + 1); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+1 < end; i += 2) { | |||
LINE(varray, DRAW_PIPE_RESET_STIPPLE, | |||
i + 0, i + 1); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
@@ -43,38 +55,81 @@ static void FUNC(struct draw_pt_front_end *frontend, | |||
if (count >= 2) { | |||
flags = DRAW_PIPE_RESET_STIPPLE; | |||
for (i = 1; i < count; i++, flags = 0) { | |||
LINE(varray, flags, i - 1, i); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 1; i < end; i++, flags = 0) { | |||
LINE(varray, flags, i - 1, i); | |||
} | |||
LINE(varray, flags, i - 1, 0); | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
LINE(varray, flags, i - 1, 0); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_STRIP: | |||
flags = DRAW_PIPE_RESET_STIPPLE; | |||
for (i = 1; i < count; i++, flags = 0) { | |||
LINE(varray, flags, i - 1, i); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 1; i < end; i++, flags = 0) { | |||
LINE(varray, flags, i - 1, i); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLES: | |||
for (i = 0; i+2 < count; i += 3) { | |||
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
i + 0, i + 1, i + 2); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+2 < end; i += 3) { | |||
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
i + 0, i + 1, i + 2); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
if (flatfirst) { | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
i + 0, i + 1 + (i&1), i + 2 - (i&1)); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+2 < end; i++) { | |||
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
i + 0, i + 1 + (i&1), i + 2 - (i&1)); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
if (j + first + i <= count) { | |||
varray->fetch_start -= 2; | |||
i -= 2; | |||
} | |||
} | |||
} | |||
else { | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
i + 0 + (i&1), i + 1 - (i&1), i + 2); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i + 2 < end; i++) { | |||
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
i + 0 + (i&1), i + 1 - (i&1), i + 2); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
if (j + first + i <= count) { | |||
varray->fetch_start -= 2; | |||
i -= 2; | |||
} | |||
} | |||
} | |||
break; | |||
@@ -83,51 +138,89 @@ static void FUNC(struct draw_pt_front_end *frontend, | |||
if (count >= 3) { | |||
if (flatfirst) { | |||
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+2 < end; i++) { | |||
TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
} | |||
else { | |||
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; | |||
for (i = 0; i+2 < count; i++) { | |||
TRIANGLE(varray, flags, 0, i + 1, i + 2); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+2 < end; i++) { | |||
TRIANGLE(varray, flags, 0, i + 1, i + 2); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
} | |||
} | |||
break; | |||
case PIPE_PRIM_QUADS: | |||
for (i = 0; i+3 < count; i += 4) { | |||
QUAD(varray, i + 0, i + 1, i + 2, i + 3); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+3 < end; i += 4) { | |||
QUAD(varray, i + 0, i + 1, i + 2, i + 3); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
case PIPE_PRIM_QUAD_STRIP: | |||
for (i = 0; i+3 < count; i += 2) { | |||
QUAD(varray, i + 2, i + 0, i + 1, i + 3); | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+3 < end; i += 2) { | |||
QUAD(varray, i + 2, i + 0, i + 1, i + 3); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
if (j + first + i <= count) { | |||
varray->fetch_start -= 2; | |||
i -= 2; | |||
} | |||
} | |||
break; | |||
case PIPE_PRIM_POLYGON: | |||
{ | |||
/* These bitflags look a little odd because we submit the | |||
* vertices as (1,2,0) to satisfy flatshade requirements. | |||
*/ | |||
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; | |||
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; | |||
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; | |||
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; | |||
for (i = 0; i+2 < count; i++, flags = edge_middle) { | |||
/* These bitflags look a little odd because we submit the | |||
* vertices as (1,2,0) to satisfy flatshade requirements. | |||
*/ | |||
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; | |||
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; | |||
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; | |||
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 0; i+2 < end; i++, flags = edge_middle) { | |||
if (i + 3 == count) | |||
flags |= edge_last; | |||
TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
} | |||
TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
} | |||
break; | |||
default: | |||
assert(0); |
@@ -0,0 +1,94 @@ | |||
/* Round a vertex count down to the largest value that still forms
 * whole primitives: `first` vertices for the first primitive plus a
 * multiple of `incr` for every further one.
 *
 * Returns 0 when `count` is too small for even one primitive.  Without
 * that guard the unsigned subtraction `count - first` wraps around and
 * the result can be a huge bogus count (e.g. count=1, first=3, incr=3).
 * `incr` must be non-zero.
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   if (count < first)
      return 0;

   return count - (count - first) % incr;
}
/* Template body for the varray front end's run function (FUNC is a
 * macro supplied by the including file and undefined again below).
 *
 * Splits a draw of `count` vertices into chunks of at most FETCH_MAX
 * vertices.  Primitive types that can be drawn as plain ranges go to
 * varray_flush_linear(); line loops, polygons and triangle fans are
 * turned into explicit draw elements and sent through varray_flush().
 *
 * `elts` is not dereferenced: its value is reinterpreted as the index
 * of the first vertex.  `get_elt` is not used in this body.
 */
static void FUNC(struct draw_pt_front_end *frontend, | |||
pt_elt_func get_elt, | |||
const void *elts, | |||
unsigned count) | |||
{ | |||
struct varray_frontend *varray = (struct varray_frontend *)frontend; | |||
/* NOTE(review): pointer converted to unsigned; this would truncate
 * where pointers are wider than unsigned -- confirm callers only pass
 * small integer values here.
 */
unsigned start = (unsigned)elts; | |||
unsigned i, j; | |||
unsigned first, incr; | |||
varray->fetch_start = start; | |||
draw_pt_split_prim(varray->input_prim, &first, &incr); | |||
/* Sanitize primitive length: | |||
*/ | |||
count = trim(count, first, incr); | |||
if (count < first) | |||
return; | |||
#if 0 | |||
debug_printf("%s (%d) %d/%d\n", __FUNCTION__, | |||
varray->input_prim, | |||
start, count); | |||
#endif | |||
switch (varray->input_prim) { | |||
case PIPE_PRIM_POINTS: | |||
case PIPE_PRIM_LINES: | |||
case PIPE_PRIM_TRIANGLES: | |||
case PIPE_PRIM_LINE_STRIP: | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
case PIPE_PRIM_QUADS: | |||
case PIPE_PRIM_QUAD_STRIP: | |||
/* Linear path: hand out ranges [start + j, start + j + nr). */
for (j = 0; j < count;) { | |||
unsigned remaining = count - j; | |||
unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr ); | |||
varray_flush_linear(varray, start + j, nr); | |||
j += nr; | |||
/* More chunks follow: step back by (first - incr) so the next chunk
 * starts with the vertices it shares with this one.  For list
 * primitives first == incr, so nothing is repeated.
 */
if (nr != remaining) | |||
j -= (first - incr); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_LOOP: | |||
if (count >= 2) { | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
for (i = 1; i < end; i++) { | |||
LINE(varray, i - 1, i); | |||
} | |||
/* Closing segment back to element 0 of this chunk.
 * NOTE(review): emitted once per chunk, so a loop longer than
 * FETCH_MAX looks like it becomes several smaller loops -- confirm.
 */
LINE(varray, i - 1, 0); | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
} | |||
break; | |||
case PIPE_PRIM_POLYGON: | |||
case PIPE_PRIM_TRIANGLE_FAN: | |||
for (j = 0; j + first <= count; j += i) { | |||
unsigned end = MIN2(FETCH_MAX, count - j); | |||
end -= (end % incr); | |||
/* Fan out from element 0 of this chunk.
 * NOTE(review): each chunk uses its own first vertex as the hub --
 * confirm this is intended for fans longer than FETCH_MAX.
 */
for (i = 2; i < end; i++) { | |||
TRIANGLE(varray, 0, i - 1, i); | |||
} | |||
i = end; | |||
fetch_init(varray, end); | |||
varray_flush(varray); | |||
} | |||
break; | |||
default: | |||
assert(0); | |||
break; | |||
} | |||
/* Final flush, issued unconditionally after every case. */
varray_flush(varray); | |||
} | |||
#undef TRIANGLE | |||
#undef QUAD | |||
#undef POINT | |||
#undef LINE | |||
#undef FUNC |
@@ -171,15 +171,15 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, | |||
unsigned i2, | |||
unsigned i3 ) | |||
{ | |||
const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; | |||
const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; | |||
vcache_triangle_flags( vcache, | |||
DRAW_PIPE_RESET_STIPPLE | omitEdge1, | |||
vcache_triangle_flags( vcache, | |||
( DRAW_PIPE_RESET_STIPPLE | | |||
DRAW_PIPE_EDGE_FLAG_0 | | |||
DRAW_PIPE_EDGE_FLAG_2 ), | |||
i0, i1, i3 ); | |||
vcache_triangle_flags( vcache, | |||
omitEdge2, | |||
vcache_triangle_flags( vcache, | |||
( DRAW_PIPE_EDGE_FLAG_0 | | |||
DRAW_PIPE_EDGE_FLAG_1 ), | |||
i1, i2, i3 ); | |||
} | |||
@@ -204,19 +204,6 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, | |||
static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { | |||
PIPE_PRIM_POINTS, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_LINES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES, | |||
PIPE_PRIM_TRIANGLES | |||
}; | |||
static void vcache_prepare( struct draw_pt_front_end *frontend, | |||
@@ -236,7 +223,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, | |||
} | |||
vcache->input_prim = prim; | |||
vcache->output_prim = reduced_prim[prim]; | |||
vcache->output_prim = draw_pt_reduced_prim(prim); | |||
vcache->middle = middle; | |||
middle->prepare( middle, vcache->output_prim, opt ); |
@@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo); | |||
void draw_dump_emitted_vertex(const struct vertex_info *vinfo, | |||
const uint8_t *data); | |||
static INLINE unsigned draw_translate_vinfo_format(unsigned format ) | |||
{ | |||
switch (format) { | |||
case EMIT_1F: | |||
case EMIT_1F_PSIZE: | |||
return PIPE_FORMAT_R32_FLOAT; | |||
case EMIT_2F: | |||
return PIPE_FORMAT_R32G32_FLOAT; | |||
case EMIT_3F: | |||
return PIPE_FORMAT_R32G32B32_FLOAT; | |||
case EMIT_4F: | |||
return PIPE_FORMAT_R32G32B32A32_FLOAT; | |||
case EMIT_4UB: | |||
return PIPE_FORMAT_R8G8B8A8_UNORM; | |||
default: | |||
return PIPE_FORMAT_NONE; | |||
} | |||
} | |||
#endif /* DRAW_VERTEX_H */ |
@@ -36,6 +36,8 @@ | |||
#include "draw_private.h" | |||
#include "draw_context.h" | |||
#include "draw_vs.h" | |||
#include "translate/translate.h" | |||
#include "translate/translate_cache.h" | |||
@@ -66,13 +68,13 @@ draw_bind_vertex_shader(struct draw_context *draw, | |||
if (dvs) | |||
{ | |||
draw->vertex_shader = dvs; | |||
draw->num_vs_outputs = dvs->info.num_outputs; | |||
draw->vs.vertex_shader = dvs; | |||
draw->vs.num_vs_outputs = dvs->info.num_outputs; | |||
dvs->prepare( dvs, draw ); | |||
} | |||
else { | |||
draw->vertex_shader = NULL; | |||
draw->num_vs_outputs = 0; | |||
draw->vs.vertex_shader = NULL; | |||
draw->vs.num_vs_outputs = 0; | |||
} | |||
} | |||
@@ -83,3 +85,109 @@ draw_delete_vertex_shader(struct draw_context *draw, | |||
{ | |||
dvs->delete( dvs ); | |||
} | |||
boolean | |||
draw_vs_init( struct draw_context *draw ) | |||
{ | |||
tgsi_exec_machine_init(&draw->vs.machine); | |||
/* FIXME: give this machine thing a proper constructor: | |||
*/ | |||
draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
if (!draw->vs.machine.Inputs) | |||
return FALSE; | |||
draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
if (!draw->vs.machine.Outputs) | |||
return FALSE; | |||
draw->vs.emit_cache = translate_cache_create(); | |||
if (!draw->vs.emit_cache) | |||
return FALSE; | |||
draw->vs.fetch_cache = translate_cache_create(); | |||
if (!draw->vs.fetch_cache) | |||
return FALSE; | |||
return TRUE; | |||
} | |||
void | |||
draw_vs_destroy( struct draw_context *draw ) | |||
{ | |||
if (draw->vs.machine.Inputs) | |||
align_free(draw->vs.machine.Inputs); | |||
if (draw->vs.machine.Outputs) | |||
align_free(draw->vs.machine.Outputs); | |||
if (draw->vs.fetch_cache) | |||
translate_cache_destroy(draw->vs.fetch_cache); | |||
if (draw->vs.emit_cache) | |||
translate_cache_destroy(draw->vs.emit_cache); | |||
tgsi_exec_machine_free_data(&draw->vs.machine); | |||
} | |||
struct draw_vs_varient * | |||
draw_vs_lookup_varient( struct draw_vertex_shader *vs, | |||
const struct draw_vs_varient_key *key ) | |||
{ | |||
struct draw_vs_varient *varient; | |||
unsigned i; | |||
/* Lookup existing varient: | |||
*/ | |||
for (i = 0; i < vs->nr_varients; i++) | |||
if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) | |||
return vs->varient[i]; | |||
/* Else have to create a new one: | |||
*/ | |||
varient = vs->create_varient( vs, key ); | |||
if (varient == NULL) | |||
return NULL; | |||
/* Add it to our list: | |||
*/ | |||
assert(vs->nr_varients < Elements(vs->varient)); | |||
vs->varient[vs->nr_varients++] = varient; | |||
/* Done | |||
*/ | |||
return varient; | |||
} | |||
struct translate * | |||
draw_vs_get_fetch( struct draw_context *draw, | |||
struct translate_key *key ) | |||
{ | |||
if (!draw->vs.fetch || | |||
translate_key_compare(&draw->vs.fetch->key, key) != 0) | |||
{ | |||
translate_key_sanitize(key); | |||
draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key); | |||
} | |||
return draw->vs.fetch; | |||
} | |||
struct translate * | |||
draw_vs_get_emit( struct draw_context *draw, | |||
struct translate_key *key ) | |||
{ | |||
if (!draw->vs.emit || | |||
translate_key_compare(&draw->vs.emit->key, key) != 0) | |||
{ | |||
translate_key_sanitize(key); | |||
draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key); | |||
} | |||
return draw->vs.emit; | |||
} |
@@ -38,10 +38,84 @@ | |||
struct draw_context; | |||
struct pipe_shader_state; | |||
/* Input half of a varient element: the format of the attribute to be
 * fetched plus a buffer number and an offset.
 * NOTE(review): buffer/offset presumably select a vertex buffer and a
 * byte offset inside it -- confirm against the code that fills the key.
 */
struct draw_varient_input | |||
{ | |||
enum pipe_format format; | |||
unsigned buffer; | |||
unsigned offset; | |||
}; | |||
/* Output half of a varient element.  vs_output and offset share one
 * 32-bit bitfield word (8 + 24 bits).
 */
struct draw_varient_output | |||
{ | |||
enum pipe_format format; /* output format */ | |||
unsigned vs_output:8; /* which vertex shader output is this? */ | |||
unsigned offset:24; /* offset into output vertex */ | |||
}; | |||
/* One slot of a varient key: an input description paired with an
 * output description.  draw_vs_varient_key holds PIPE_MAX_ATTRIBS of
 * these.
 */
struct draw_varient_element { | |||
struct draw_varient_input in; | |||
struct draw_varient_output out; | |||
}; | |||
/* Key identifying one varient of a vertex shader.
 *
 * Keys are compared bytewise by draw_vs_varient_key_compare(), and
 * draw_vs_varient_keysize() counts 2 * sizeof(int) for the members in
 * front of element[], so that arithmetic depends on this layout.
 * NOTE(review): the named bitfields add up to 31 bits; because of the
 * bytewise compare, keys presumably need to be zeroed before being
 * filled in -- confirm at the call sites.
 */
struct draw_vs_varient_key { | |||
unsigned output_stride; | |||
unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */ | |||
unsigned nr_inputs:8; | |||
unsigned nr_outputs:8; | |||
unsigned viewport:1; | |||
unsigned clip:1; | |||
unsigned pad:5; | |||
struct draw_varient_element element[PIPE_MAX_ATTRIBS]; | |||
}; | |||
struct draw_vs_varient; | |||
/* Function-pointer types for the two varient entry points, both using
 * the PIPE_CDECL calling convention.
 *
 * vsv_run_elts_func   takes an array of 'count' element indices.
 * vsv_run_linear_func takes a 'start' value and a 'count'.
 * Both receive the buffer the results are written to.
 */
typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer); | |||
typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer); | |||
/* Base "class" of a vertex shader varient: the key it was built for,
 * the shader it belongs to and a table of function pointers supplied
 * by the implementation.  Implementations embed this struct as their
 * first member and cast back from the base pointer.
 */
struct draw_vs_varient { | |||
struct draw_vs_varient_key key; | |||
struct draw_vertex_shader *vs; | |||
/* Supply the pointer and stride used for input 'i'. */
void (*set_input)( struct draw_vs_varient *, | |||
unsigned i, | |||
const void *ptr, | |||
unsigned stride ); | |||
/* Supply the constant buffer as an array of float[4]. */
void (*set_constants)( struct draw_vs_varient *, | |||
const float (*constants)[4] ); | |||
/* Supply the viewport state. */
void (*set_viewport)( struct draw_vs_varient *, | |||
const struct pipe_viewport_state * ); | |||
/* Process 'count' vertices beginning at 'start'. */
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer ); | |||
/* Process the 'count' vertices named by elts[]. */
void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer ); | |||
/* Free the varient. */
void (*destroy)( struct draw_vs_varient * ); | |||
}; | |||
/** | |||
* Private version of the compiled vertex_shader | |||
*/ | |||
struct draw_vertex_shader { | |||
struct draw_context *draw; | |||
/* This member will disappear shortly: | |||
*/ | |||
@@ -49,6 +123,14 @@ struct draw_vertex_shader { | |||
struct tgsi_shader_info info; | |||
/* | |||
*/ | |||
struct draw_vs_varient *varient[16]; | |||
unsigned nr_varients; | |||
struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, | |||
const struct draw_vs_varient_key *key ); | |||
void (*prepare)( struct draw_vertex_shader *shader, | |||
struct draw_context *draw ); | |||
@@ -68,6 +150,15 @@ struct draw_vertex_shader { | |||
}; | |||
struct draw_vs_varient * | |||
draw_vs_lookup_varient( struct draw_vertex_shader *base, | |||
const struct draw_vs_varient_key *key ); | |||
/******************************************************************************** | |||
* Internal functions: | |||
*/ | |||
struct draw_vertex_shader * | |||
draw_create_vs_exec(struct draw_context *draw, | |||
const struct pipe_shader_state *templ); | |||
@@ -81,7 +172,52 @@ draw_create_vs_llvm(struct draw_context *draw, | |||
const struct pipe_shader_state *templ); | |||
struct draw_vs_varient_key; | |||
struct draw_vertex_shader; | |||
struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, | |||
const struct draw_vs_varient_key *key ); | |||
/******************************************************************************** | |||
* Helpers for vs implementations that don't do their own fetch/emit varients. | |||
* Means these can be shared between shaders. | |||
*/ | |||
struct translate; | |||
struct translate_key; | |||
struct translate *draw_vs_get_fetch( struct draw_context *draw, | |||
struct translate_key *key ); | |||
struct translate *draw_vs_get_emit( struct draw_context *draw, | |||
struct translate_key *key ); | |||
struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, | |||
const struct draw_vs_varient_key *key ); | |||
static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) | |||
{ | |||
return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element); | |||
} | |||
/**
 * Bytewise comparison of two varient keys.
 *
 * Only the bytes that are significant for 'a' (see
 * draw_vs_varient_keysize()) are compared.
 *
 * \return 0 when the keys match, otherwise the non-zero memcmp() result
 */
static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a,
                                               const struct draw_vs_varient_key *b )
{
   return memcmp(a, b, draw_vs_varient_keysize(a));
}
#define MAX_TGSI_VERTICES 4 | |||
#endif |
@@ -0,0 +1,222 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/* Authors: Keith Whitwell <keith@tungstengraphics.com> | |||
*/ | |||
#ifndef DRAW_VS_AOS_H | |||
#define DRAW_VS_AOS_H | |||
struct tgsi_token; | |||
struct x86_function; | |||
#include "pipe/p_state.h" | |||
#include "rtasm/rtasm_x86sse.h" | |||
#define X 0 | |||
#define Y 1 | |||
#define Z 2 | |||
#define W 3 | |||
#define MAX_INPUTS PIPE_MAX_ATTRIBS | |||
#define MAX_OUTPUTS PIPE_MAX_ATTRIBS | |||
#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ | |||
#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ | |||
#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ | |||
#define MAX_INTERNALS 8 | |||
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT | |||
#define FPU_RND_NEG 1 | |||
#define FPU_RND_NEAREST 2 | |||
struct aos_machine; | |||
typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, | |||
float *result, | |||
const float *in, | |||
unsigned count ); | |||
/* One table of 258 floats tagged with the exponent it belongs to.
 * NOTE(review): last_used presumably holds a stamp taken from
 * aos_machine::now for choosing which table to recycle -- confirm.
 */
struct shine_tab { | |||
float exponent; | |||
float values[258]; | |||
unsigned last_used; | |||
}; | |||
/* Pairs a lit_func with the shine_tab it uses.  aos_machine holds
 * MAX_LIT_INFO of these.
 */
struct lit_info { | |||
lit_func func; | |||
struct shine_tab *shine_tab; | |||
}; | |||
#define MAX_SHINE_TAB 4 | |||
#define MAX_LIT_INFO 16 | |||
/* This is the temporary storage used by all the aos_sse vs varients.
 * Create one per context and reuse by passing a pointer in at
 * vs_varient creation??
 *
 * The code generator addresses members of this struct by offset from a
 * register holding its address (see get_src_ptr(), which uses
 * Offset(struct aos_machine, attrib[a]...)), so the layout is part of
 * the contract with the generated code.
 */
struct aos_machine { | |||
float input [MAX_INPUTS ][4]; | |||
float output [MAX_OUTPUTS ][4]; | |||
float temp [MAX_TEMPS ][4]; | |||
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */ | |||
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ | |||
float internal [MAX_INTERNALS ][4]; | |||
float scale[4]; /* viewport */ | |||
float translate[4]; /* viewport */ | |||
float tmp[2][4]; /* scratch space for LIT */ | |||
struct shine_tab shine_tab[MAX_SHINE_TAB]; | |||
struct lit_info lit_info[MAX_LIT_INFO]; | |||
unsigned now; | |||
ushort fpu_rnd_nearest; | |||
ushort fpu_rnd_neg_inf; | |||
ushort fpu_restore; | |||
ushort fpucntl; /* one of FPU_* above */ | |||
/* Per-attribute source pointer/stride and output offset. */
struct { | |||
const void *input_ptr; | |||
unsigned input_stride; | |||
unsigned output_offset; | |||
} attrib[PIPE_MAX_ATTRIBS]; | |||
}; | |||
/* State carried around while generating x86 code for one varient:
 * the function being emitted, the varient it is for, counters, XMM
 * register bookkeeping and the fixed roles of the general-purpose
 * registers.
 */
struct aos_compilation { | |||
struct x86_function *func; | |||
struct draw_vs_varient_aos_sse *vaos; | |||
unsigned insn_counter; | |||
unsigned num_immediates; | |||
unsigned count; | |||
unsigned lit_count; | |||
/* Bookkeeping for each of the eight XMM registers: which shader
 * register (file, idx) it currently holds, a dirty field and a
 * last_used stamp.
 */
struct { | |||
unsigned idx:16; | |||
unsigned file:8; | |||
unsigned dirty:8; | |||
unsigned last_used; | |||
} xmm[8]; | |||
boolean input_fetched[PIPE_MAX_ATTRIBS]; | |||
unsigned output_last_write[PIPE_MAX_ATTRIBS]; | |||
boolean have_sse2; | |||
/* Set by the ERROR() macro when translation fails. */
boolean error; | |||
short fpucntl; | |||
/* these are actually known values, but putting them in a struct | |||
* like this is helpful to keep them in sync across the file. | |||
*/ | |||
struct x86_reg tmp_EAX; | |||
struct x86_reg idx_EBX; /* either start+i or &elt[i] */ | |||
struct x86_reg outbuf_ECX; | |||
struct x86_reg machine_EDX; | |||
struct x86_reg count_ESI; /* decrements to zero */ | |||
}; | |||
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); | |||
void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx ); | |||
void aos_adopt_xmm_reg( struct aos_compilation *cp, | |||
struct x86_reg reg, | |||
unsigned file, | |||
unsigned idx, | |||
unsigned dirty ); | |||
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, | |||
unsigned file, | |||
unsigned idx ); | |||
boolean aos_fetch_inputs( struct aos_compilation *cp, | |||
boolean linear ); | |||
boolean aos_emit_outputs( struct aos_compilation *cp ); | |||
#define IMM_ONES 0 /* 1, 1,1,1 */ | |||
#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */ | |||
#define IMM_IDENTITY 2 /* 0, 0,0,1 */ | |||
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ | |||
#define IMM_255 4 /* 255, 255, 255, 255 */ | |||
#define IMM_NEGS 5 /* -1,-1,-1,-1 */ | |||
#define IMM_RSQ 6 /* -.5,1.5,_,_ */ | |||
#define IMM_PSIZE 7 /* not really an immediate - updated each run */ | |||
struct x86_reg aos_get_internal( struct aos_compilation *cp, | |||
unsigned imm ); | |||
struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, | |||
unsigned imm ); | |||
/* Report a failed x86 translation: print the message together with the
 * enclosing function name, flag the error in the aos_compilation and
 * assert.  The cp argument is parenthesised so that any pointer
 * expression can be passed safely.
 */
#define ERROR(cp, msg) \
do { \
   debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \
   (cp)->error = 1; \
   assert(0); \
} while (0)
/* The aos_sse implementation of a vertex shader varient.  'base' comes
 * first so the struct can be used through a draw_vs_varient pointer.
 */
struct draw_vs_varient_aos_sse { | |||
struct draw_vs_varient base; | |||
struct draw_context *draw; | |||
#if 0 | |||
struct { | |||
const void *ptr; | |||
unsigned stride; | |||
} attrib[PIPE_MAX_ATTRIBS]; | |||
#endif | |||
struct aos_machine *machine; /* XXX: temporarily unshared */ | |||
/* Entry points for the linear and the indexed run functions.
 * NOTE(review): presumably these point into func[] below -- confirm.
 */
vsv_run_linear_func gen_run_linear; | |||
vsv_run_elts_func gen_run_elts; | |||
struct x86_function func[2]; | |||
}; | |||
#endif | |||
@@ -0,0 +1,326 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
#include "pipe/p_util.h" | |||
#include "pipe/p_shader_tokens.h" | |||
#include "tgsi/util/tgsi_parse.h" | |||
#include "tgsi/util/tgsi_util.h" | |||
#include "tgsi/exec/tgsi_exec.h" | |||
#include "draw_vs.h" | |||
#include "draw_vs_aos.h" | |||
#include "draw_vertex.h" | |||
#include "rtasm/rtasm_x86sse.h" | |||
#ifdef PIPE_ARCH_X86 | |||
/* Note - don't yet have to worry about interacting with the code in | |||
* draw_vs_aos.c as there is no intermingling of generated code... | |||
* That may have to change, we'll see. | |||
*/ | |||
/* Emit a single movups from src_ptr into 'data'.  load_input() uses
 * this for PIPE_FORMAT_R32G32B32A32_FLOAT.
 */
static void emit_load_R32G32B32A32( struct aos_compilation *cp, | |||
struct x86_reg data, | |||
struct x86_reg src_ptr ) | |||
{ | |||
sse_movups(cp->func, data, src_ptr); | |||
} | |||
/* Emit the load sequence for a three-float attribute: movss from
 * src_ptr + 8, two shufps (the first against the IMM_IDENTITY
 * internal), then movlps from src_ptr.
 * NOTE(review): the net result looks like (x, y, z, 1) -- confirm
 * against the SHUF() operand order.
 */
static void emit_load_R32G32B32( struct aos_compilation *cp, | |||
struct x86_reg data, | |||
struct x86_reg src_ptr ) | |||
{ | |||
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); | |||
sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); | |||
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); | |||
sse_movlps(cp->func, data, src_ptr); | |||
} | |||
/* Emit the load sequence for a two-float attribute: copy the
 * IMM_IDENTITY internal into 'data', then movlps from src_ptr.
 * NOTE(review): presumably yields (x, y, 0, 1) -- confirm.
 */
static void emit_load_R32G32( struct aos_compilation *cp, | |||
struct x86_reg data, | |||
struct x86_reg src_ptr ) | |||
{ | |||
sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); | |||
sse_movlps(cp->func, data, src_ptr); | |||
} | |||
/* Emit the load sequence for a single-float attribute: movss from
 * src_ptr, then orps with the IMM_IDENTITY internal.
 * NOTE(review): presumably yields (x, 0, 0, 1) -- confirm.
 */
static void emit_load_R32( struct aos_compilation *cp, | |||
struct x86_reg data, | |||
struct x86_reg src_ptr ) | |||
{ | |||
sse_movss(cp->func, data, src_ptr); | |||
sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); | |||
} | |||
/* Emit the load sequence for four packed bytes: movss from src_ptr,
 * two punpcklbw steps against the IMM_IDENTITY internal, cvtdq2ps to
 * convert to float, and a multiply by the IMM_INV_255 internal
 * (1/255).  Uses the sse2_* emitters.
 * NOTE(review): relies on the part of IMM_IDENTITY consumed by
 * punpcklbw being zero -- confirm.
 */
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, | |||
struct x86_reg data, | |||
struct x86_reg src_ptr ) | |||
{ | |||
sse_movss(cp->func, data, src_ptr); | |||
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); | |||
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); | |||
sse2_cvtdq2ps(cp->func, data, data); | |||
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); | |||
} | |||
/* Emit code that computes the address of attribute 'a' for the current
 * vertex:
 *
 *    src = attrib[a].input_stride * elt + attrib[a].input_ptr
 *
 * Stride and pointer are read from the aos_machine addressed by the
 * 'machine' register; the result is left in 'src'.
 */
static void get_src_ptr( struct x86_function *func, | |||
struct x86_reg src, | |||
struct x86_reg machine, | |||
struct x86_reg elt, | |||
unsigned a ) | |||
{ | |||
struct x86_reg input_ptr = | |||
x86_make_disp(machine, | |||
Offset(struct aos_machine, attrib[a].input_ptr)); | |||
struct x86_reg input_stride = | |||
x86_make_disp(machine, | |||
Offset(struct aos_machine, attrib[a].input_stride)); | |||
/* Calculate pointer to current attrib: | |||
*/ | |||
x86_mov(func, src, input_stride); | |||
x86_imul(func, src, elt); | |||
x86_add(func, src, input_ptr); | |||
} | |||
/* Emit a shufps of 'src' into 'dest' using the given shuffle mask.
 * Extended swizzles? Maybe later.
 */
static void emit_swizzle( struct aos_compilation *cp, | |||
struct x86_reg dest, | |||
struct x86_reg src, | |||
unsigned shuffle ) | |||
{ | |||
sse_shufps(cp->func, dest, src, shuffle); | |||
} | |||
/**
 * Emit the code that fetches input attribute 'idx' into a freshly
 * obtained XMM register and records that register as holding
 * TGSI_FILE_INPUT[idx].
 *
 * \param cp      compilation state
 * \param idx     index into key.element[]; selects the input format
 * \param linear  TRUE: idx_EBX itself is used as the element number;
 *                FALSE: the element number is read through idx_EBX
 * \return TRUE on success, FALSE (after ERROR()) when the input format
 *         is not handled
 */
static boolean load_input( struct aos_compilation *cp,
                           unsigned idx,
                           boolean linear )
{
   const unsigned format = cp->vaos->base.key.element[idx].in.format;
   struct x86_reg addr = cp->tmp_EAX;
   struct x86_reg xmm = aos_get_xmm_reg(cp);
   struct x86_reg elt;

   if (linear)
      elt = cp->idx_EBX;
   else
      elt = x86_deref(cp->idx_EBX);

   /* Work out where the attribute lives, then address through it:
    */
   get_src_ptr(cp->func, addr, cp->machine_EDX, elt, idx);
   addr = x86_deref(addr);

   aos_adopt_xmm_reg( cp, xmm, TGSI_FILE_INPUT, idx, TRUE );

   switch (format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(cp, xmm, addr);
      return TRUE;

   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(cp, xmm, addr);
      return TRUE;

   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(cp, xmm, addr);
      return TRUE;

   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(cp, xmm, addr);
      return TRUE;

   case PIPE_FORMAT_B8G8R8A8_UNORM:
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(cp, xmm, addr);
      /* The B8G8R8A8 variant additionally swaps X and Z. */
      if (format == PIPE_FORMAT_B8G8R8A8_UNORM)
         emit_swizzle(cp, xmm, xmm, SHUF(Z,Y,X,W));
      return TRUE;

   default:
      break;
   }

   ERROR(cp, "unhandled input format");
   return FALSE;
}
/**
 * Emit the fetch code for every input named in the varient key.
 *
 * \param linear  passed through to load_input()
 * \return FALSE as soon as one input cannot be loaded, TRUE otherwise
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned attr = 0;

   while (attr < cp->vaos->base.key.nr_inputs) {
      if (!load_input( cp, attr, linear ))
         return FALSE;

      cp->insn_counter++;
      debug_printf("\n");
      attr++;
   }

   return TRUE;
}
/* Emit a single movups that writes dataXMM to dst_ptr.  emit_output()
 * uses this for EMIT_4F.
 */
static void emit_store_R32G32B32A32( struct aos_compilation *cp, | |||
struct x86_reg dst_ptr, | |||
struct x86_reg dataXMM ) | |||
{ | |||
sse_movups(cp->func, dst_ptr, dataXMM); | |||
} | |||
/* Emit the store sequence for three floats: movlps to dst_ptr, a
 * shufps that replicates Z across dataXMM, then movss to dst_ptr + 8.
 * The shuffle overwrites dataXMM, so its previous contents are lost.
 */
static void emit_store_R32G32B32( struct aos_compilation *cp, | |||
struct x86_reg dst_ptr, | |||
struct x86_reg dataXMM ) | |||
{ | |||
sse_movlps(cp->func, dst_ptr, dataXMM); | |||
sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ | |||
sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM); | |||
} | |||
/* Emit a single movlps that writes dataXMM to dst_ptr.  emit_output()
 * uses this for EMIT_2F.
 */
static void emit_store_R32G32( struct aos_compilation *cp, | |||
struct x86_reg dst_ptr, | |||
struct x86_reg dataXMM ) | |||
{ | |||
sse_movlps(cp->func, dst_ptr, dataXMM); | |||
} | |||
/* Emit a single movss that writes dataXMM to dst_ptr.  emit_output()
 * uses this for EMIT_1F and EMIT_1F_PSIZE.
 */
static void emit_store_R32( struct aos_compilation *cp, | |||
struct x86_reg dst_ptr, | |||
struct x86_reg dataXMM ) | |||
{ | |||
sse_movss(cp->func, dst_ptr, dataXMM); | |||
} | |||
/* Emit the store sequence for four packed bytes: multiply by the
 * IMM_255 internal, convert to integers (cvtps2dq), pack twice
 * (packssdw, packuswb) and write the low dword with movss.
 * Every step overwrites dataXMM.  Uses the sse2_* emitters.
 */
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp, | |||
struct x86_reg dst_ptr, | |||
struct x86_reg dataXMM ) | |||
{ | |||
sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255)); | |||
sse2_cvtps2dq(cp->func, dataXMM, dataXMM); | |||
sse2_packssdw(cp->func, dataXMM, dataXMM); | |||
sse2_packuswb(cp->func, dataXMM, dataXMM); | |||
sse_movss(cp->func, dst_ptr, dataXMM); | |||
} | |||
static boolean emit_output( struct aos_compilation *cp, | |||
struct x86_reg ptr, | |||
struct x86_reg dataXMM, | |||
unsigned format ) | |||
{ | |||
switch (format) { | |||
case EMIT_1F: | |||
case EMIT_1F_PSIZE: | |||
emit_store_R32(cp, ptr, dataXMM); | |||
break; | |||
case EMIT_2F: | |||
emit_store_R32G32(cp, ptr, dataXMM); | |||
break; | |||
case EMIT_3F: | |||
emit_store_R32G32B32(cp, ptr, dataXMM); | |||
break; | |||
case EMIT_4F: | |||
emit_store_R32G32B32A32(cp, ptr, dataXMM); | |||
break; | |||
case EMIT_4UB: | |||
if (1) { | |||
emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); | |||
emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); | |||
} | |||
else { | |||
emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); | |||
} | |||
break; | |||
default: | |||
ERROR(cp, "unhandled output format"); | |||
return FALSE; | |||
} | |||
return TRUE; | |||
} | |||
boolean aos_emit_outputs( struct aos_compilation *cp ) | |||
{ | |||
unsigned i; | |||
for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) { | |||
unsigned format = cp->vaos->base.key.element[i].out.format; | |||
unsigned offset = cp->vaos->base.key.element[i].out.offset; | |||
unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; | |||
struct x86_reg data; | |||
if (format == EMIT_1F_PSIZE) { | |||
data = aos_get_internal_xmm( cp, IMM_PSIZE ); | |||
} | |||
else { | |||
data = aos_get_shader_reg( cp, | |||
TGSI_FILE_OUTPUT, | |||
vs_output ); | |||
} | |||
if (data.file != file_XMM) { | |||
struct x86_reg tmp = aos_get_xmm_reg( cp ); | |||
sse_movups(cp->func, tmp, data); | |||
data = tmp; | |||
} | |||
if (!emit_output( cp, | |||
x86_make_disp( cp->outbuf_ECX, offset ), | |||
data, | |||
format )) | |||
return FALSE; | |||
aos_release_xmm_reg( cp, data.idx ); | |||
cp->insn_counter++; | |||
debug_printf("\n"); | |||
} | |||
return TRUE; | |||
} | |||
#endif |
@@ -179,10 +179,12 @@ draw_create_vs_exec(struct draw_context *draw, | |||
tgsi_scan_shader(state->tokens, &vs->base.info); | |||
vs->base.draw = draw; | |||
vs->base.prepare = vs_exec_prepare; | |||
vs->base.run_linear = vs_exec_run_linear; | |||
vs->base.delete = vs_exec_delete; | |||
vs->machine = &draw->machine; | |||
vs->base.create_varient = draw_vs_varient_generic; | |||
vs->machine = &draw->vs.machine; | |||
return &vs->base; | |||
} |
@@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw, | |||
tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); | |||
vs->base.draw = draw; | |||
vs->base.prepare = vs_llvm_prepare; | |||
vs->base.create_varient = draw_vs_varient_generic; | |||
vs->base.run_linear = vs_llvm_run_linear; | |||
vs->base.delete = vs_llvm_delete; | |||
vs->machine = &draw->machine; |
@@ -49,9 +49,7 @@ | |||
#include "tgsi/util/tgsi_parse.h" | |||
#define SSE_MAX_VERTICES 4 | |||
#define SSE_SWIZZLES 1 | |||
#if SSE_SWIZZLES | |||
typedef void (XSTDCALL *codegen_function) ( | |||
const struct tgsi_exec_vector *input, /* 1 */ | |||
struct tgsi_exec_vector *output, /* 2 */ | |||
@@ -64,14 +62,6 @@ typedef void (XSTDCALL *codegen_function) ( | |||
float (*aos_output)[4], /* 9 */ | |||
uint num_outputs, /* 10 */ | |||
uint output_stride ); /* 11 */ | |||
#else | |||
typedef void (XSTDCALL *codegen_function) ( | |||
const struct tgsi_exec_vector *input, | |||
struct tgsi_exec_vector *output, | |||
float (*constant)[4], | |||
struct tgsi_exec_vector *temporary, | |||
float (*immediates)[4] ); | |||
#endif | |||
struct draw_sse_vertex_shader { | |||
struct draw_vertex_shader base; | |||
@@ -113,7 +103,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, | |||
for (i = 0; i < count; i += MAX_TGSI_VERTICES) { | |||
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); | |||
#if SSE_SWIZZLES | |||
/* run compiled shader | |||
*/ | |||
shader->func(machine->Inputs, | |||
@@ -130,43 +119,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, | |||
input = (const float (*)[4])((const char *)input + input_stride * max_vertices); | |||
output = (float (*)[4])((char *)output + output_stride * max_vertices); | |||
#else | |||
unsigned int j, slot; | |||
/* Swizzle inputs. | |||
*/ | |||
for (j = 0; j < max_vertices; j++) { | |||
for (slot = 0; slot < base->info.num_inputs; slot++) { | |||
machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; | |||
machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; | |||
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; | |||
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; | |||
} | |||
input = (const float (*)[4])((const char *)input + input_stride); | |||
} | |||
/* run compiled shader | |||
*/ | |||
shader->func(machine->Inputs, | |||
machine->Outputs, | |||
(float (*)[4])constants, | |||
machine->Temps, | |||
shader->immediates); | |||
/* Unswizzle all output results. | |||
*/ | |||
for (j = 0; j < max_vertices; j++) { | |||
for (slot = 0; slot < base->info.num_outputs; slot++) { | |||
output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; | |||
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; | |||
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; | |||
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; | |||
} | |||
output = (float (*)[4])((char *)output + output_stride); | |||
} | |||
#endif | |||
} | |||
} | |||
@@ -205,15 +157,18 @@ draw_create_vs_sse(struct draw_context *draw, | |||
tgsi_scan_shader(templ->tokens, &vs->base.info); | |||
vs->base.draw = draw; | |||
vs->base.create_varient = draw_vs_varient_aos_sse; | |||
// vs->base.create_varient = draw_vs_varient_generic; | |||
vs->base.prepare = vs_sse_prepare; | |||
vs->base.run_linear = vs_sse_run_linear; | |||
vs->base.delete = vs_sse_delete; | |||
vs->machine = &draw->machine; | |||
vs->machine = &draw->vs.machine; | |||
x86_init_func( &vs->sse2_program ); | |||
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, | |||
&vs->sse2_program, vs->immediates, SSE_SWIZZLES )) | |||
&vs->sse2_program, vs->immediates, TRUE )) | |||
goto fail; | |||
vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); |
@@ -0,0 +1,326 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
/* | |||
* Authors: | |||
* Keith Whitwell <keith@tungstengraphics.com> | |||
*/ | |||
#include "pipe/p_util.h" | |||
#include "draw/draw_context.h" | |||
#include "draw/draw_private.h" | |||
#include "draw/draw_vbuf.h" | |||
#include "draw/draw_vertex.h" | |||
#include "draw/draw_vs.h" | |||
#include "translate/translate.h" | |||
#include "translate/translate_cache.h" | |||
/* A first pass at incorporating vertex fetch/emit functionality into
 * the vertex shader: a generic varient that wraps the shader's
 * existing run_linear() with translate objects.
 */
struct draw_vs_varient_generic { | |||
struct draw_vs_varient base; | |||
/* Scale/translate applied by do_viewport() and do_rhw_viewport(). */
struct pipe_viewport_state viewport; | |||
struct draw_vertex_shader *shader; | |||
struct draw_context *draw; | |||
/* Basic plan is to run these two translate functions before/after | |||
* the vertex shader's existing run_linear() routine to simulate | |||
* the inclusion of this functionality into the shader... | |||
* | |||
* Next will look at actually including it. | |||
*/ | |||
struct translate *fetch; | |||
struct translate *emit; | |||
/* Constant buffer handed to run_linear(); set by vsvg_set_constants(). */
const float (*constants)[4]; | |||
}; | |||
static void vsvg_set_constants( struct draw_vs_varient *varient, | |||
const float (*constants)[4] ) | |||
{ | |||
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
vsvg->constants = constants; | |||
} | |||
static void vsvg_set_input( struct draw_vs_varient *varient, | |||
unsigned buffer, | |||
const void *ptr, | |||
unsigned stride ) | |||
{ | |||
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
vsvg->fetch->set_buffer(vsvg->fetch, | |||
buffer, | |||
ptr, | |||
stride); | |||
} | |||
/* Mainly for debug at this stage: | |||
*/ | |||
static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, | |||
unsigned count, | |||
void *output_buffer ) | |||
{ | |||
char *ptr = (char *)output_buffer; | |||
const float *scale = vsvg->viewport.scale; | |||
const float *trans = vsvg->viewport.translate; | |||
unsigned stride = vsvg->base.key.output_stride; | |||
unsigned j; | |||
for (j = 0; j < count; j++, ptr += stride) { | |||
float *data = (float *)ptr; | |||
float w = 1.0f / data[3]; | |||
data[0] = data[0] * w * scale[0] + trans[0]; | |||
data[1] = data[1] * w * scale[1] + trans[1]; | |||
data[2] = data[2] * w * scale[2] + trans[2]; | |||
data[3] = w; | |||
} | |||
} | |||
static void do_viewport( struct draw_vs_varient_generic *vsvg, | |||
unsigned count, | |||
void *output_buffer ) | |||
{ | |||
char *ptr = (char *)output_buffer; | |||
const float *scale = vsvg->viewport.scale; | |||
const float *trans = vsvg->viewport.translate; | |||
unsigned stride = vsvg->base.key.output_stride; | |||
unsigned j; | |||
for (j = 0; j < count; j++, ptr += stride) { | |||
float *data = (float *)ptr; | |||
data[0] = data[0] * scale[0] + trans[0]; | |||
data[1] = data[1] * scale[1] + trans[1]; | |||
data[2] = data[2] * scale[2] + trans[2]; | |||
} | |||
} | |||
static void vsvg_run_elts( struct draw_vs_varient *varient, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer) | |||
{ | |||
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
/* Want to do this in small batches for cache locality? | |||
*/ | |||
vsvg->fetch->run_elts( vsvg->fetch, | |||
elts, | |||
count, | |||
output_buffer ); | |||
//if (!vsvg->base.vs->is_passthrough) | |||
{ | |||
vsvg->base.vs->run_linear( vsvg->base.vs, | |||
output_buffer, | |||
output_buffer, | |||
vsvg->constants, | |||
count, | |||
vsvg->base.key.output_stride, | |||
vsvg->base.key.output_stride); | |||
if (vsvg->base.key.clip) { | |||
/* not really handling clipping, just do the rhw so we can | |||
* see the results... | |||
*/ | |||
do_rhw_viewport( vsvg, | |||
count, | |||
output_buffer ); | |||
} | |||
else if (vsvg->base.key.viewport) { | |||
do_viewport( vsvg, | |||
count, | |||
output_buffer ); | |||
} | |||
//if (!vsvg->already_in_emit_format) | |||
vsvg->emit->set_buffer( vsvg->emit, | |||
0, | |||
output_buffer, | |||
vsvg->base.key.output_stride ); | |||
vsvg->emit->run( vsvg->emit, | |||
0, count, | |||
output_buffer ); | |||
} | |||
} | |||
static void vsvg_run_linear( struct draw_vs_varient *varient, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer ) | |||
{ | |||
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
//debug_printf("%s %d %d\n", __FUNCTION__, start, count); | |||
vsvg->fetch->run( vsvg->fetch, | |||
start, | |||
count, | |||
output_buffer ); | |||
//if (!vsvg->base.vs->is_passthrough) | |||
{ | |||
vsvg->base.vs->run_linear( vsvg->base.vs, | |||
output_buffer, | |||
output_buffer, | |||
vsvg->constants, | |||
count, | |||
vsvg->base.key.output_stride, | |||
vsvg->base.key.output_stride); | |||
if (vsvg->base.key.clip) { | |||
/* not really handling clipping, just do the rhw so we can | |||
* see the results... | |||
*/ | |||
do_rhw_viewport( vsvg, | |||
count, | |||
output_buffer ); | |||
} | |||
else if (vsvg->base.key.viewport) { | |||
do_viewport( vsvg, | |||
count, | |||
output_buffer ); | |||
} | |||
//if (!vsvg->already_in_emit_format) | |||
vsvg->emit->set_buffer( vsvg->emit, | |||
0, | |||
output_buffer, | |||
vsvg->base.key.output_stride ); | |||
vsvg->emit->set_buffer( vsvg->emit, | |||
1, | |||
&vsvg->draw->rasterizer->point_size, | |||
0); | |||
vsvg->emit->run( vsvg->emit, | |||
0, count, | |||
output_buffer ); | |||
} | |||
} | |||
static void vsvg_set_viewport( struct draw_vs_varient *varient, | |||
const struct pipe_viewport_state *viewport ) | |||
{ | |||
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
vsvg->viewport = *viewport; | |||
} | |||
/**
 * Release the varient object itself.  The fetch and emit translate
 * objects it points at are not released here.
 */
static void vsvg_destroy( struct draw_vs_varient *varient )
{
   FREE( varient );
}
struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, | |||
const struct draw_vs_varient_key *key ) | |||
{ | |||
unsigned i; | |||
struct translate_key fetch, emit; | |||
struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); | |||
if (vsvg == NULL) | |||
return NULL; | |||
vsvg->base.key = *key; | |||
vsvg->base.vs = vs; | |||
vsvg->base.set_input = vsvg_set_input; | |||
vsvg->base.set_constants = vsvg_set_constants; | |||
vsvg->base.set_viewport = vsvg_set_viewport; | |||
vsvg->base.run_elts = vsvg_run_elts; | |||
vsvg->base.run_linear = vsvg_run_linear; | |||
vsvg->base.destroy = vsvg_destroy; | |||
/* Build free-standing fetch and emit functions: | |||
*/ | |||
fetch.nr_elements = key->nr_inputs; | |||
fetch.output_stride = 0; | |||
for (i = 0; i < key->nr_inputs; i++) { | |||
fetch.element[i].input_format = key->element[i].in.format; | |||
fetch.element[i].input_buffer = key->element[i].in.buffer; | |||
fetch.element[i].input_offset = key->element[i].in.offset; | |||
fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; | |||
fetch.element[i].output_offset = fetch.output_stride; | |||
fetch.output_stride += 4 * sizeof(float); | |||
} | |||
emit.nr_elements = key->nr_outputs; | |||
emit.output_stride = key->output_stride; | |||
for (i = 0; i < key->nr_outputs; i++) { | |||
if (key->element[i].out.format != EMIT_1F_PSIZE) | |||
{ | |||
emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; | |||
emit.element[i].input_buffer = 0; | |||
emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); | |||
emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); | |||
emit.element[i].output_offset = key->element[i].out.offset; | |||
} | |||
else { | |||
emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; | |||
emit.element[i].input_buffer = 1; | |||
emit.element[i].input_offset = 0; | |||
emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; | |||
emit.element[i].output_offset = key->element[i].out.offset; | |||
} | |||
} | |||
vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); | |||
vsvg->emit = draw_vs_get_emit( vs->draw, &emit ); | |||
return &vsvg->base; | |||
} | |||
@@ -38,11 +38,8 @@ | |||
#define DUMP_SSE 0 | |||
#if DUMP_SSE | |||
static void | |||
_print_reg( | |||
struct x86_reg reg ) | |||
void x86_print_reg( struct x86_reg reg ) | |||
{ | |||
if (reg.mod != mod_REG) | |||
debug_printf( "[" ); | |||
@@ -79,6 +76,7 @@ _print_reg( | |||
debug_printf( "]" ); | |||
} | |||
#if DUMP_SSE | |||
#define DUMP_START() debug_printf( "\n" ) | |||
#define DUMP_END() debug_printf( "\n" ) | |||
@@ -89,7 +87,7 @@ _print_reg( | |||
foo++; \ | |||
if (*foo) \ | |||
foo++; \ | |||
debug_printf( "\n% 15s ", foo ); \ | |||
debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ | |||
} while (0) | |||
#define DUMP_I( I ) do { \ | |||
@@ -99,27 +97,27 @@ _print_reg( | |||
#define DUMP_R( R0 ) do { \ | |||
DUMP(); \ | |||
_print_reg( R0 ); \ | |||
x86_print_reg( R0 ); \ | |||
} while( 0 ) | |||
#define DUMP_RR( R0, R1 ) do { \ | |||
DUMP(); \ | |||
_print_reg( R0 ); \ | |||
x86_print_reg( R0 ); \ | |||
debug_printf( ", " ); \ | |||
_print_reg( R1 ); \ | |||
x86_print_reg( R1 ); \ | |||
} while( 0 ) | |||
#define DUMP_RI( R0, I ) do { \ | |||
DUMP(); \ | |||
_print_reg( R0 ); \ | |||
x86_print_reg( R0 ); \ | |||
debug_printf( ", %u", I ); \ | |||
} while( 0 ) | |||
#define DUMP_RRI( R0, R1, I ) do { \ | |||
DUMP(); \ | |||
_print_reg( R0 ); \ | |||
x86_print_reg( R0 ); \ | |||
debug_printf( ", " ); \ | |||
_print_reg( R1 ); \ | |||
x86_print_reg( R1 ); \ | |||
debug_printf( ", %u", I ); \ | |||
} while( 0 ) | |||
@@ -222,6 +220,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1 | |||
/* Build a modRM byte + possible displacement. No treatment of SIB | |||
* indexing. BZZT - no way to encode an absolute address. | |||
* | |||
* This is the "/r" field in the x86 manuals... | |||
*/ | |||
static void emit_modrm( struct x86_function *p, | |||
struct x86_reg reg, | |||
@@ -260,7 +260,8 @@ static void emit_modrm( struct x86_function *p, | |||
} | |||
} | |||
/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. | |||
*/ | |||
static void emit_modrm_noreg( struct x86_function *p, | |||
unsigned op, | |||
struct x86_reg regmem ) | |||
@@ -369,8 +370,7 @@ void x86_jcc( struct x86_function *p, | |||
DUMP_I(cc); | |||
if (offset < 0) { | |||
int amt = p->csr - p->store; | |||
assert(amt > -offset); | |||
assert(p->csr - p->store > -offset); | |||
} | |||
if (offset <= 127 && offset >= -128) { | |||
@@ -447,6 +447,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) | |||
emit_1i(p, imm); | |||
} | |||
/**
 * Emit an ADD of an 8-bit immediate to a register operand: opcode 0x80,
 * a ModRM byte with opcode extension /0, then the immediate byte.
 * 'dst' must be a direct register (mod_REG).
 *
 * NOTE(review): in the Intel encoding tables 0x80 /0 ib is the
 * "ADD r/m8, imm8" form; the sign-extended-imm8 add to a 32-bit
 * register is 0x83 /0 ib.  Confirm which one callers expect.
 */
void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
{
   const unsigned char opcode = 0x80;

   DUMP_RI( dst, imm );
   assert(mod_REG == dst.mod);

   emit_1ub(p, opcode);
   emit_modrm_noreg(p, 0, dst);
   emit_1ub(p, imm);
}
void x86_push( struct x86_function *p, | |||
struct x86_reg reg ) | |||
{ | |||
@@ -463,6 +473,17 @@ void x86_push( struct x86_function *p, | |||
p->stack_offset += 4; | |||
} | |||
void x86_push_imm32( struct x86_function *p, | |||
int imm32 ) | |||
{ | |||
DUMP_I( imm32 ); | |||
emit_1ub(p, 0x68); | |||
emit_1i(p, imm32); | |||
p->stack_offset += 4; | |||
} | |||
void x86_pop( struct x86_function *p, | |||
struct x86_reg reg ) | |||
{ | |||
@@ -990,6 +1011,24 @@ void sse2_movd( struct x86_function *p, | |||
/*********************************************************************** | |||
* x87 instructions | |||
*/ | |||
static void note_x87_pop( struct x86_function *p ) | |||
{ | |||
p->x87_stack--; | |||
assert(p->x87_stack >= 0); | |||
} | |||
static void note_x87_push( struct x86_function *p ) | |||
{ | |||
p->x87_stack++; | |||
assert(p->x87_stack <= 7); | |||
} | |||
void x87_assert_stack_empty( struct x86_function *p ) | |||
{ | |||
assert (p->x87_stack == 0); | |||
} | |||
void x87_fist( struct x86_function *p, struct x86_reg dst ) | |||
{ | |||
DUMP_R( dst ); | |||
@@ -1002,6 +1041,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst ) | |||
DUMP_R( dst ); | |||
emit_1ub(p, 0xdb); | |||
emit_modrm_noreg(p, 3, dst); | |||
note_x87_pop(p); | |||
} | |||
void x87_fild( struct x86_function *p, struct x86_reg arg ) | |||
@@ -1009,12 +1049,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg ) | |||
DUMP_R( arg ); | |||
emit_1ub(p, 0xdf); | |||
emit_modrm_noreg(p, 0, arg); | |||
note_x87_push(p); | |||
} | |||
void x87_fldz( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xd9, 0xee); | |||
note_x87_push(p); | |||
} | |||
@@ -1031,18 +1073,21 @@ void x87_fld1( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xd9, 0xe8); | |||
note_x87_push(p); | |||
} | |||
void x87_fldl2e( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xd9, 0xea); | |||
note_x87_push(p); | |||
} | |||
void x87_fldln2( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xd9, 0xed); | |||
note_x87_push(p); | |||
} | |||
void x87_fwait( struct x86_function *p ) | |||
@@ -1063,6 +1108,49 @@ void x87_fclex( struct x86_function *p ) | |||
x87_fnclex(p); | |||
} | |||
void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) | |||
{ | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xda, 0xc0+arg.idx); | |||
} | |||
void x87_fcmove( struct x86_function *p, struct x86_reg arg ) | |||
{ | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xda, 0xc8+arg.idx); | |||
} | |||
void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) | |||
{ | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xda, 0xd0+arg.idx); | |||
} | |||
void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) | |||
{ | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xdb, 0xc0+arg.idx); | |||
} | |||
void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) | |||
{ | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xdb, 0xc8+arg.idx); | |||
} | |||
void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) | |||
{ | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xdb, 0xd0+arg.idx); | |||
} | |||
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, | |||
unsigned char dst0ub0, | |||
@@ -1150,6 +1238,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) | |||
assert(dst.file == file_x87); | |||
assert(dst.idx >= 1); | |||
emit_2ub(p, 0xde, 0xc8+dst.idx); | |||
note_x87_pop(p); | |||
} | |||
void x87_fsubp( struct x86_function *p, struct x86_reg dst ) | |||
@@ -1158,6 +1247,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) | |||
assert(dst.file == file_x87); | |||
assert(dst.idx >= 1); | |||
emit_2ub(p, 0xde, 0xe8+dst.idx); | |||
note_x87_pop(p); | |||
} | |||
void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) | |||
@@ -1166,6 +1256,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) | |||
assert(dst.file == file_x87); | |||
assert(dst.idx >= 1); | |||
emit_2ub(p, 0xde, 0xe0+dst.idx); | |||
note_x87_pop(p); | |||
} | |||
void x87_faddp( struct x86_function *p, struct x86_reg dst ) | |||
@@ -1174,6 +1265,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) | |||
assert(dst.file == file_x87); | |||
assert(dst.idx >= 1); | |||
emit_2ub(p, 0xde, 0xc0+dst.idx); | |||
note_x87_pop(p); | |||
} | |||
void x87_fdivp( struct x86_function *p, struct x86_reg dst ) | |||
@@ -1182,6 +1274,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) | |||
assert(dst.file == file_x87); | |||
assert(dst.idx >= 1); | |||
emit_2ub(p, 0xde, 0xf8+dst.idx); | |||
note_x87_pop(p); | |||
} | |||
void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) | |||
@@ -1190,6 +1283,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) | |||
assert(dst.file == file_x87); | |||
assert(dst.idx >= 1); | |||
emit_2ub(p, 0xde, 0xf0+dst.idx); | |||
note_x87_pop(p); | |||
} | |||
/**
 * Emit FTST (bytes D9 E4), which compares st(0) against 0.0.
 * The tracked stack depth is not changed.
 */
void x87_ftst( struct x86_function *p )
{
   DUMP();

   emit_2ub(p, 0xd9, 0xe4);
}
void x87_fucom( struct x86_function *p, struct x86_reg arg ) | |||
@@ -1204,12 +1304,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg ) | |||
DUMP_R( arg ); | |||
assert(arg.file == file_x87); | |||
emit_2ub(p, 0xdd, 0xe8+arg.idx); | |||
note_x87_pop(p); | |||
} | |||
void x87_fucompp( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xda, 0xe9); | |||
note_x87_pop(p); /* pop twice */ | |||
note_x87_pop(p); /* pop twice */ | |||
} | |||
void x87_fxch( struct x86_function *p, struct x86_reg arg ) | |||
@@ -1291,6 +1394,7 @@ void x87_fyl2x( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xd9, 0xf1); | |||
note_x87_pop(p); | |||
} | |||
/* st1 = st1 * log2(st0 + 1.0); | |||
@@ -1302,6 +1406,7 @@ void x87_fyl2xp1( struct x86_function *p ) | |||
{ | |||
DUMP(); | |||
emit_2ub(p, 0xd9, 0xf9); | |||
note_x87_pop(p); | |||
} | |||
@@ -1314,6 +1419,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) | |||
emit_1ub(p, 0xd9); | |||
emit_modrm_noreg(p, 0, arg); | |||
} | |||
note_x87_push(p); | |||
} | |||
void x87_fst( struct x86_function *p, struct x86_reg dst ) | |||
@@ -1336,8 +1442,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) | |||
emit_1ub(p, 0xd9); | |||
emit_modrm_noreg(p, 3, dst); | |||
} | |||
note_x87_pop(p); | |||
} | |||
void x87_fpop( struct x86_function *p ) | |||
{ | |||
x87_fstp( p, x86_make_reg( file_x87, 0 )); | |||
} | |||
void x87_fcom( struct x86_function *p, struct x86_reg dst ) | |||
{ | |||
DUMP_R( dst ); | |||
@@ -1349,6 +1462,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) | |||
} | |||
} | |||
void x87_fcomp( struct x86_function *p, struct x86_reg dst ) | |||
{ | |||
DUMP_R( dst ); | |||
@@ -1358,6 +1472,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) | |||
emit_1ub(p, 0xd8); | |||
emit_modrm_noreg(p, 3, dst); | |||
} | |||
note_x87_pop(p); | |||
} | |||
/**
 * Emit FCOMI st(0), st(i): bytes DB F0+i.  The tracked stack depth is
 * not changed.  Unlike most sibling emitters, arg.file is not asserted.
 */
void x87_fcomi( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );

   emit_2ub(p, 0xdb, 0xf0 + arg.idx);
}
/**
 * Emit FCOMIP st(0), st(i): compare, set EFLAGS, then pop the x87 stack.
 *
 * The encoding is DF F0+i.  The previous first byte, DB, encodes the
 * non-popping FCOMI, so the generated code never popped while the
 * bookkeeping below recorded a pop.
 */
void x87_fcomip( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_2ub(p, 0xdf, 0xf0+arg.idx);
   note_x87_pop(p);
}
@@ -1376,6 +1504,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) | |||
} | |||
void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) | |||
{ | |||
DUMP_R( dst ); | |||
assert(dst.file == file_REG32); | |||
emit_1ub(p, 0x9b); /* WAIT -- needed? */ | |||
emit_1ub(p, 0xd9); | |||
emit_modrm_noreg(p, 7, dst); | |||
} | |||
/*********************************************************************** | |||
@@ -1444,6 +1583,21 @@ void mmx_movq( struct x86_function *p, | |||
*/ | |||
void x86_cdecl_caller_push_regs( struct x86_function *p ) | |||
{ | |||
x86_push(p, x86_make_reg(file_REG32, reg_AX)); | |||
x86_push(p, x86_make_reg(file_REG32, reg_CX)); | |||
x86_push(p, x86_make_reg(file_REG32, reg_DX)); | |||
} | |||
void x86_cdecl_caller_pop_regs( struct x86_function *p ) | |||
{ | |||
x86_pop(p, x86_make_reg(file_REG32, reg_DX)); | |||
x86_pop(p, x86_make_reg(file_REG32, reg_CX)); | |||
x86_pop(p, x86_make_reg(file_REG32, reg_AX)); | |||
} | |||
/* Retrieve a reference to one of the function arguments, taking into | |||
* account any push/pop activity: | |||
*/ |
@@ -43,10 +43,12 @@ struct x86_function { | |||
unsigned size; | |||
unsigned char *store; | |||
unsigned char *csr; | |||
unsigned stack_offset; | |||
int need_emms; | |||
unsigned stack_offset:16; | |||
unsigned need_emms:8; | |||
int x87_stack:8; | |||
unsigned char error_overflow[4]; | |||
const char *fn; | |||
}; | |||
enum x86_reg_file { | |||
@@ -109,6 +111,9 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ); | |||
void x86_release_func( struct x86_function *p ); | |||
void (*x86_get_func( struct x86_function *p ))( void ); | |||
/* Debugging: | |||
*/ | |||
void x86_print_reg( struct x86_reg reg ); | |||
/* Create and manipulate registers and regmem values: | |||
@@ -152,6 +157,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg); | |||
* I load the immediate into general purpose register and use it. | |||
*/ | |||
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); | |||
void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); | |||
/* Macro for sse_shufps() and sse2_pshufd(): | |||
@@ -222,6 +228,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
void x86_pop( struct x86_function *p, struct x86_reg reg ); | |||
void x86_push( struct x86_function *p, struct x86_reg reg ); | |||
void x86_push_imm32( struct x86_function *p, int imm ); | |||
void x86_ret( struct x86_function *p ); | |||
void x86_retw( struct x86_function *p, unsigned short imm ); | |||
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
@@ -229,13 +236,27 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
void x86_sahf( struct x86_function *p ); | |||
void x86_cdecl_caller_push_regs( struct x86_function *p ); | |||
void x86_cdecl_caller_pop_regs( struct x86_function *p ); | |||
void x87_assert_stack_empty( struct x86_function *p ); | |||
void x87_f2xm1( struct x86_function *p ); | |||
void x87_fabs( struct x86_function *p ); | |||
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
void x87_faddp( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fchs( struct x86_function *p ); | |||
void x87_fclex( struct x86_function *p ); | |||
void x87_fcmovb( struct x86_function *p, struct x86_reg src ); | |||
void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); | |||
void x87_fcmove( struct x86_function *p, struct x86_reg src ); | |||
void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); | |||
void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); | |||
void x87_fcmovne( struct x86_function *p, struct x86_reg src ); | |||
void x87_fcom( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fcomi( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fcomip( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fcomp( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fcos( struct x86_function *p ); | |||
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
@@ -255,6 +276,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
void x87_fmulp( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fnclex( struct x86_function *p ); | |||
void x87_fprndint( struct x86_function *p ); | |||
void x87_fpop( struct x86_function *p ); | |||
void x87_fscale( struct x86_function *p ); | |||
void x87_fsin( struct x86_function *p ); | |||
void x87_fsincos( struct x86_function *p ); | |||
@@ -265,11 +287,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
void x87_fsubp( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); | |||
void x87_ftst( struct x86_function *p ); | |||
void x87_fxch( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fxtract( struct x86_function *p ); | |||
void x87_fyl2x( struct x86_function *p ); | |||
void x87_fyl2xp1( struct x86_function *p ); | |||
void x87_fwait( struct x86_function *p ); | |||
void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); | |||
void x87_fucompp( struct x86_function *p ); | |||
void x87_fucomp( struct x86_function *p, struct x86_reg arg ); |
@@ -539,9 +539,9 @@ static const char *TGSI_MODULATES[] = | |||
"MODULATE_EIGHTH" | |||
}; | |||
static void | |||
dump_declaration_short( | |||
struct tgsi_full_declaration *decl ) | |||
void | |||
tgsi_dump_declaration( | |||
const struct tgsi_full_declaration *decl ) | |||
{ | |||
TXT( "\nDCL " ); | |||
ENM( decl->Declaration.File, TGSI_FILES_SHORT ); | |||
@@ -672,9 +672,9 @@ dump_declaration_verbose( | |||
} | |||
} | |||
static void | |||
dump_immediate_short( | |||
struct tgsi_full_immediate *imm ) | |||
void | |||
tgsi_dump_immediate( | |||
const struct tgsi_full_immediate *imm ) | |||
{ | |||
unsigned i; | |||
@@ -727,9 +727,9 @@ dump_immediate_verbose( | |||
} | |||
} | |||
static void | |||
dump_instruction_short( | |||
struct tgsi_full_instruction *inst, | |||
void | |||
tgsi_dump_instruction( | |||
const struct tgsi_full_instruction *inst, | |||
unsigned instno ) | |||
{ | |||
unsigned i; | |||
@@ -1281,17 +1281,17 @@ tgsi_dump( | |||
switch( parse.FullToken.Token.Type ) { | |||
case TGSI_TOKEN_TYPE_DECLARATION: | |||
dump_declaration_short( | |||
tgsi_dump_declaration( | |||
&parse.FullToken.FullDeclaration ); | |||
break; | |||
case TGSI_TOKEN_TYPE_IMMEDIATE: | |||
dump_immediate_short( | |||
tgsi_dump_immediate( | |||
&parse.FullToken.FullImmediate ); | |||
break; | |||
case TGSI_TOKEN_TYPE_INSTRUCTION: | |||
dump_instruction_short( | |||
tgsi_dump_instruction( | |||
&parse.FullToken.FullInstruction, | |||
instno ); | |||
instno++; |
@@ -14,6 +14,24 @@ tgsi_dump( | |||
const struct tgsi_token *tokens, | |||
unsigned flags ); | |||
struct tgsi_full_immediate; | |||
struct tgsi_full_instruction; | |||
struct tgsi_full_declaration; | |||
void | |||
tgsi_dump_immediate( | |||
const struct tgsi_full_immediate *imm ); | |||
void | |||
tgsi_dump_instruction( | |||
const struct tgsi_full_instruction *inst, | |||
unsigned instno ); | |||
void | |||
tgsi_dump_declaration( | |||
const struct tgsi_full_declaration *decl ); | |||
#if defined __cplusplus | |||
} | |||
#endif |
@@ -71,15 +71,15 @@ struct translate { | |||
const void *ptr, | |||
unsigned stride ); | |||
void (*run_elts)( struct translate *, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer); | |||
void (*run)( struct translate *, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer); | |||
void (PIPE_CDECL *run_elts)( struct translate *, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer); | |||
void (PIPE_CDECL *run)( struct translate *, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer); | |||
}; | |||
@@ -541,10 +541,10 @@ static emit_func get_emit_func( enum pipe_format format ) | |||
/** | |||
* Fetch vertex attributes for 'count' vertices. | |||
*/ | |||
static void generic_run_elts( struct translate *translate, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer ) | |||
static void PIPE_CDECL generic_run_elts( struct translate *translate, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer ) | |||
{ | |||
struct translate_generic *tg = translate_generic(translate); | |||
char *vert = output_buffer; | |||
@@ -580,10 +580,10 @@ static void generic_run_elts( struct translate *translate, | |||
static void generic_run( struct translate *translate, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer ) | |||
static void PIPE_CDECL generic_run( struct translate *translate, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer ) | |||
{ | |||
struct translate_generic *tg = translate_generic(translate); | |||
char *vert = output_buffer; |
@@ -46,22 +46,16 @@ | |||
#define W 3 | |||
#ifdef WIN32 | |||
#define RTASM __cdecl | |||
#else | |||
#define RTASM | |||
#endif | |||
typedef void (RTASM *run_func)( struct translate *translate, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer ); | |||
typedef void (RTASM *run_elts_func)( struct translate *translate, | |||
const unsigned *elts, | |||
typedef void (PIPE_CDECL *run_func)( struct translate *translate, | |||
unsigned start, | |||
unsigned count, | |||
void *output_buffer ); | |||
typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, | |||
const unsigned *elts, | |||
unsigned count, | |||
void *output_buffer ); | |||
struct translate_sse { | |||
@@ -473,13 +467,7 @@ static boolean build_vertex_emit( struct translate_sse *p, | |||
x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); | |||
/* Incr index | |||
*/ /* Emit code for each of the attributes. Currently routes | |||
* everything through SSE registers, even when it might be more | |||
* efficient to stick with regular old x86. No optimization or | |||
* other tricks - enough new ground to cover here just getting | |||
* things working. | |||
*/ | |||
*/ | |||
if (linear) { | |||
x86_inc(p->func, idxEBX); | |||
} |
@@ -88,7 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe ) | |||
struct pipe_winsys *ws = pipe->winsys; | |||
uint i; | |||
draw_destroy( softpipe->draw ); | |||
if (softpipe->draw) | |||
draw_destroy( softpipe->draw ); | |||
softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); | |||
softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); | |||
@@ -216,8 +217,12 @@ softpipe_create( struct pipe_screen *screen, | |||
* Create drawing context and plug our rendering stage into it. | |||
*/ | |||
softpipe->draw = draw_create(); | |||
assert(softpipe->draw); | |||
if (!softpipe->draw) | |||
goto fail; | |||
softpipe->setup = sp_draw_render_stage(softpipe); | |||
if (!softpipe->setup) | |||
goto fail; | |||
if (GETENV( "SP_NO_RAST" ) != NULL) | |||
softpipe->no_rast = TRUE; | |||
@@ -241,4 +246,8 @@ softpipe_create( struct pipe_screen *screen, | |||
sp_init_surface_functions(softpipe); | |||
return &softpipe->pipe; | |||
fail: | |||
softpipe_destroy(&softpipe->pipe); | |||
return NULL; | |||
} |
@@ -64,16 +64,17 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) | |||
} | |||
typedef const float (*cptrf4)[4]; | |||
static void | |||
do_tri(struct draw_stage *stage, struct prim_header *prim) | |||
{ | |||
struct setup_stage *setup = setup_stage( stage ); | |||
setup_tri( setup->setup, | |||
prim->v[0]->data, | |||
prim->v[1]->data, | |||
prim->v[2]->data ); | |||
(cptrf4)prim->v[0]->data, | |||
(cptrf4)prim->v[1]->data, | |||
(cptrf4)prim->v[2]->data ); | |||
} | |||
static void | |||
@@ -82,8 +83,8 @@ do_line(struct draw_stage *stage, struct prim_header *prim) | |||
struct setup_stage *setup = setup_stage( stage ); | |||
setup_line( setup->setup, | |||
prim->v[0]->data, | |||
prim->v[1]->data ); | |||
(cptrf4)prim->v[0]->data, | |||
(cptrf4)prim->v[1]->data ); | |||
} | |||
static void | |||
@@ -92,7 +93,7 @@ do_point(struct draw_stage *stage, struct prim_header *prim) | |||
struct setup_stage *setup = setup_stage( stage ); | |||
setup_point( setup->setup, | |||
prim->v[0]->data ); | |||
(cptrf4)prim->v[0]->data ); | |||
} | |||
@@ -116,30 +116,28 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) | |||
if (prim == PIPE_PRIM_TRIANGLES || | |||
prim == PIPE_PRIM_LINES || | |||
prim == PIPE_PRIM_POINTS) { | |||
cvbr->prim = prim; | |||
return TRUE; | |||
} | |||
else { | |||
return FALSE; | |||
} | |||
cvbr->prim = prim; | |||
return TRUE; | |||
} | |||
/**
 * Return a pointer to vertex number 'index' in a buffer whose vertices
 * are 'stride' bytes apart, typed as a pointer to float[4] rows.
 */
static INLINE cptrf4 get_vert( const void *vertex_buffer,
                               int index,
                               int stride )
{
   const char *base = (const char *)vertex_buffer;

   return (cptrf4)(base + index * stride);
}
static void | |||
sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) | |||
sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) | |||
{ | |||
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); | |||
struct softpipe_context *softpipe = cvbr->softpipe; | |||
unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
unsigned i, j; | |||
void *vertex_buffer = cvbr->vertex_buffer; | |||
cptrf4 v[3]; | |||
unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
unsigned i; | |||
const void *vertex_buffer = cvbr->vertex_buffer; | |||
/* XXX: break this dependency - make setup_context live under | |||
* softpipe, rename the old "setup" draw stage to something else. | |||
@@ -149,40 +147,98 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) | |||
switch (cvbr->prim) { | |||
case PIPE_PRIM_TRIANGLES: | |||
for (i = 0; i < nr_indices; i += 3) { | |||
for (j = 0; j < 3; j++) | |||
v[j] = (cptrf4)((char *)vertex_buffer + | |||
indices[i+j] * vertex_size); | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2]); | |||
case PIPE_PRIM_POINTS: | |||
for (i = 0; i < nr; i++) { | |||
setup_point( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-0], stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_LINES: | |||
for (i = 0; i < nr_indices; i += 2) { | |||
for (j = 0; j < 2; j++) | |||
v[j] = (cptrf4)((char *)vertex_buffer + | |||
indices[i+j] * vertex_size); | |||
for (i = 1; i < nr; i += 2) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_STRIP: | |||
for (i = 1; i < nr; i ++) { | |||
setup_line( setup_ctx, | |||
v[0], | |||
v[1] ); | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_POINTS: | |||
for (i = 0; i < nr_indices; i++) { | |||
v[0] = (cptrf4)((char *)vertex_buffer + | |||
indices[i] * vertex_size); | |||
case PIPE_PRIM_LINE_LOOP: | |||
for (i = 1; i < nr; i ++) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride) ); | |||
} | |||
if (nr) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, indices[nr-1], stride), | |||
get_vert(vertex_buffer, indices[0], stride) ); | |||
} | |||
break; | |||
setup_point( setup_ctx, | |||
v[0] ); | |||
case PIPE_PRIM_TRIANGLES: | |||
for (i = 2; i < nr; i += 3) { | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-2], stride), | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
for (i = 2; i < nr; i += 1) { | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[i+(i&1)-2], stride), | |||
get_vert(vertex_buffer, indices[i-(i&1)-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_FAN: | |||
case PIPE_PRIM_POLYGON: | |||
for (i = 2; i < nr; i += 1) { | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[0], stride), | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_QUADS: | |||
for (i = 3; i < nr; i += 4) { | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-3], stride), | |||
get_vert(vertex_buffer, indices[i-2], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-2], stride), | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_QUAD_STRIP: | |||
for (i = 3; i < nr; i += 2) { | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-3], stride), | |||
get_vert(vertex_buffer, indices[i-2], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
setup_tri( setup_ctx, | |||
get_vert(vertex_buffer, indices[i-1], stride), | |||
get_vert(vertex_buffer, indices[i-3], stride), | |||
get_vert(vertex_buffer, indices[i-0], stride)); | |||
} | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
/* XXX: why are we calling this??? If we had to call something, it | |||
@@ -202,131 +258,107 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) | |||
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); | |||
struct softpipe_context *softpipe = cvbr->softpipe; | |||
struct draw_stage *setup = softpipe->setup; | |||
const void *vertex_buffer = cvbr->vertex_buffer; | |||
const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
const void *vertex_buffer = NULL; | |||
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
unsigned i; | |||
struct setup_context *setup_ctx = sp_draw_setup_context(setup); | |||
cptrf4 v[3]; | |||
#define VERTEX(I) \ | |||
(cptrf4) ((char *) vertex_buffer + (I) * vertex_size) | |||
vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride); | |||
switch (cvbr->prim) { | |||
case PIPE_PRIM_POINTS: | |||
for (i = 0; i < nr; i++) { | |||
v[0] = VERTEX(i); | |||
setup_point( setup_ctx, v[0] ); | |||
setup_point( setup_ctx, | |||
get_vert(vertex_buffer, i-0, stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_LINES: | |||
assert(nr % 2 == 0); | |||
for (i = 0; i < nr; i += 2) { | |||
v[0] = VERTEX(i); | |||
v[1] = VERTEX(i + 1); | |||
setup_line( setup_ctx, v[0], v[1] ); | |||
for (i = 1; i < nr; i += 2) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-0, stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_STRIP: | |||
for (i = 1; i < nr; i++) { | |||
v[0] = VERTEX(i - 1); | |||
v[1] = VERTEX(i); | |||
setup_line( setup_ctx, v[0], v[1] ); | |||
for (i = 1; i < nr; i ++) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-0, stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_LINE_LOOP: | |||
for (i = 1; i < nr; i ++) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-0, stride) ); | |||
} | |||
if (nr) { | |||
setup_line( setup_ctx, | |||
get_vert(vertex_buffer, nr-1, stride), | |||
get_vert(vertex_buffer, 0, stride) ); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLES: | |||
assert(nr % 3 == 0); | |||
for (i = 0; i < nr; i += 3) { | |||
v[0] = VERTEX(i + 0); | |||
v[1] = VERTEX(i + 1); | |||
v[2] = VERTEX(i + 2); | |||
for (i = 2; i < nr; i += 3) { | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, i-2, stride), | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_STRIP: | |||
assert(nr >= 3); | |||
for (i = 2; i < nr; i++) { | |||
v[0] = VERTEX(i - 2); | |||
v[1] = VERTEX(i - 1); | |||
v[2] = VERTEX(i); | |||
for (i = 2; i < nr; i += 1) { | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, i+(i&1)-2, stride), | |||
get_vert(vertex_buffer, i-(i&1)-1, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_TRIANGLE_FAN: | |||
assert(nr >= 3); | |||
for (i = 2; i < nr; i++) { | |||
v[0] = VERTEX(0); | |||
v[1] = VERTEX(i - 1); | |||
v[2] = VERTEX(i); | |||
case PIPE_PRIM_POLYGON: | |||
for (i = 2; i < nr; i += 1) { | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, 0, stride), | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_QUADS: | |||
assert(nr % 4 == 0); | |||
for (i = 0; i < nr; i += 4) { | |||
v[0] = VERTEX(i + 0); | |||
v[1] = VERTEX(i + 1); | |||
v[2] = VERTEX(i + 2); | |||
for (i = 3; i < nr; i += 4) { | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, i-3, stride), | |||
get_vert(vertex_buffer, i-2, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
v[0] = VERTEX(i + 0); | |||
v[1] = VERTEX(i + 2); | |||
v[2] = VERTEX(i + 3); | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, i-2, stride), | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
} | |||
break; | |||
case PIPE_PRIM_QUAD_STRIP: | |||
assert(nr >= 4); | |||
for (i = 2; i < nr; i += 2) { | |||
v[0] = VERTEX(i - 2); | |||
v[1] = VERTEX(i); | |||
v[2] = VERTEX(i + 1); | |||
for (i = 3; i < nr; i += 2) { | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, i-3, stride), | |||
get_vert(vertex_buffer, i-2, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
v[0] = VERTEX(i - 2); | |||
v[1] = VERTEX(i + 1); | |||
v[2] = VERTEX(i - 1); | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
} | |||
break; | |||
case PIPE_PRIM_POLYGON: | |||
/* draw as tri fan */ | |||
for (i = 2; i < nr; i++) { | |||
v[0] = VERTEX(0); | |||
v[1] = VERTEX(i - 1); | |||
v[2] = VERTEX(i); | |||
setup_tri( setup_ctx, | |||
v[0], | |||
v[1], | |||
v[2] ); | |||
get_vert(vertex_buffer, i-1, stride), | |||
get_vert(vertex_buffer, i-3, stride), | |||
get_vert(vertex_buffer, i-0, stride)); | |||
} | |||
break; | |||
default: | |||
/* XXX finish remaining prim types */ | |||
assert(0); | |||
} | |||
#undef VERTEX | |||
} | |||
@@ -119,6 +119,17 @@ typedef unsigned char boolean; | |||
#endif | |||
/* This should match linux gcc cdecl semantics everywhere, so that we | |||
* just codegen one calling convention on all platforms. | |||
*/ | |||
#ifdef WIN32 | |||
#define PIPE_CDECL __cdecl | |||
#else | |||
#define PIPE_CDECL | |||
#endif | |||
#if defined __GNUC__ | |||
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) | |||
#define ALIGN16_ASSIGN(NAME) NAME##___aligned | |||
@@ -131,12 +142,16 @@ typedef unsigned char boolean; | |||
/** For calling code-gen'd functions */ | |||
/** | |||
* For calling code-gen'd functions, phase out in favor of | |||
* PIPE_CDECL, above, which really means cdecl on all platforms, not | |||
* like the below... | |||
*/ | |||
#if !defined(XSTDCALL) | |||
#if defined(WIN32) | |||
#define XSTDCALL __stdcall | |||
#define XSTDCALL __stdcall /* phase this out */ | |||
#else | |||
#define XSTDCALL | |||
#define XSTDCALL /* XXX: NOTE! not STDCALL! */ | |||
#endif | |||
#endif | |||
@@ -797,8 +797,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) | |||
pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v)); | |||
} | |||
if (pipe == NULL) | |||
goto fail; | |||
c->st = st_create_context(pipe, &v->mesa_visual, | |||
share_list ? share_list->st : NULL); | |||
if (c->st == NULL) | |||
goto fail; | |||
mesaCtx = c->st->ctx; | |||
c->st->ctx->DriverCtx = c; | |||
@@ -818,6 +824,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) | |||
#endif | |||
return c; | |||
fail: | |||
if (c->st) | |||
st_destroy_context(c->st); | |||
if (pipe) | |||
pipe->destroy(pipe); | |||
FREE(c); | |||
return NULL; | |||
} | |||
@@ -53,7 +53,9 @@ struct state_key { | |||
unsigned light_color_material:1; | |||
unsigned light_color_material_mask:12; | |||
unsigned light_material_mask:12; | |||
unsigned material_shininess_is_zero:1; | |||
unsigned need_eye_coords:1; | |||
unsigned normalize:1; | |||
unsigned rescale_normals:1; | |||
unsigned fog_source_is_depth:1; | |||
@@ -154,6 +156,26 @@ tnl_get_per_vertex_fog(GLcontext *ctx) | |||
#endif | |||
} | |||
static GLboolean check_active_shininess( GLcontext *ctx, | |||
const struct state_key *key, | |||
GLuint side ) | |||
{ | |||
GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side); | |||
if (key->light_color_material_mask & bit) | |||
return GL_TRUE; | |||
if (key->light_material_mask & bit) | |||
return GL_TRUE; | |||
if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F) | |||
return GL_TRUE; | |||
return GL_FALSE; | |||
} | |||
static struct state_key *make_state_key( GLcontext *ctx ) | |||
{ | |||
@@ -167,6 +189,8 @@ static struct state_key *make_state_key( GLcontext *ctx ) | |||
*/ | |||
assert(fp); | |||
key->need_eye_coords = ctx->_NeedEyeCoords; | |||
key->fragprog_inputs_read = fp->Base.InputsRead; | |||
if (ctx->RenderMode == GL_FEEDBACK) { | |||
@@ -211,6 +235,17 @@ static struct state_key *make_state_key( GLcontext *ctx ) | |||
key->unit[i].light_attenuated = 1; | |||
} | |||
} | |||
if (check_active_shininess(ctx, key, 0)) { | |||
key->material_shininess_is_zero = 0; | |||
} | |||
else if (key->light_twoside && | |||
check_active_shininess(ctx, key, 1)) { | |||
key->material_shininess_is_zero = 0; | |||
} | |||
else { | |||
key->material_shininess_is_zero = 1; | |||
} | |||
} | |||
if (ctx->Transform.Normalize) | |||
@@ -270,7 +305,7 @@ static struct state_key *make_state_key( GLcontext *ctx ) | |||
* generated program with line/function references for each | |||
* instruction back into this file: | |||
*/ | |||
#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM) | |||
#define DISASSEM 1 | |||
/* Should be tunable by the driver - do we want to do matrix | |||
* multiplications with DP4's or with MUL/MAD's? SSE works better | |||
@@ -309,8 +344,9 @@ struct tnl_program { | |||
GLuint temp_reserved; | |||
struct ureg eye_position; | |||
struct ureg eye_position_z; | |||
struct ureg eye_position_normalized; | |||
struct ureg eye_normal; | |||
struct ureg transformed_normal; | |||
struct ureg identity; | |||
GLuint materials; | |||
@@ -653,9 +689,9 @@ static void emit_normalize_vec3( struct tnl_program *p, | |||
struct ureg src ) | |||
{ | |||
struct ureg tmp = get_temp(p); | |||
emit_op2(p, OPCODE_DP3, tmp, 0, src, src); | |||
emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); | |||
emit_op2(p, OPCODE_MUL, dest, 0, src, tmp); | |||
emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); | |||
emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); | |||
emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); | |||
release_temp(p, tmp); | |||
} | |||
@@ -693,6 +729,28 @@ static struct ureg get_eye_position( struct tnl_program *p ) | |||
} | |||
static struct ureg get_eye_position_z( struct tnl_program *p ) | |||
{ | |||
if (!is_undef(p->eye_position)) | |||
return swizzle1(p->eye_position, Z); | |||
if (is_undef(p->eye_position_z)) { | |||
struct ureg pos = register_input( p, VERT_ATTRIB_POS ); | |||
struct ureg modelview[4]; | |||
p->eye_position_z = reserve_temp(p); | |||
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, | |||
0, modelview ); | |||
emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); | |||
} | |||
return p->eye_position_z; | |||
} | |||
static struct ureg get_eye_position_normalized( struct tnl_program *p ) | |||
{ | |||
if (is_undef(p->eye_position_normalized)) { | |||
@@ -705,36 +763,52 @@ static struct ureg get_eye_position_normalized( struct tnl_program *p ) | |||
} | |||
static struct ureg get_eye_normal( struct tnl_program *p ) | |||
static struct ureg get_transformed_normal( struct tnl_program *p ) | |||
{ | |||
if (is_undef(p->eye_normal)) { | |||
if (is_undef(p->transformed_normal) && | |||
!p->state->need_eye_coords && | |||
!p->state->normalize && | |||
!(p->state->need_eye_coords == p->state->rescale_normals)) | |||
{ | |||
p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); | |||
} | |||
else if (is_undef(p->transformed_normal)) | |||
{ | |||
struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); | |||
struct ureg mvinv[3]; | |||
struct ureg transformed_normal = reserve_temp(p); | |||
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, | |||
STATE_MATRIX_INVTRANS, mvinv ); | |||
p->eye_normal = reserve_temp(p); | |||
if (p->state->need_eye_coords) { | |||
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, | |||
STATE_MATRIX_INVTRANS, mvinv ); | |||
/* Transform to eye space: | |||
*/ | |||
emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); | |||
/* Transform to eye space: | |||
*/ | |||
emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); | |||
normal = transformed_normal; | |||
} | |||
/* Normalize/Rescale: | |||
*/ | |||
if (p->state->normalize) { | |||
emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); | |||
emit_normalize_vec3( p, transformed_normal, normal ); | |||
normal = transformed_normal; | |||
} | |||
else if (p->state->rescale_normals) { | |||
else if (p->state->need_eye_coords == p->state->rescale_normals) { | |||
/* This is already adjusted for eye/non-eye rendering: | |||
*/ | |||
struct ureg rescale = register_param2(p, STATE_INTERNAL, | |||
STATE_NORMAL_SCALE); | |||
STATE_NORMAL_SCALE); | |||
emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal, | |||
swizzle1(rescale, X)); | |||
emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); | |||
normal = transformed_normal; | |||
} | |||
assert(normal.file == PROGRAM_TEMPORARY); | |||
p->transformed_normal = normal; | |||
} | |||
return p->eye_normal; | |||
return p->transformed_normal; | |||
} | |||
@@ -856,7 +930,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, | |||
*/ | |||
if (!p->state->unit[i].light_spotcutoff_is_180) { | |||
struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, | |||
STATE_SPOT_DIR_NORMALIZED, i); | |||
STATE_LIGHT_SPOT_DIR_NORMALIZED, i); | |||
struct ureg spot = get_temp(p); | |||
struct ureg slt = get_temp(p); | |||
@@ -895,7 +969,26 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, | |||
} | |||
static void emit_degenerate_lit( struct tnl_program *p, | |||
struct ureg lit, | |||
struct ureg dots ) | |||
{ | |||
struct ureg id = get_identity_param(p); | |||
/* Note that result.x & result.w will not be examined. Note also that | |||
* dots.xyzw == dots.xxxx. | |||
*/ | |||
/* result[1] = MAX2(in, 0) | |||
*/ | |||
emit_op2(p, OPCODE_MAX, lit, 0, id, dots); | |||
/* result[2] = (in > 0 ? 1 : 0) | |||
*/ | |||
emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, | |||
lit, /* 0 */ | |||
dots); /* in[0] */ | |||
} | |||
/* Need to add some additional parameters to allow lighting in object | |||
@@ -907,7 +1000,7 @@ static void build_lighting( struct tnl_program *p ) | |||
const GLboolean twoside = p->state->light_twoside; | |||
const GLboolean separate = p->state->separate_specular; | |||
GLuint nr_lights = 0, count = 0; | |||
struct ureg normal = get_eye_normal(p); | |||
struct ureg normal = get_transformed_normal(p); | |||
struct ureg lit = get_temp(p); | |||
struct ureg dots = get_temp(p); | |||
struct ureg _col0 = undef, _col1 = undef; | |||
@@ -921,9 +1014,11 @@ static void build_lighting( struct tnl_program *p ) | |||
set_material_flags(p); | |||
{ | |||
struct ureg shininess = get_material(p, 0, STATE_SHININESS); | |||
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); | |||
release_temp(p, shininess); | |||
if (!p->state->material_shininess_is_zero) { | |||
struct ureg shininess = get_material(p, 0, STATE_SHININESS); | |||
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); | |||
release_temp(p, shininess); | |||
} | |||
_col0 = make_temp(p, get_scenecolor(p, 0)); | |||
if (separate) | |||
@@ -934,10 +1029,12 @@ static void build_lighting( struct tnl_program *p ) | |||
} | |||
if (twoside) { | |||
struct ureg shininess = get_material(p, 1, STATE_SHININESS); | |||
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, | |||
negate(swizzle1(shininess,X))); | |||
release_temp(p, shininess); | |||
if (!p->state->material_shininess_is_zero) { | |||
struct ureg shininess = get_material(p, 1, STATE_SHININESS); | |||
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, | |||
negate(swizzle1(shininess,X))); | |||
release_temp(p, shininess); | |||
} | |||
_bfc0 = make_temp(p, get_scenecolor(p, 1)); | |||
if (separate) | |||
@@ -984,25 +1081,28 @@ static void build_lighting( struct tnl_program *p ) | |||
/* Can use precomputed constants in this case. | |||
* Attenuation never applies to infinite lights. | |||
*/ | |||
VPpli = register_param3(p, STATE_LIGHT, i, | |||
STATE_POSITION_NORMALIZED); | |||
if (p->state->light_local_viewer) { | |||
struct ureg eye_hat = get_eye_position_normalized(p); | |||
half = get_temp(p); | |||
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
emit_normalize_vec3(p, half, half); | |||
} else { | |||
half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR); | |||
VPpli = register_param3(p, STATE_INTERNAL, | |||
STATE_LIGHT_POSITION_NORMALIZED, i); | |||
if (!p->state->material_shininess_is_zero) { | |||
if (p->state->light_local_viewer) { | |||
struct ureg eye_hat = get_eye_position_normalized(p); | |||
half = get_temp(p); | |||
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
emit_normalize_vec3(p, half, half); | |||
} else { | |||
half = register_param3(p, STATE_INTERNAL, | |||
STATE_LIGHT_HALF_VECTOR, i); | |||
} | |||
} | |||
} | |||
else { | |||
struct ureg Ppli = register_param3(p, STATE_LIGHT, i, | |||
STATE_POSITION); | |||
struct ureg Ppli = register_param3(p, STATE_INTERNAL, | |||
STATE_LIGHT_POSITION, i); | |||
struct ureg V = get_eye_position(p); | |||
struct ureg dist = get_temp(p); | |||
VPpli = get_temp(p); | |||
half = get_temp(p); | |||
/* Calculate VPpli vector | |||
*/ | |||
@@ -1024,24 +1124,33 @@ static void build_lighting( struct tnl_program *p ) | |||
/* Calculate viewer direction, or use infinite viewer: | |||
*/ | |||
if (p->state->light_local_viewer) { | |||
struct ureg eye_hat = get_eye_position_normalized(p); | |||
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
} | |||
else { | |||
struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); | |||
emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); | |||
} | |||
emit_normalize_vec3(p, half, half); | |||
if (!p->state->material_shininess_is_zero) { | |||
half = get_temp(p); | |||
if (p->state->light_local_viewer) { | |||
struct ureg eye_hat = get_eye_position_normalized(p); | |||
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
} | |||
else { | |||
struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); | |||
emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); | |||
} | |||
emit_normalize_vec3(p, half, half); | |||
} | |||
release_temp(p, dist); | |||
} | |||
/* Calculate dot products: | |||
*/ | |||
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); | |||
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); | |||
if (p->state->material_shininess_is_zero) { | |||
emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); | |||
} | |||
else { | |||
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); | |||
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); | |||
} | |||
/* Front face lighting: | |||
*/ | |||
@@ -1052,11 +1161,6 @@ static void build_lighting( struct tnl_program *p ) | |||
struct ureg res0, res1; | |||
GLuint mask0, mask1; | |||
emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
if (!is_undef(att)) | |||
emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
if (count == nr_lights) { | |||
if (separate) { | |||
@@ -1078,7 +1182,21 @@ static void build_lighting( struct tnl_program *p ) | |||
res1 = _col1; | |||
} | |||
emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); | |||
if (!is_undef(att)) { | |||
emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); | |||
} | |||
else if (!p->state->material_shininess_is_zero) { | |||
emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); | |||
} | |||
else { | |||
emit_degenerate_lit(p, lit, dots); | |||
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); | |||
} | |||
emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); | |||
emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); | |||
@@ -1096,11 +1214,6 @@ static void build_lighting( struct tnl_program *p ) | |||
struct ureg res0, res1; | |||
GLuint mask0, mask1; | |||
emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z))); | |||
if (!is_undef(att)) | |||
emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
if (count == nr_lights) { | |||
if (separate) { | |||
mask0 = WRITEMASK_XYZ; | |||
@@ -1121,7 +1234,23 @@ static void build_lighting( struct tnl_program *p ) | |||
mask1 = 0; | |||
} | |||
emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); | |||
dots = negate(swizzle(dots,X,Y,W,Z)); | |||
if (!is_undef(att)) { | |||
emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); | |||
} | |||
else if (!p->state->material_shininess_is_zero) { | |||
emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); | |||
} | |||
else { | |||
emit_degenerate_lit(p, lit, dots); | |||
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); | |||
} | |||
emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); | |||
emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); | |||
emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); | |||
@@ -1146,7 +1275,7 @@ static void build_fog( struct tnl_program *p ) | |||
struct ureg input; | |||
if (p->state->fog_source_is_depth) { | |||
input = swizzle1(get_eye_position(p), Z); | |||
input = get_eye_position_z(p); | |||
} | |||
else { | |||
input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); | |||
@@ -1201,7 +1330,7 @@ static void build_reflect_texgen( struct tnl_program *p, | |||
struct ureg dest, | |||
GLuint writemask ) | |||
{ | |||
struct ureg normal = get_eye_normal(p); | |||
struct ureg normal = get_transformed_normal(p); | |||
struct ureg eye_hat = get_eye_position_normalized(p); | |||
struct ureg tmp = get_temp(p); | |||
@@ -1219,7 +1348,7 @@ static void build_sphere_texgen( struct tnl_program *p, | |||
struct ureg dest, | |||
GLuint writemask ) | |||
{ | |||
struct ureg normal = get_eye_normal(p); | |||
struct ureg normal = get_transformed_normal(p); | |||
struct ureg eye_hat = get_eye_position_normalized(p); | |||
struct ureg tmp = get_temp(p); | |||
struct ureg half = register_scalar_const(p, .5); | |||
@@ -1338,7 +1467,7 @@ static void build_texture_transform( struct tnl_program *p ) | |||
} | |||
if (normal_mask) { | |||
struct ureg normal = get_eye_normal(p); | |||
struct ureg normal = get_transformed_normal(p); | |||
emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); | |||
} | |||
@@ -1376,7 +1505,7 @@ static void build_texture_transform( struct tnl_program *p ) | |||
static void build_pointsize( struct tnl_program *p ) | |||
{ | |||
struct ureg eye = get_eye_position(p); | |||
struct ureg eye = get_eye_position_z(p); | |||
struct ureg state_size = register_param1(p, STATE_POINT_SIZE); | |||
struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); | |||
struct ureg out = register_output(p, VERT_RESULT_PSIZ); | |||
@@ -1474,8 +1603,9 @@ create_new_program( const struct state_key *key, | |||
p.state = key; | |||
p.program = program; | |||
p.eye_position = undef; | |||
p.eye_position_z = undef; | |||
p.eye_position_normalized = undef; | |||
p.eye_normal = undef; | |||
p.transformed_normal = undef; | |||
p.identity = undef; | |||
p.temp_in_use = 0; | |||
@@ -1357,6 +1357,7 @@ _mesa_init_lighting( GLcontext *ctx ) | |||
/* Miscellaneous */ | |||
ctx->Light._NeedEyeCoords = GL_FALSE; | |||
ctx->_NeedEyeCoords = GL_FALSE; | |||
ctx->_ForceEyeCoords = GL_TRUE; | |||
ctx->_ModelViewInvScale = 1.0; | |||
} | |||
@@ -1209,18 +1209,6 @@ _mesa_update_state_locked( GLcontext *ctx ) | |||
| _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR)) | |||
update_tricaps( ctx, new_state ); | |||
if (ctx->FragmentProgram._MaintainTexEnvProgram) { | |||
prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); | |||
} | |||
if (ctx->VertexProgram._MaintainTnlProgram) { | |||
prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX | | |||
_NEW_TRANSFORM | _NEW_POINT | | |||
_NEW_FOG | _NEW_LIGHT); | |||
} | |||
if (new_state & prog_flags) | |||
update_program( ctx ); | |||
/* ctx->_NeedEyeCoords is now up to date. | |||
* | |||
* If the truth value of this variable has changed, update for the | |||
@@ -1233,6 +1221,20 @@ _mesa_update_state_locked( GLcontext *ctx ) | |||
if (new_state & _MESA_NEW_NEED_EYE_COORDS) | |||
_mesa_update_tnl_spaces( ctx, new_state ); | |||
if (ctx->FragmentProgram._MaintainTexEnvProgram) { | |||
prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); | |||
} | |||
if (ctx->VertexProgram._MaintainTnlProgram) { | |||
prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX | | |||
_NEW_TRANSFORM | _NEW_POINT | | |||
_NEW_FOG | _NEW_LIGHT | | |||
_MESA_NEW_NEED_EYE_COORDS); | |||
} | |||
if (new_state & prog_flags) | |||
update_program( ctx ); | |||
/* | |||
* Give the driver a chance to act upon the new_state flags. | |||
* The driver might plug in different span functions, for example. |
@@ -134,10 +134,6 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], | |||
value[3] = 1.0; | |||
} | |||
return; | |||
case STATE_POSITION_NORMALIZED: | |||
COPY_4V(value, ctx->Light.Light[ln].EyePosition); | |||
NORMALIZE_3FV( value ); | |||
return; | |||
default: | |||
_mesa_problem(ctx, "Invalid light state in fetch_state"); | |||
return; | |||
@@ -401,7 +397,11 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], | |||
case STATE_INTERNAL: | |||
switch (state[1]) { | |||
case STATE_NORMAL_SCALE: | |||
ASSIGN_4V(value, ctx->_ModelViewInvScale, 0, 0, 1); | |||
ASSIGN_4V(value, | |||
ctx->_ModelViewInvScale, | |||
ctx->_ModelViewInvScale, | |||
ctx->_ModelViewInvScale, | |||
1); | |||
return; | |||
case STATE_TEXRECT_SCALE: | |||
{ | |||
@@ -431,15 +431,46 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], | |||
value[2] = ctx->Fog.Density * ONE_DIV_LN2; | |||
value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2; | |||
return; | |||
case STATE_SPOT_DIR_NORMALIZED: { | |||
case STATE_LIGHT_SPOT_DIR_NORMALIZED: { | |||
/* here, state[2] is the light number */ | |||
/* pre-normalize spot dir */ | |||
const GLuint ln = (GLuint) state[2]; | |||
COPY_3V(value, ctx->Light.Light[ln].EyeDirection); | |||
NORMALIZE_3FV(value); | |||
COPY_3V(value, ctx->Light.Light[ln]._NormDirection); | |||
value[3] = ctx->Light.Light[ln]._CosCutoff; | |||
return; | |||
} | |||
case STATE_LIGHT_POSITION: { | |||
const GLuint ln = (GLuint) state[2]; | |||
COPY_4V(value, ctx->Light.Light[ln]._Position); | |||
return; | |||
} | |||
case STATE_LIGHT_POSITION_NORMALIZED: { | |||
const GLuint ln = (GLuint) state[2]; | |||
COPY_4V(value, ctx->Light.Light[ln]._Position); | |||
NORMALIZE_3FV( value ); | |||
return; | |||
} | |||
case STATE_LIGHT_HALF_VECTOR: { | |||
const GLuint ln = (GLuint) state[2]; | |||
GLfloat p[3]; | |||
/* Compute infinite half angle vector: | |||
* halfVector = normalize(normalize(lightPos) + (0, 0, 1)) | |||
* light.EyePosition.w should be 0 for infinite lights. | |||
*/ | |||
COPY_3V(p, ctx->Light.Light[ln]._Position); | |||
NORMALIZE_3FV(p); | |||
ADD_3V(value, p, ctx->_EyeZDir); | |||
NORMALIZE_3FV(value); | |||
value[3] = 1.0; | |||
return; | |||
} | |||
case STATE_PT_SCALE: | |||
value[0] = ctx->Pixel.RedScale; | |||
value[1] = ctx->Pixel.GreenScale; | |||
@@ -696,7 +727,6 @@ append_token(char *dst, gl_state_index k) | |||
append(dst, "normalScale"); | |||
break; | |||
case STATE_INTERNAL: | |||
case STATE_POSITION_NORMALIZED: | |||
append(dst, "(internal)"); | |||
break; | |||
case STATE_PT_SCALE: |
@@ -106,9 +106,11 @@ typedef enum gl_state_index_ { | |||
STATE_INTERNAL, /* Mesa additions */ | |||
STATE_NORMAL_SCALE, | |||
STATE_TEXRECT_SCALE, | |||
STATE_POSITION_NORMALIZED, /* normalized light position */ | |||
STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */ | |||
STATE_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */ | |||
STATE_LIGHT_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */ | |||
STATE_LIGHT_POSITION, /* object vs eye space */ | |||
STATE_LIGHT_POSITION_NORMALIZED, /* object vs eye space */ | |||
STATE_LIGHT_HALF_VECTOR, /* object vs eye space */ | |||
STATE_PT_SCALE, /**< Pixel transfer RGBA scale */ | |||
STATE_PT_BIAS, /**< Pixel transfer RGBA bias */ | |||
STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ |