| @@ -230,7 +230,7 @@ int main( int argc, char *argv[] ) | |||
| glutInitWindowPosition( 0, 0 ); | |||
| glutInitWindowSize( 250, 250 ); | |||
| glutInitDisplayMode( GLUT_RGB | GLUT_SINGLE | GLUT_DEPTH ); | |||
| glutCreateWindow(argv[0]); | |||
| glutCreateWindow(argv[argc-1]); | |||
| glutReshapeFunc( Reshape ); | |||
| glutKeyboardFunc( Key ); | |||
| glutDisplayFunc( Display ); | |||
| @@ -26,12 +26,17 @@ C_SOURCES = \ | |||
| draw_pt_emit.c \ | |||
| draw_pt_fetch.c \ | |||
| draw_pt_fetch_emit.c \ | |||
| draw_pt_fetch_shade_emit.c \ | |||
| draw_pt_fetch_shade_pipeline.c \ | |||
| draw_pt_post_vs.c \ | |||
| draw_pt_util.c \ | |||
| draw_pt_varray.c \ | |||
| draw_pt_vcache.c \ | |||
| draw_vertex.c \ | |||
| draw_vs.c \ | |||
| draw_vs_varient.c \ | |||
| draw_vs_aos.c \ | |||
| draw_vs_aos_io.c \ | |||
| draw_vs_exec.c \ | |||
| draw_vs_llvm.c \ | |||
| draw_vs_sse.c | |||
| @@ -56,12 +56,6 @@ struct draw_context *draw_create( void ) | |||
| draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ | |||
| tgsi_exec_machine_init(&draw->machine); | |||
| /* FIXME: give this machine thing a proper constructor: | |||
| */ | |||
| draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
| draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
| if (!draw_pipeline_init( draw )) | |||
| goto fail; | |||
| @@ -69,6 +63,9 @@ struct draw_context *draw_create( void ) | |||
| if (!draw_pt_init( draw )) | |||
| goto fail; | |||
| if (!draw_vs_init( draw )) | |||
| goto fail; | |||
| return draw; | |||
| fail: | |||
| @@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw ) | |||
| return; | |||
| if (draw->machine.Inputs) | |||
| align_free(draw->machine.Inputs); | |||
| if (draw->machine.Outputs) | |||
| align_free(draw->machine.Outputs); | |||
| tgsi_exec_machine_free_data(&draw->machine); | |||
| /* Not so fast -- we're just borrowing this at the moment. | |||
| * | |||
| @@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw ) | |||
| draw_pipeline_destroy( draw ); | |||
| draw_pt_destroy( draw ); | |||
| draw_vs_destroy( draw ); | |||
| FREE( draw ); | |||
| } | |||
| @@ -295,7 +286,7 @@ int | |||
| draw_find_vs_output(struct draw_context *draw, | |||
| uint semantic_name, uint semantic_index) | |||
| { | |||
| const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
| uint i; | |||
| for (i = 0; i < vs->info.num_outputs; i++) { | |||
| if (vs->info.output_semantic_name[i] == semantic_name && | |||
| @@ -320,7 +311,7 @@ draw_find_vs_output(struct draw_context *draw, | |||
| uint | |||
| draw_num_vs_outputs(struct draw_context *draw) | |||
| { | |||
| uint count = draw->vertex_shader->info.num_outputs; | |||
| uint count = draw->vs.vertex_shader->info.num_outputs; | |||
| if (draw->extra_vp_outputs.slot > 0) | |||
| count++; | |||
| return count; | |||
| @@ -212,6 +212,71 @@ void draw_pipeline_run( struct draw_context *draw, | |||
| draw->pipeline.vertex_count = 0; | |||
| } | |||
/* Emit a quad as two triangles: (i0,i1,i3) followed by (i1,i2,i3).
 *
 * The first triangle is sent with DRAW_PIPE_RESET_STIPPLE and edge
 * flags 0 and 2, the second with edge flags 0 and 1.  Only the first
 * vertex index of each triangle is masked with ~DRAW_PIPE_FLAG_MASK.
 *
 * Expects 'draw', 'verts' and 'stride' to be in scope where the macro
 * is expanded.  The two calls are wrapped in do { } while (0) so the
 * macro behaves as one statement, e.g. as the body of an unbraced
 * if/for.
 */
#define QUAD(i0,i1,i2,i3)                                           \
   do {                                                             \
      do_triangle( draw,                                            \
                   ( DRAW_PIPE_RESET_STIPPLE |                      \
                     DRAW_PIPE_EDGE_FLAG_0 |                        \
                     DRAW_PIPE_EDGE_FLAG_2 ),                       \
                   verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),  \
                   verts + stride * (i1),                           \
                   verts + stride * (i3));                          \
      do_triangle( draw,                                            \
                   ( DRAW_PIPE_EDGE_FLAG_0 |                        \
                     DRAW_PIPE_EDGE_FLAG_1 ),                       \
                   verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK),  \
                   verts + stride * (i2),                           \
                   verts + stride * (i3));                          \
   } while (0)
/* Emit a single triangle (i0,i1,i2) with the caller-supplied flags word.
 *
 * The first index is masked with ~DRAW_PIPE_FLAG_MASK before being used;
 * the other two are used as given.  Expects 'draw', 'verts' and 'stride'
 * to be in scope where the macro is expanded.
 */
#define TRIANGLE(flags,i0,i1,i2)                                 \
   do_triangle( draw, flags,                                     \
                verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),  \
                verts + stride * (i1),                           \
                verts + stride * (i2) )
/* Emit a single line segment (i0,i1) with the caller-supplied flags word.
 *
 * The first index is masked with ~DRAW_PIPE_FLAG_MASK; the second is
 * used as given.  The second endpoint must come from the i1 argument,
 * not from the caller's loop variable: the decompose template passes
 * pairs such as (i - 1, i) and, for the closing segment of a line loop,
 * (i - 1, 0).
 *
 * Expects 'draw', 'verts' and 'stride' to be in scope where the macro
 * is expanded.
 */
#define LINE(flags,i0,i1)                                     \
   do_line( draw,                                             \
            flags,                                            \
            verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),   \
            verts + stride * (i1))
/* Emit a single point at vertex index i0.
 *
 * The argument is parenthesized so that an expression such as a + b is
 * multiplied by 'stride' as a whole.  Expects 'draw', 'verts' and
 * 'stride' to be in scope where the macro is expanded.
 */
#define POINT(i0)                               \
   do_point( draw,                              \
             verts + stride * (i0) )
| #define FUNC pipe_run_linear | |||
| #define ARGS \ | |||
| struct draw_context *draw, \ | |||
| unsigned prim, \ | |||
| struct vertex_header *vertices, \ | |||
| unsigned stride | |||
| #define LOCAL_VARS \ | |||
| char *verts = (char *)vertices; \ | |||
| boolean flatfirst = (draw->rasterizer->flatshade && \ | |||
| draw->rasterizer->flatshade_first); \ | |||
| unsigned i, flags | |||
| #define FLUSH | |||
| #include "draw_pt_decompose.h" | |||
| void draw_pipeline_run_linear( struct draw_context *draw, | |||
| unsigned prim, | |||
| struct vertex_header *vertices, | |||
| unsigned count, | |||
| unsigned stride ) | |||
| { | |||
| char *verts = (char *)vertices; | |||
| draw->pipeline.verts = verts; | |||
| draw->pipeline.vertex_stride = stride; | |||
| draw->pipeline.vertex_count = count; | |||
| pipe_run_linear(draw, prim, vertices, stride, count); | |||
| draw->pipeline.verts = NULL; | |||
| draw->pipeline.vertex_count = 0; | |||
| } | |||
| void draw_pipeline_flush( struct draw_context *draw, | |||
| @@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage, | |||
| { | |||
| struct vertex_header *tmp = stage->tmp[idx]; | |||
| const uint vsize = sizeof(struct vertex_header) | |||
| + stage->draw->num_vs_outputs * 4 * sizeof(float); | |||
| + stage->draw->vs.num_vs_outputs * 4 * sizeof(float); | |||
| memcpy(tmp, vert, vsize); | |||
| tmp->vertex_id = UNDEFINED_VERTEX_ID; | |||
| return tmp; | |||
| @@ -653,7 +653,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) | |||
| } | |||
| /* update vertex attrib info */ | |||
| aaline->tex_slot = draw->num_vs_outputs; | |||
| aaline->tex_slot = draw->vs.num_vs_outputs; | |||
| assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ | |||
| /* advertise the extra post-transformed vertex attribute */ | |||
| @@ -681,7 +681,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) | |||
| bind_aapoint_fragment_shader(aapoint); | |||
| /* update vertex attrib info */ | |||
| aapoint->tex_slot = draw->num_vs_outputs; | |||
| aapoint->tex_slot = draw->vs.num_vs_outputs; | |||
| assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ | |||
| draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; | |||
| @@ -692,7 +692,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) | |||
| aapoint->psize_slot = -1; | |||
| if (draw->rasterizer->point_size_per_vertex) { | |||
| /* find PSIZ vertex output */ | |||
| const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
| uint i; | |||
| for (i = 0; i < vs->info.num_outputs; i++) { | |||
| if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { | |||
| @@ -112,7 +112,7 @@ static void interp( const struct clipper *clip, | |||
| const struct vertex_header *out, | |||
| const struct vertex_header *in ) | |||
| { | |||
| const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; | |||
| const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; | |||
| unsigned j; | |||
| /* Vertex header. | |||
| @@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, | |||
| header.flags |= edge_last; | |||
| if (0) { | |||
| const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
| uint j, k; | |||
| debug_printf("Clipped tri:\n"); | |||
| for (j = 0; j < 3; j++) { | |||
| @@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage ) | |||
| clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; | |||
| if (clipper->flat) { | |||
| const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
| uint i; | |||
| clipper->num_color_attribs = 0; | |||
| @@ -159,7 +159,7 @@ static void flatshade_line_1( struct draw_stage *stage, | |||
| static void flatshade_init_state( struct draw_stage *stage ) | |||
| { | |||
| struct flat_stage *flat = flat_stage(stage); | |||
| const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
| uint i; | |||
| /* Find which vertex shader outputs are colors, make a list */ | |||
| @@ -71,7 +71,7 @@ screen_interp( struct draw_context *draw, | |||
| const struct vertex_header *v1 ) | |||
| { | |||
| uint attr; | |||
| for (attr = 0; attr < draw->num_vs_outputs; attr++) { | |||
| for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { | |||
| const float *val0 = v0->data[attr]; | |||
| const float *val1 = v1->data[attr]; | |||
| float *newv = dst->data[attr]; | |||
| @@ -175,6 +175,22 @@ reset_stipple_counter(struct draw_stage *stage) | |||
| stage->next->reset_stipple_counter( stage->next ); | |||
| } | |||
| static void | |||
| stipple_reset_point(struct draw_stage *stage, struct prim_header *header) | |||
| { | |||
| struct stipple_stage *stipple = stipple_stage(stage); | |||
| stipple->counter = 0; | |||
| stage->next->point(stage->next, header); | |||
| } | |||
| static void | |||
| stipple_reset_tri(struct draw_stage *stage, struct prim_header *header) | |||
| { | |||
| struct stipple_stage *stipple = stipple_stage(stage); | |||
| stipple->counter = 0; | |||
| stage->next->tri(stage->next, header); | |||
| } | |||
| static void | |||
| stipple_first_line(struct draw_stage *stage, | |||
| @@ -220,9 +236,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) | |||
| stipple->stage.draw = draw; | |||
| stipple->stage.next = NULL; | |||
| stipple->stage.point = draw_pipe_passthrough_point; | |||
| stipple->stage.point = stipple_reset_point; | |||
| stipple->stage.line = stipple_first_line; | |||
| stipple->stage.tri = draw_pipe_passthrough_tri; | |||
| stipple->stage.tri = stipple_reset_tri; | |||
| stipple->stage.reset_stipple_counter = reset_stipple_counter; | |||
| stipple->stage.flush = stipple_flush; | |||
| stipple->stage.destroy = stipple_destroy; | |||
| @@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage, | |||
| struct prim_header *header ) | |||
| { | |||
| struct twoside_stage *twoside = twoside_stage(stage); | |||
| const struct draw_vertex_shader *vs = stage->draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; | |||
| uint i; | |||
| twoside->attrib_front0 = 0; | |||
| @@ -197,7 +197,7 @@ static void widepoint_first_point( struct draw_stage *stage, | |||
| if (draw->rasterizer->point_sprite) { | |||
| /* find vertex shader texcoord outputs */ | |||
| const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
| uint i, j = 0; | |||
| for (i = 0; i < vs->info.num_outputs; i++) { | |||
| if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { | |||
| @@ -212,7 +212,7 @@ static void widepoint_first_point( struct draw_stage *stage, | |||
| wide->psize_slot = -1; | |||
| if (draw->rasterizer->point_size_per_vertex) { | |||
| /* find PSIZ vertex output */ | |||
| const struct draw_vertex_shader *vs = draw->vertex_shader; | |||
| const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
| uint i; | |||
| for (i = 0; i < vs->info.num_outputs; i++) { | |||
| if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { | |||
| @@ -124,6 +124,7 @@ struct draw_context | |||
| struct { | |||
| struct { | |||
| struct draw_pt_middle_end *fetch_emit; | |||
| struct draw_pt_middle_end *fetch_shade_emit; | |||
| struct draw_pt_middle_end *general; | |||
| } middle; | |||
| @@ -154,6 +155,7 @@ struct draw_context | |||
| const void *constants; | |||
| } user; | |||
| boolean test_fse; | |||
| } pt; | |||
| struct { | |||
| @@ -167,13 +169,26 @@ struct draw_context | |||
| /* pipe state that we need: */ | |||
| const struct pipe_rasterizer_state *rasterizer; | |||
| struct pipe_viewport_state viewport; | |||
| boolean identity_viewport; | |||
| struct draw_vertex_shader *vertex_shader; | |||
| struct { | |||
| struct draw_vertex_shader *vertex_shader; | |||
| uint num_vs_outputs; /**< convenience, from vertex_shader */ | |||
| boolean identity_viewport; | |||
| uint num_vs_outputs; /**< convenience, from vertex_shader */ | |||
| /** TGSI program interpreter runtime state */ | |||
| struct tgsi_exec_machine machine; | |||
| /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. | |||
| */ | |||
| struct gallivm_cpu_engine *engine; | |||
| struct translate *fetch; | |||
| struct translate_cache *fetch_cache; | |||
| struct translate *emit; | |||
| struct translate_cache *emit_cache; | |||
| } vs; | |||
| /* Clip derived state: | |||
| */ | |||
| @@ -190,16 +205,15 @@ struct draw_context | |||
| unsigned reduced_prim; | |||
| /** TGSI program interpreter runtime state */ | |||
| struct tgsi_exec_machine machine; | |||
| /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. | |||
| */ | |||
| struct gallivm_cpu_engine *engine; | |||
| void *driver_private; | |||
| }; | |||
| /******************************************************************************* | |||
| * Vertex shader code: | |||
| */ | |||
| boolean draw_vs_init( struct draw_context *draw ); | |||
| void draw_vs_destroy( struct draw_context *draw ); | |||
| @@ -247,6 +261,12 @@ void draw_pipeline_run( struct draw_context *draw, | |||
| const ushort *elts, | |||
| unsigned count ); | |||
| void draw_pipeline_run_linear( struct draw_context *draw, | |||
| unsigned prim, | |||
| struct vertex_header *vertices, | |||
| unsigned count, | |||
| unsigned stride ); | |||
| void draw_pipeline_flush( struct draw_context *draw, | |||
| @@ -64,7 +64,7 @@ draw_pt_arrays(struct draw_context *draw, | |||
| opt |= PT_PIPELINE; | |||
| } | |||
| if (!draw->bypass_clipping) { | |||
| if (!draw->bypass_clipping && !draw->pt.test_fse) { | |||
| opt |= PT_CLIPTEST; | |||
| } | |||
| @@ -72,16 +72,18 @@ draw_pt_arrays(struct draw_context *draw, | |||
| opt |= PT_SHADE; | |||
| } | |||
| if (opt) | |||
| middle = draw->pt.middle.general; | |||
| else | |||
| if (opt == 0) | |||
| middle = draw->pt.middle.fetch_emit; | |||
| else if (opt == PT_SHADE && draw->pt.test_fse) | |||
| middle = draw->pt.middle.fetch_shade_emit; | |||
| else | |||
| middle = draw->pt.middle.general; | |||
| /* Pick the right frontend | |||
| */ | |||
| if (draw->pt.user.elts || | |||
| count >= 256) { | |||
| if (draw->pt.user.elts || (opt & PT_PIPELINE)) { | |||
| frontend = draw->pt.front.vcache; | |||
| } else { | |||
| frontend = draw->pt.front.varray; | |||
| @@ -102,6 +104,8 @@ draw_pt_arrays(struct draw_context *draw, | |||
| boolean draw_pt_init( struct draw_context *draw ) | |||
| { | |||
| draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; | |||
| draw->pt.front.vcache = draw_pt_vcache( draw ); | |||
| if (!draw->pt.front.vcache) | |||
| return FALSE; | |||
| @@ -114,6 +118,13 @@ boolean draw_pt_init( struct draw_context *draw ) | |||
| if (!draw->pt.middle.fetch_emit) | |||
| return FALSE; | |||
| if (draw->pt.test_fse) { | |||
| draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); | |||
| if (!draw->pt.middle.fetch_shade_emit) | |||
| return FALSE; | |||
| } | |||
| draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); | |||
| if (!draw->pt.middle.general) | |||
| return FALSE; | |||
| @@ -134,6 +145,11 @@ void draw_pt_destroy( struct draw_context *draw ) | |||
| draw->pt.middle.fetch_emit = NULL; | |||
| } | |||
| if (draw->pt.middle.fetch_shade_emit) { | |||
| draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit ); | |||
| draw->pt.middle.fetch_shade_emit = NULL; | |||
| } | |||
| if (draw->pt.front.vcache) { | |||
| draw->pt.front.vcache->destroy( draw->pt.front.vcache ); | |||
| draw->pt.front.vcache = NULL; | |||
| @@ -147,19 +163,6 @@ void draw_pt_destroy( struct draw_context *draw ) | |||
| static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { | |||
| PIPE_PRIM_POINTS, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES | |||
| }; | |||
| /** | |||
| * Draw vertex arrays | |||
| @@ -172,9 +175,10 @@ void | |||
| draw_arrays(struct draw_context *draw, unsigned prim, | |||
| unsigned start, unsigned count) | |||
| { | |||
| if (reduced_prim[prim] != draw->reduced_prim) { | |||
| unsigned reduced_prim = draw_pt_reduced_prim(prim); | |||
| if (reduced_prim != draw->reduced_prim) { | |||
| draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); | |||
| draw->reduced_prim = reduced_prim[prim]; | |||
| draw->reduced_prim = reduced_prim; | |||
| } | |||
| /* drawing done here: */ | |||
| @@ -92,6 +92,10 @@ struct draw_pt_middle_end { | |||
| const ushort *draw_elts, | |||
| unsigned draw_count ); | |||
| void (*run_linear)(struct draw_pt_middle_end *, | |||
| unsigned start, | |||
| unsigned count); | |||
| void (*finish)( struct draw_pt_middle_end * ); | |||
| void (*destroy)( struct draw_pt_middle_end * ); | |||
| }; | |||
| @@ -117,6 +121,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, | |||
| struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); | |||
| struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); | |||
| /* Middle-ends: | |||
| * | |||
| * Currently one general-purpose case which can do all possibilities, | |||
| @@ -128,6 +133,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); | |||
| * vertex_elements. | |||
| */ | |||
| struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); | |||
| struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); | |||
| struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); | |||
| @@ -152,6 +158,13 @@ void draw_pt_emit( struct pt_emit *emit, | |||
| const ushort *elts, | |||
| unsigned count ); | |||
| void draw_pt_emit_linear( struct pt_emit *emit, | |||
| const float (*vertex_data)[4], | |||
| unsigned vertex_count, | |||
| unsigned stride, | |||
| unsigned start, | |||
| unsigned count ); | |||
| void draw_pt_emit_destroy( struct pt_emit *emit ); | |||
| struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); | |||
| @@ -170,6 +183,11 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, | |||
| unsigned count, | |||
| char *verts ); | |||
| void draw_pt_fetch_run_linear( struct pt_fetch *fetch, | |||
| unsigned start, | |||
| unsigned count, | |||
| char *verts ); | |||
| void draw_pt_fetch_destroy( struct pt_fetch *fetch ); | |||
| struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); | |||
| @@ -194,4 +212,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); | |||
| void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); | |||
| /******************************************************************************* | |||
| * Utils: | |||
| */ | |||
| void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr); | |||
| unsigned draw_pt_reduced_prim(unsigned prim); | |||
| #endif | |||
| @@ -0,0 +1,153 @@ | |||
| static void FUNC( ARGS, | |||
| unsigned count ) | |||
| { | |||
| LOCAL_VARS; | |||
| switch (prim) { | |||
| case PIPE_PRIM_POINTS: | |||
| for (i = 0; i < count; i ++) { | |||
| POINT( (i + 0) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINES: | |||
| for (i = 0; i+1 < count; i += 2) { | |||
| LINE( DRAW_PIPE_RESET_STIPPLE, | |||
| (i + 0), | |||
| (i + 1)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_LOOP: | |||
| if (count >= 2) { | |||
| flags = DRAW_PIPE_RESET_STIPPLE; | |||
| for (i = 1; i < count; i++, flags = 0) { | |||
| LINE( flags, | |||
| (i - 1), | |||
| (i )); | |||
| } | |||
| LINE( flags, | |||
| (i - 1), | |||
| (0 )); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| flags = DRAW_PIPE_RESET_STIPPLE; | |||
| for (i = 1; i < count; i++, flags = 0) { | |||
| LINE( flags, | |||
| (i - 1), | |||
| (i )); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLES: | |||
| for (i = 0; i+2 < count; i += 3) { | |||
| TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| (i + 0), | |||
| (i + 1), | |||
| (i + 2 )); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| if (flatfirst) { | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| (i + 0), | |||
| (i + 1 + (i&1)), | |||
| (i + 2 - (i&1))); | |||
| } | |||
| } | |||
| else { | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| (i + 0 + (i&1)), | |||
| (i + 1 - (i&1)), | |||
| (i + 2 )); | |||
| } | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_FAN: | |||
| if (count >= 3) { | |||
| if (flatfirst) { | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| (i + 1), | |||
| (i + 2), | |||
| (0 )); | |||
| } | |||
| } | |||
| else { | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| (0), | |||
| (i + 1), | |||
| (i + 2 )); | |||
| } | |||
| } | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUADS: | |||
| for (i = 0; i+3 < count; i += 4) { | |||
| QUAD( (i + 0), | |||
| (i + 1), | |||
| (i + 2), | |||
| (i + 3)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
| for (i = 0; i+3 < count; i += 2) { | |||
| QUAD( (i + 2), | |||
| (i + 0), | |||
| (i + 1), | |||
| (i + 3)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_POLYGON: | |||
| { | |||
| /* These bitflags look a little odd because we submit the | |||
| * vertices as (1,2,0) to satisfy flatshade requirements. | |||
| */ | |||
| const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; | |||
| const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; | |||
| const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; | |||
| flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; | |||
| for (i = 0; i+2 < count; i++, flags = edge_middle) { | |||
| if (i + 3 == count) | |||
| flags |= edge_last; | |||
| TRIANGLE( flags, | |||
| (i + 1), | |||
| (i + 2), | |||
| (0)); | |||
| } | |||
| } | |||
| break; | |||
| default: | |||
| assert(0); | |||
| break; | |||
| } | |||
| FLUSH; | |||
| } | |||
| #undef TRIANGLE | |||
| #undef QUAD | |||
| #undef POINT | |||
| #undef LINE | |||
| #undef FUNC | |||
| @@ -40,6 +40,9 @@ struct pt_emit { | |||
| struct translate *translate; | |||
| struct translate_cache *cache; | |||
| unsigned prim; | |||
| const struct vertex_info *vinfo; | |||
| }; | |||
| void draw_pt_emit_prepare( struct pt_emit *emit, | |||
| @@ -51,8 +54,18 @@ void draw_pt_emit_prepare( struct pt_emit *emit, | |||
| struct translate_key hw_key; | |||
| unsigned i; | |||
| boolean ok; | |||
| /* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
| */ | |||
| draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
| ok = draw->render->set_primitive(draw->render, prim); | |||
| /* XXX: may need to defensively reset this later on as clipping can | |||
| * clobber this state in the render backend. | |||
| */ | |||
| emit->prim = prim; | |||
| ok = draw->render->set_primitive(draw->render, emit->prim); | |||
| if (!ok) { | |||
| assert(0); | |||
| return; | |||
| @@ -60,7 +73,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, | |||
| /* Must do this after set_primitive() above: | |||
| */ | |||
| vinfo = draw->render->get_vertex_info(draw->render); | |||
| emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render); | |||
| /* Translate from pipeline vertices to hw vertices. | |||
| @@ -100,6 +113,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, | |||
| case EMIT_4UB: | |||
| output_format = PIPE_FORMAT_B8G8R8A8_UNORM; | |||
| emit_sz = 4 * sizeof(ubyte); | |||
| break; | |||
| default: | |||
| assert(0); | |||
| output_format = PIPE_FORMAT_NONE; | |||
| @@ -144,6 +158,14 @@ void draw_pt_emit( struct pt_emit *emit, | |||
| */ | |||
| draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
| /* XXX: and work out some way to coordinate the render primitive | |||
| * between vbuf.c and here... | |||
| */ | |||
| if (!draw->render->set_primitive(draw->render, emit->prim)) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| hw_verts = render->allocate_vertices(render, | |||
| (ushort)translate->key.output_stride, | |||
| (ushort)vertex_count); | |||
| @@ -178,6 +200,72 @@ void draw_pt_emit( struct pt_emit *emit, | |||
| } | |||
| void draw_pt_emit_linear(struct pt_emit *emit, | |||
| const float (*vertex_data)[4], | |||
| unsigned vertex_count, | |||
| unsigned stride, | |||
| unsigned start, | |||
| unsigned count) | |||
| { | |||
| struct draw_context *draw = emit->draw; | |||
| struct translate *translate = emit->translate; | |||
| struct vbuf_render *render = draw->render; | |||
| void *hw_verts; | |||
| #if 0 | |||
| debug_printf("Linear emit\n"); | |||
| #endif | |||
| /* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
| */ | |||
| draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
| /* XXX: and work out some way to coordinate the render primitive | |||
| * between vbuf.c and here... | |||
| */ | |||
| if (!draw->render->set_primitive(draw->render, emit->prim)) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| hw_verts = render->allocate_vertices(render, | |||
| (ushort)translate->key.output_stride, | |||
| (ushort)count); | |||
| if (!hw_verts) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| translate->set_buffer(translate, 0, | |||
| vertex_data, stride); | |||
| translate->set_buffer(translate, 1, | |||
| &draw->rasterizer->point_size, | |||
| 0); | |||
| translate->run(translate, | |||
| 0, | |||
| vertex_count, | |||
| hw_verts); | |||
| if (0) { | |||
| unsigned i; | |||
| for (i = 0; i < vertex_count; i++) { | |||
| debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); | |||
| draw_dump_emitted_vertex( emit->vinfo, | |||
| (const uint8_t *)hw_verts + | |||
| translate->key.output_stride * i ); | |||
| } | |||
| } | |||
| render->draw_arrays(render, start, count); | |||
| render->release_vertices(render, | |||
| hw_verts, | |||
| translate->key.output_stride, | |||
| vertex_count); | |||
| } | |||
| struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) | |||
| { | |||
| struct pt_emit *emit = CALLOC_STRUCT(pt_emit); | |||
| @@ -166,6 +166,42 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, | |||
| } | |||
| void draw_pt_fetch_run_linear( struct pt_fetch *fetch, | |||
| unsigned start, | |||
| unsigned count, | |||
| char *verts ) | |||
| { | |||
| struct draw_context *draw = fetch->draw; | |||
| struct translate *translate = fetch->translate; | |||
| unsigned i; | |||
| for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { | |||
| translate->set_buffer(translate, | |||
| i, | |||
| ((char *)draw->pt.user.vbuffer[i] + | |||
| draw->pt.vertex_buffer[i].buffer_offset), | |||
| draw->pt.vertex_buffer[i].pitch ); | |||
| } | |||
| translate->run( translate, | |||
| start, | |||
| count, | |||
| verts ); | |||
| /* Edgeflags are hard to fit into a translate program, populate | |||
| * them separately if required. In the setup above they are | |||
| * defaulted to one, so only need this if there is reason to change | |||
| * that default: | |||
| */ | |||
| if (fetch->need_edgeflags) { | |||
| for (i = 0; i < count; i++) { | |||
| struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); | |||
| vh->edgeflag = draw_pt_get_edgeflag( draw, start + i ); | |||
| } | |||
| } | |||
| } | |||
| struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) | |||
| { | |||
| struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); | |||
| @@ -258,6 +258,59 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, | |||
| } | |||
| static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, | |||
| unsigned start, | |||
| unsigned count ) | |||
| { | |||
| struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; | |||
| struct draw_context *draw = feme->draw; | |||
| void *hw_verts; | |||
| /* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
| */ | |||
| draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
| hw_verts = draw->render->allocate_vertices( draw->render, | |||
| (ushort)feme->translate->key.output_stride, | |||
| (ushort)count ); | |||
| if (!hw_verts) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| /* Single routine to fetch vertices and emit HW verts. | |||
| */ | |||
| feme->translate->run( feme->translate, | |||
| start, | |||
| count, | |||
| hw_verts ); | |||
| if (0) { | |||
| unsigned i; | |||
| for (i = 0; i < count; i++) { | |||
| debug_printf("\n\nvertex %d:\n", i); | |||
| draw_dump_emitted_vertex( feme->vinfo, | |||
| (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); | |||
| } | |||
| } | |||
| /* XXX: Draw arrays path to avoid re-emitting index list again and | |||
| * again. | |||
| */ | |||
| draw->render->draw_arrays( draw->render, | |||
| 0, /*start*/ | |||
| count ); | |||
| /* Done -- that was easy, wasn't it: | |||
| */ | |||
| draw->render->release_vertices( draw->render, | |||
| hw_verts, | |||
| feme->translate->key.output_stride, | |||
| count ); | |||
| } | |||
| static void fetch_emit_finish( struct draw_pt_middle_end *middle ) | |||
| { | |||
| @@ -287,10 +340,11 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) | |||
| return NULL; | |||
| } | |||
| fetch_emit->base.prepare = fetch_emit_prepare; | |||
| fetch_emit->base.run = fetch_emit_run; | |||
| fetch_emit->base.finish = fetch_emit_finish; | |||
| fetch_emit->base.destroy = fetch_emit_destroy; | |||
| fetch_emit->base.prepare = fetch_emit_prepare; | |||
| fetch_emit->base.run = fetch_emit_run; | |||
| fetch_emit->base.run_linear = fetch_emit_run_linear; | |||
| fetch_emit->base.finish = fetch_emit_finish; | |||
| fetch_emit->base.destroy = fetch_emit_destroy; | |||
| fetch_emit->draw = draw; | |||
| @@ -0,0 +1,344 @@ | |||
| /************************************************************************** | |||
| * | |||
| * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
| * All Rights Reserved. | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the | |||
| * "Software"), to deal in the Software without restriction, including | |||
| * without limitation the rights to use, copy, modify, merge, publish, | |||
| * distribute, sub license, and/or sell copies of the Software, and to | |||
| * permit persons to whom the Software is furnished to do so, subject to | |||
| * the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the | |||
| * next paragraph) shall be included in all copies or substantial portions | |||
| * of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| * | |||
| **************************************************************************/ | |||
| /* | |||
| * Authors: | |||
| * Keith Whitwell <keith@tungstengraphics.com> | |||
| */ | |||
| #include "pipe/p_util.h" | |||
| #include "draw/draw_context.h" | |||
| #include "draw/draw_private.h" | |||
| #include "draw/draw_vbuf.h" | |||
| #include "draw/draw_vertex.h" | |||
| #include "draw/draw_pt.h" | |||
| #include "draw/draw_vs.h" | |||
| #include "translate/translate.h" | |||
| struct fetch_shade_emit; | |||
| /* Prototype fetch, shade, emit-hw-verts all in one go. | |||
| */ | |||
| struct fetch_shade_emit { | |||
| struct draw_pt_middle_end base; | |||
| struct draw_context *draw; | |||
| /* Temporaries: | |||
| */ | |||
| const float *constants; | |||
| unsigned pitch[PIPE_MAX_ATTRIBS]; | |||
| const ubyte *src[PIPE_MAX_ATTRIBS]; | |||
| unsigned prim; | |||
| struct draw_vs_varient_key key; | |||
| struct draw_vs_varient *active; | |||
| const struct vertex_info *vinfo; | |||
| }; | |||
| static void fse_prepare( struct draw_pt_middle_end *middle, | |||
| unsigned prim, | |||
| unsigned opt ) | |||
| { | |||
| struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; | |||
| struct draw_context *draw = fse->draw; | |||
| unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; | |||
| const struct vertex_info *vinfo; | |||
| unsigned i; | |||
| if (!draw->render->set_primitive( draw->render, | |||
| prim )) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| /* Must do this after set_primitive() above: | |||
| */ | |||
| fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render); | |||
| fse->key.output_stride = vinfo->size * 4; | |||
| fse->key.nr_outputs = vinfo->num_attribs; | |||
| fse->key.nr_inputs = num_vs_inputs; | |||
| fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */ | |||
| fse->key.nr_inputs); /* inputs - fetch from api format */ | |||
| fse->key.viewport = !draw->identity_viewport; | |||
| fse->key.clip = !draw->bypass_clipping; | |||
| fse->key.pad = 0; | |||
| memset(fse->key.element, 0, | |||
| fse->key.nr_elements * sizeof(fse->key.element[0])); | |||
| for (i = 0; i < num_vs_inputs; i++) { | |||
| const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; | |||
| fse->key.element[i].in.format = src->src_format; | |||
| /* Consider ignoring these, ie make generated programs | |||
| * independent of this state: | |||
| */ | |||
| fse->key.element[i].in.buffer = src->vertex_buffer_index; | |||
| fse->key.element[i].in.offset = src->src_offset; | |||
| } | |||
| { | |||
| unsigned dst_offset = 0; | |||
| for (i = 0; i < vinfo->num_attribs; i++) { | |||
| unsigned emit_sz = 0; | |||
| switch (vinfo->emit[i]) { | |||
| case EMIT_4F: | |||
| emit_sz = 4 * sizeof(float); | |||
| break; | |||
| case EMIT_3F: | |||
| emit_sz = 3 * sizeof(float); | |||
| break; | |||
| case EMIT_2F: | |||
| emit_sz = 2 * sizeof(float); | |||
| break; | |||
| case EMIT_1F: | |||
| emit_sz = 1 * sizeof(float); | |||
| break; | |||
| case EMIT_1F_PSIZE: | |||
| emit_sz = 1 * sizeof(float); | |||
| break; | |||
| case EMIT_4UB: | |||
| emit_sz = 4 * sizeof(ubyte); | |||
| break; | |||
| default: | |||
| assert(0); | |||
| break; | |||
| } | |||
| /* The elements in the key correspond to vertex shader output | |||
| * numbers, not to positions in the hw vertex description -- | |||
| * that's handled by the output_offset field. | |||
| */ | |||
| fse->key.element[i].out.format = vinfo->emit[i]; | |||
| fse->key.element[i].out.vs_output = vinfo->src_index[i]; | |||
| fse->key.element[i].out.offset = dst_offset; | |||
| dst_offset += emit_sz; | |||
| assert(fse->key.output_stride >= dst_offset); | |||
| } | |||
| } | |||
| /* Would normally look up a vertex shader and peruse its list of | |||
| * varients somehow. We omitted that step and put all the | |||
| * hardcoded "shaders" into an array. We're just making the | |||
| * assumption that this happens to be a matching shader... ie | |||
| * you're running isosurf, aren't you? | |||
| */ | |||
| fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, | |||
| &fse->key ); | |||
| if (!fse->active) { | |||
| assert(0); | |||
| return ; | |||
| } | |||
| /* Now set buffer pointers: | |||
| */ | |||
| for (i = 0; i < num_vs_inputs; i++) { | |||
| unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; | |||
| fse->active->set_input( fse->active, | |||
| i, | |||
| ((const ubyte *) draw->pt.user.vbuffer[buf] + | |||
| draw->pt.vertex_buffer[buf].buffer_offset), | |||
| draw->pt.vertex_buffer[buf].pitch ); | |||
| } | |||
| fse->active->set_constants( fse->active, | |||
| (const float (*)[4])draw->pt.user.constants ); | |||
| fse->active->set_viewport( fse->active, | |||
| &draw->viewport ); | |||
| //return TRUE; | |||
| } | |||
| static void fse_run_linear( struct draw_pt_middle_end *middle, | |||
| unsigned start, | |||
| unsigned count ) | |||
| { | |||
| struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; | |||
| struct draw_context *draw = fse->draw; | |||
| unsigned alloc_count = align(count, 4); | |||
| char *hw_verts; | |||
| /* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
| */ | |||
| draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
| hw_verts = draw->render->allocate_vertices( draw->render, | |||
| (ushort)fse->key.output_stride, | |||
| (ushort)alloc_count ); | |||
| if (!hw_verts) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| /* Single routine to fetch vertices, run shader and emit HW verts. | |||
| * Clipping is done elsewhere -- either by the API or on hardware, | |||
| * or for some other reason not required... | |||
| */ | |||
| fse->active->run_linear( fse->active, | |||
| start, count, | |||
| hw_verts ); | |||
| /* Draw arrays path to avoid re-emitting index list again and | |||
| * again. | |||
| */ | |||
| draw->render->draw_arrays( draw->render, | |||
| 0, | |||
| count ); | |||
| if (0) { | |||
| unsigned i; | |||
| for (i = 0; i < count; i++) { | |||
| debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, i, | |||
| fse->key.output_stride, | |||
| fse->key.output_stride * i); | |||
| draw_dump_emitted_vertex( fse->vinfo, | |||
| (const uint8_t *)hw_verts + fse->key.output_stride * i ); | |||
| } | |||
| } | |||
| draw->render->release_vertices( draw->render, | |||
| hw_verts, | |||
| fse->key.output_stride, | |||
| count ); | |||
| } | |||
| static void | |||
| fse_run(struct draw_pt_middle_end *middle, | |||
| const unsigned *fetch_elts, | |||
| unsigned fetch_count, | |||
| const ushort *draw_elts, | |||
| unsigned draw_count ) | |||
| { | |||
| struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; | |||
| struct draw_context *draw = fse->draw; | |||
| unsigned alloc_count = align(fetch_count, 4); | |||
| void *hw_verts; | |||
| /* XXX: need to flush to get prim_vbuf.c to release its allocation?? | |||
| */ | |||
| draw_do_flush( draw, DRAW_FLUSH_BACKEND ); | |||
| hw_verts = draw->render->allocate_vertices( draw->render, | |||
| (ushort)fse->key.output_stride, | |||
| (ushort)alloc_count ); | |||
| if (!hw_verts) { | |||
| assert(0); | |||
| return; | |||
| } | |||
| /* Single routine to fetch vertices, run shader and emit HW verts. | |||
| */ | |||
| fse->active->run_elts( fse->active, | |||
| fetch_elts, | |||
| fetch_count, | |||
| hw_verts ); | |||
| draw->render->draw( draw->render, | |||
| draw_elts, | |||
| draw_count ); | |||
| if (0) { | |||
| unsigned i; | |||
| for (i = 0; i < fetch_count; i++) { | |||
| debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); | |||
| draw_dump_emitted_vertex( fse->vinfo, | |||
| (const uint8_t *)hw_verts + | |||
| fse->key.output_stride * i ); | |||
| } | |||
| } | |||
| draw->render->release_vertices( draw->render, | |||
| hw_verts, | |||
| fse->key.output_stride, | |||
| fetch_count ); | |||
| } | |||
/* Middle-end 'finish' hook.  This middle end has nothing to do here.
 */
static void fse_finish( struct draw_pt_middle_end *middle )
{
   (void) middle;
}
/* Middle-end 'destroy' hook: free the object allocated by
 * draw_pt_middle_fse().  'middle' is the first member of that object,
 * so both share the same address.
 */
static void
fse_destroy( struct draw_pt_middle_end *middle )
{
   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;

   FREE(fse);
}
| struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) | |||
| { | |||
| struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); | |||
| if (!fse) | |||
| return NULL; | |||
| fse->base.prepare = fse_prepare; | |||
| fse->base.run = fse_run; | |||
| fse->base.run_linear = fse_run_linear; | |||
| fse->base.finish = fse_finish; | |||
| fse->base.destroy = fse_destroy; | |||
| fse->draw = draw; | |||
| return &fse->base; | |||
| } | |||
| @@ -55,7 +55,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, | |||
| { | |||
| struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; | |||
| struct draw_context *draw = fpme->draw; | |||
| struct draw_vertex_shader *vs = draw->vertex_shader; | |||
| struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
| /* Add one to num_outputs because the pipeline occasionally tags on | |||
| * an additional texcoord, eg for AA lines. | |||
| @@ -107,7 +107,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, | |||
| { | |||
| struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; | |||
| struct draw_context *draw = fpme->draw; | |||
| struct draw_vertex_shader *shader = draw->vertex_shader; | |||
| struct draw_vertex_shader *shader = draw->vs.vertex_shader; | |||
| unsigned opt = fpme->opt; | |||
| unsigned alloc_count = align_int( fetch_count, 4 ); | |||
| @@ -162,7 +162,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, | |||
| fpme->vertex_size, | |||
| draw_elts, | |||
| draw_count ); | |||
| } | |||
| } | |||
| else { | |||
| draw_pt_emit( fpme->emit, | |||
| (const float (*)[4])pipeline_verts->data, | |||
| @@ -177,6 +177,79 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, | |||
| } | |||
| static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, | |||
| unsigned start, | |||
| unsigned count) | |||
| { | |||
| struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; | |||
| struct draw_context *draw = fpme->draw; | |||
| struct draw_vertex_shader *shader = draw->vs.vertex_shader; | |||
| unsigned opt = fpme->opt; | |||
| unsigned alloc_count = align_int( count, 4 ); | |||
| struct vertex_header *pipeline_verts = | |||
| (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); | |||
| if (!pipeline_verts) { | |||
| /* Not much we can do here - just skip the rendering. | |||
| */ | |||
| assert(0); | |||
| return; | |||
| } | |||
| /* Fetch into our vertex buffer | |||
| */ | |||
| draw_pt_fetch_run_linear( fpme->fetch, | |||
| start, | |||
| count, | |||
| (char *)pipeline_verts ); | |||
| /* Run the shader, note that this overwrites the data[] parts of | |||
| * the pipeline verts. If there is no shader, ie a bypass shader, | |||
| * then the inputs == outputs, and are already in the correct | |||
| * place. | |||
| */ | |||
| if (opt & PT_SHADE) | |||
| { | |||
| shader->run_linear(shader, | |||
| (const float (*)[4])pipeline_verts->data, | |||
| ( float (*)[4])pipeline_verts->data, | |||
| (const float (*)[4])draw->pt.user.constants, | |||
| count, | |||
| fpme->vertex_size, | |||
| fpme->vertex_size); | |||
| } | |||
| if (draw_pt_post_vs_run( fpme->post_vs, | |||
| pipeline_verts, | |||
| count, | |||
| fpme->vertex_size )) | |||
| { | |||
| opt |= PT_PIPELINE; | |||
| } | |||
| /* Do we need to run the pipeline? | |||
| */ | |||
| if (opt & PT_PIPELINE) { | |||
| draw_pipeline_run_linear( fpme->draw, | |||
| fpme->prim, | |||
| pipeline_verts, | |||
| count, | |||
| fpme->vertex_size); | |||
| } | |||
| else { | |||
| draw_pt_emit_linear( fpme->emit, | |||
| (const float (*)[4])pipeline_verts->data, | |||
| count, | |||
| fpme->vertex_size, | |||
| 0, /*start*/ | |||
| count ); | |||
| } | |||
| FREE(pipeline_verts); | |||
| } | |||
| static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) | |||
| { | |||
| @@ -206,10 +279,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * | |||
| if (!fpme) | |||
| goto fail; | |||
| fpme->base.prepare = fetch_pipeline_prepare; | |||
| fpme->base.run = fetch_pipeline_run; | |||
| fpme->base.finish = fetch_pipeline_finish; | |||
| fpme->base.destroy = fetch_pipeline_destroy; | |||
| fpme->base.prepare = fetch_pipeline_prepare; | |||
| fpme->base.run = fetch_pipeline_run; | |||
| fpme->base.run_linear = fetch_pipeline_linear_run; | |||
| fpme->base.finish = fetch_pipeline_finish; | |||
| fpme->base.destroy = fetch_pipeline_destroy; | |||
| fpme->draw = draw; | |||
| @@ -0,0 +1,103 @@ | |||
| /************************************************************************** | |||
| * | |||
| * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
| * All Rights Reserved. | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the | |||
| * "Software"), to deal in the Software without restriction, including | |||
| * without limitation the rights to use, copy, modify, merge, publish, | |||
| * distribute, sub license, and/or sell copies of the Software, and to | |||
| * permit persons to whom the Software is furnished to do so, subject to | |||
| * the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the | |||
| * next paragraph) shall be included in all copies or substantial portions | |||
| * of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| * | |||
| **************************************************************************/ | |||
| /* | |||
| * Authors: | |||
| * Keith Whitwell <keith@tungstengraphics.com> | |||
| */ | |||
| #include "pipe/p_util.h" | |||
| #include "draw/draw_context.h" | |||
| #include "draw/draw_private.h" | |||
| #include "draw/draw_pt.h" | |||
| void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) | |||
| { | |||
| switch (prim) { | |||
| case PIPE_PRIM_POINTS: | |||
| *first = 1; | |||
| *incr = 1; | |||
| break; | |||
| case PIPE_PRIM_LINES: | |||
| *first = 2; | |||
| *incr = 2; | |||
| break; | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| case PIPE_PRIM_LINE_LOOP: | |||
| *first = 2; | |||
| *incr = 1; | |||
| break; | |||
| case PIPE_PRIM_TRIANGLES: | |||
| *first = 3; | |||
| *incr = 3; | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| case PIPE_PRIM_TRIANGLE_FAN: | |||
| case PIPE_PRIM_POLYGON: | |||
| *first = 3; | |||
| *incr = 1; | |||
| break; | |||
| case PIPE_PRIM_QUADS: | |||
| *first = 4; | |||
| *incr = 4; | |||
| break; | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
| *first = 4; | |||
| *incr = 2; | |||
| break; | |||
| default: | |||
| assert(0); | |||
| *first = 0; | |||
| *incr = 1; /* set to one so that count % incr works */ | |||
| break; | |||
| } | |||
| } | |||
| unsigned draw_pt_reduced_prim(unsigned prim) | |||
| { | |||
| switch (prim) { | |||
| case PIPE_PRIM_POINTS: | |||
| return PIPE_PRIM_POINTS; | |||
| case PIPE_PRIM_LINES: | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| case PIPE_PRIM_LINE_LOOP: | |||
| return PIPE_PRIM_LINES; | |||
| case PIPE_PRIM_TRIANGLES: | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| case PIPE_PRIM_TRIANGLE_FAN: | |||
| case PIPE_PRIM_POLYGON: | |||
| case PIPE_PRIM_QUADS: | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
| return PIPE_PRIM_TRIANGLES; | |||
| default: | |||
| assert(0); | |||
| return PIPE_PRIM_POINTS; | |||
| } | |||
| } | |||
| @@ -43,6 +43,8 @@ struct varray_frontend { | |||
| unsigned draw_count; | |||
| unsigned fetch_count; | |||
| unsigned fetch_start; | |||
| struct draw_pt_middle_end *middle; | |||
| unsigned input_prim; | |||
| @@ -56,6 +58,11 @@ static void varray_flush(struct varray_frontend *varray) | |||
| debug_printf("FLUSH fc = %d, dc = %d\n", | |||
| varray->fetch_count, | |||
| varray->draw_count); | |||
| debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n", | |||
| varray->fetch_elts[0], | |||
| varray->fetch_elts[varray->fetch_count-1], | |||
| varray->draw_elts[0], | |||
| varray->draw_elts[varray->draw_count-1]); | |||
| #endif | |||
| varray->middle->run(varray->middle, | |||
| varray->fetch_elts, | |||
| @@ -68,20 +75,43 @@ static void varray_flush(struct varray_frontend *varray) | |||
| varray->draw_count = 0; | |||
| } | |||
| #if 0 | |||
| static void varray_check_flush(struct varray_frontend *varray) | |||
| static void varray_flush_linear(struct varray_frontend *varray, | |||
| unsigned start, unsigned count) | |||
| { | |||
| if (varray->draw_count + 6 >= DRAW_MAX/* || | |||
| varray->fetch_count + 4 >= FETCH_MAX*/) { | |||
| varray_flush(varray); | |||
| if (count) { | |||
| #if 0 | |||
| debug_printf("FLUSH LINEAR start = %d, count = %d\n", | |||
| start, | |||
| count); | |||
| #endif | |||
| assert(varray->middle->run_linear); | |||
| varray->middle->run_linear(varray->middle, start, count); | |||
| } | |||
| } | |||
| static INLINE void fetch_init(struct varray_frontend *varray, | |||
| unsigned count) | |||
| { | |||
| unsigned idx; | |||
| #if 0 | |||
| debug_printf("FETCH INIT c = %d, fs = %d\n", | |||
| count, | |||
| varray->fetch_start); | |||
| #endif | |||
| for (idx = 0; idx < count; ++idx) { | |||
| varray->fetch_elts[idx] = varray->fetch_start + idx; | |||
| } | |||
| varray->fetch_start += idx; | |||
| varray->fetch_count = idx; | |||
| } | |||
| static INLINE void add_draw_el(struct varray_frontend *varray, | |||
| int idx, ushort flags) | |||
| int idx) | |||
| { | |||
| varray->draw_elts[varray->draw_count++] = idx | flags; | |||
| varray->draw_elts[varray->draw_count++] = idx; | |||
| } | |||
| @@ -90,106 +120,52 @@ static INLINE void varray_triangle( struct varray_frontend *varray, | |||
| unsigned i1, | |||
| unsigned i2 ) | |||
| { | |||
| add_draw_el(varray, i0, 0); | |||
| add_draw_el(varray, i1, 0); | |||
| add_draw_el(varray, i2, 0); | |||
| } | |||
| static INLINE void varray_triangle_flags( struct varray_frontend *varray, | |||
| ushort flags, | |||
| unsigned i0, | |||
| unsigned i1, | |||
| unsigned i2 ) | |||
| { | |||
| add_draw_el(varray, i0, flags); | |||
| add_draw_el(varray, i1, 0); | |||
| add_draw_el(varray, i2, 0); | |||
| add_draw_el(varray, i0); | |||
| add_draw_el(varray, i1); | |||
| add_draw_el(varray, i2); | |||
| } | |||
| static INLINE void varray_line( struct varray_frontend *varray, | |||
| unsigned i0, | |||
| unsigned i1 ) | |||
| { | |||
| add_draw_el(varray, i0, 0); | |||
| add_draw_el(varray, i1, 0); | |||
| } | |||
| static INLINE void varray_line_flags( struct varray_frontend *varray, | |||
| ushort flags, | |||
| unsigned i0, | |||
| unsigned i1 ) | |||
| { | |||
| add_draw_el(varray, i0, flags); | |||
| add_draw_el(varray, i1, 0); | |||
| add_draw_el(varray, i0); | |||
| add_draw_el(varray, i1); | |||
| } | |||
| static INLINE void varray_point( struct varray_frontend *varray, | |||
| unsigned i0 ) | |||
| { | |||
| add_draw_el(varray, i0, 0); | |||
| } | |||
| static INLINE void varray_quad( struct varray_frontend *varray, | |||
| unsigned i0, | |||
| unsigned i1, | |||
| unsigned i2, | |||
| unsigned i3 ) | |||
| { | |||
| varray_triangle( varray, i0, i1, i3 ); | |||
| varray_triangle( varray, i1, i2, i3 ); | |||
| } | |||
| static INLINE void varray_ef_quad( struct varray_frontend *varray, | |||
| unsigned i0, | |||
| unsigned i1, | |||
| unsigned i2, | |||
| unsigned i3 ) | |||
| { | |||
| const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; | |||
| const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; | |||
| varray_triangle_flags( varray, | |||
| DRAW_PIPE_RESET_STIPPLE | omitEdge1, | |||
| i0, i1, i3 ); | |||
| varray_triangle_flags( varray, | |||
| omitEdge2, | |||
| i1, i2, i3 ); | |||
| add_draw_el(varray, i0); | |||
| } | |||
| /* At least for now, we're back to using a template include file for | |||
| * this. The two paths aren't too different though - it may be | |||
| * possible to reunify them. | |||
| */ | |||
| #define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2) | |||
| #define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3) | |||
| #define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1) | |||
| #define POINT(vc,i0) varray_point(vc,i0) | |||
| #define FUNC varray_run_extras | |||
| #include "draw_pt_varray_tmp.h" | |||
| #define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2) | |||
| #define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3) | |||
| #define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1) | |||
| #if 0 | |||
| #define TRIANGLE(flags,i0,i1,i2) varray_triangle(varray,i0,i1,i2) | |||
| #define LINE(flags,i0,i1) varray_line(varray,i0,i1) | |||
| #define POINT(i0) varray_point(varray,i0) | |||
| #define FUNC varray_decompose | |||
| #include "draw_pt_decompose.h" | |||
| #else | |||
| #define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2) | |||
| #define LINE(vc,i0,i1) varray_line(vc,i0,i1) | |||
| #define POINT(vc,i0) varray_point(vc,i0) | |||
| #define FUNC varray_run | |||
| #include "draw_pt_varray_tmp.h" | |||
| #include "draw_pt_varray_tmp_linear.h" | |||
| #endif | |||
| static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { | |||
| static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { | |||
| PIPE_PRIM_POINTS, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, /* decomposed LINELOOP */ | |||
| PIPE_PRIM_LINE_STRIP, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES | |||
| PIPE_PRIM_TRIANGLE_STRIP, | |||
| PIPE_PRIM_TRIANGLES, /* decomposed TRI_FAN */ | |||
| PIPE_PRIM_QUADS, | |||
| PIPE_PRIM_QUAD_STRIP, | |||
| PIPE_PRIM_TRIANGLES /* decomposed POLYGON */ | |||
| }; | |||
| @@ -201,17 +177,10 @@ static void varray_prepare(struct draw_pt_front_end *frontend, | |||
| { | |||
| struct varray_frontend *varray = (struct varray_frontend *)frontend; | |||
| if (opt & PT_PIPELINE) | |||
| { | |||
| varray->base.run = varray_run_extras; | |||
| } | |||
| else | |||
| { | |||
| varray->base.run = varray_run; | |||
| } | |||
| varray->base.run = varray_run; | |||
| varray->input_prim = prim; | |||
| varray->output_prim = reduced_prim[prim]; | |||
| varray->output_prim = decompose_prim[prim]; | |||
| varray->middle = middle; | |||
| middle->prepare(middle, varray->output_prim, opt); | |||
| @@ -10,32 +10,44 @@ static void FUNC(struct draw_pt_front_end *frontend, | |||
| boolean flatfirst = (draw->rasterizer->flatshade && | |||
| draw->rasterizer->flatshade_first); | |||
| unsigned i, flags; | |||
| unsigned i, j, flags; | |||
| unsigned first, incr; | |||
| varray->fetch_start = start; | |||
| draw_pt_split_prim(varray->input_prim, &first, &incr); | |||
| #if 0 | |||
| debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count); | |||
| #endif | |||
| #if 0 | |||
| debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim, | |||
| debug_printf("%s (%d) %d/%d\n", __FUNCTION__, | |||
| varray->input_prim, | |||
| start, count); | |||
| #endif | |||
| for (i = 0; i < count; ++i) { | |||
| varray->fetch_elts[i] = start + i; | |||
| } | |||
| varray->fetch_count = count; | |||
| switch (varray->input_prim) { | |||
| case PIPE_PRIM_POINTS: | |||
| for (i = 0; i < count; i ++) { | |||
| POINT(varray, i + 0); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i < end; i++) { | |||
| POINT(varray, i + 0); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINES: | |||
| for (i = 0; i+1 < count; i += 2) { | |||
| LINE(varray, DRAW_PIPE_RESET_STIPPLE, | |||
| i + 0, i + 1); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+1 < end; i += 2) { | |||
| LINE(varray, DRAW_PIPE_RESET_STIPPLE, | |||
| i + 0, i + 1); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| @@ -43,38 +55,81 @@ static void FUNC(struct draw_pt_front_end *frontend, | |||
| if (count >= 2) { | |||
| flags = DRAW_PIPE_RESET_STIPPLE; | |||
| for (i = 1; i < count; i++, flags = 0) { | |||
| LINE(varray, flags, i - 1, i); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 1; i < end; i++, flags = 0) { | |||
| LINE(varray, flags, i - 1, i); | |||
| } | |||
| LINE(varray, flags, i - 1, 0); | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| LINE(varray, flags, i - 1, 0); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| flags = DRAW_PIPE_RESET_STIPPLE; | |||
| for (i = 1; i < count; i++, flags = 0) { | |||
| LINE(varray, flags, i - 1, i); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 1; i < end; i++, flags = 0) { | |||
| LINE(varray, flags, i - 1, i); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLES: | |||
| for (i = 0; i+2 < count; i += 3) { | |||
| TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| i + 0, i + 1, i + 2); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+2 < end; i += 3) { | |||
| TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| i + 0, i + 1, i + 2); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| if (flatfirst) { | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| i + 0, i + 1 + (i&1), i + 2 - (i&1)); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+2 < end; i++) { | |||
| TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| i + 0, i + 1 + (i&1), i + 2 - (i&1)); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| if (j + first + i <= count) { | |||
| varray->fetch_start -= 2; | |||
| i -= 2; | |||
| } | |||
| } | |||
| } | |||
| else { | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| i + 0 + (i&1), i + 1 - (i&1), i + 2); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i + 2 < end; i++) { | |||
| TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, | |||
| i + 0 + (i&1), i + 1 - (i&1), i + 2); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| if (j + first + i <= count) { | |||
| varray->fetch_start -= 2; | |||
| i -= 2; | |||
| } | |||
| } | |||
| } | |||
| break; | |||
| @@ -83,51 +138,89 @@ static void FUNC(struct draw_pt_front_end *frontend, | |||
| if (count >= 3) { | |||
| if (flatfirst) { | |||
| flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+2 < end; i++) { | |||
| TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| } | |||
| else { | |||
| flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; | |||
| for (i = 0; i+2 < count; i++) { | |||
| TRIANGLE(varray, flags, 0, i + 1, i + 2); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+2 < end; i++) { | |||
| TRIANGLE(varray, flags, 0, i + 1, i + 2); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| } | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUADS: | |||
| for (i = 0; i+3 < count; i += 4) { | |||
| QUAD(varray, i + 0, i + 1, i + 2, i + 3); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+3 < end; i += 4) { | |||
| QUAD(varray, i + 0, i + 1, i + 2, i + 3); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
| for (i = 0; i+3 < count; i += 2) { | |||
| QUAD(varray, i + 2, i + 0, i + 1, i + 3); | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+3 < end; i += 2) { | |||
| QUAD(varray, i + 2, i + 0, i + 1, i + 3); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| if (j + first + i <= count) { | |||
| varray->fetch_start -= 2; | |||
| i -= 2; | |||
| } | |||
| } | |||
| break; | |||
| case PIPE_PRIM_POLYGON: | |||
| { | |||
| /* These bitflags look a little odd because we submit the | |||
| * vertices as (1,2,0) to satisfy flatshade requirements. | |||
| */ | |||
| const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; | |||
| const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; | |||
| const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; | |||
| flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; | |||
| for (i = 0; i+2 < count; i++, flags = edge_middle) { | |||
| /* These bitflags look a little odd because we submit the | |||
| * vertices as (1,2,0) to satisfy flatshade requirements. | |||
| */ | |||
| const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; | |||
| const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; | |||
| const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; | |||
| flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; | |||
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 0; i+2 < end; i++, flags = edge_middle) { | |||
| if (i + 3 == count) | |||
| flags |= edge_last; | |||
| TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
| } | |||
| TRIANGLE(varray, flags, i + 1, i + 2, 0); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| } | |||
| break; | |||
| default: | |||
| assert(0); | |||
| @@ -0,0 +1,94 @@ | |||
/* Trim a vertex count to the largest value that forms whole primitives.
 *
 * count - number of vertices supplied
 * first - vertices needed for the first primitive
 * incr  - additional vertices needed for each further primitive
 *         (must be non-zero)
 *
 * Returns the largest value of the form first + k * incr that does not
 * exceed 'count', or 0 when 'count' cannot form even one primitive.
 *
 * The short-count case is handled explicitly: all arithmetic here is
 * unsigned, so 'count - first' would otherwise wrap around and could
 * produce a huge bogus count (e.g. count = 1, first = 3, incr = 3).
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   if (count < first)
      return 0;

   return count - (count - first) % incr;
}
/* Template body of a varray front-end run function.  FUNC, TRIANGLE,
 * QUAD, POINT and LINE are macros (they are #undef'd right after this
 * function), presumably supplied by the file that includes this template.
 *
 * frontend  the front-end object; cast to struct varray_frontend
 * get_elt   not referenced in this body
 * elts      not dereferenced; its pointer value is used as the first
 *           vertex index (see "start" below)
 * count     number of vertices; trimmed to a whole number of primitives
 */
| static void FUNC(struct draw_pt_front_end *frontend, | |||
| pt_elt_func get_elt, | |||
| const void *elts, | |||
| unsigned count) | |||
| { | |||
| struct varray_frontend *varray = (struct varray_frontend *)frontend; | |||
/* NOTE(review): pointer-to-unsigned cast; this truncates wherever a
 * pointer is wider than unsigned.
 */
| unsigned start = (unsigned)elts; | |||
| unsigned i, j; | |||
| unsigned first, incr; | |||
| varray->fetch_start = start; | |||
| draw_pt_split_prim(varray->input_prim, &first, &incr); | |||
| /* Sanitize primitive length: | |||
| */ | |||
| count = trim(count, first, incr); | |||
| if (count < first) | |||
| return; | |||
| #if 0 | |||
| debug_printf("%s (%d) %d/%d\n", __FUNCTION__, | |||
| varray->input_prim, | |||
| start, count); | |||
| #endif | |||
| switch (varray->input_prim) { | |||
| case PIPE_PRIM_POINTS: | |||
| case PIPE_PRIM_LINES: | |||
| case PIPE_PRIM_TRIANGLES: | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| case PIPE_PRIM_QUADS: | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
/* Submit the range in chunks of at most FETCH_MAX vertices, each
 * trimmed to whole primitives.  After every chunk except the last, j is
 * stepped back by (first - incr) -- presumably so that strip-type
 * primitives re-submit the vertices they share with the next chunk.
 */
| for (j = 0; j < count;) { | |||
| unsigned remaining = count - j; | |||
| unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr ); | |||
| varray_flush_linear(varray, start + j, nr); | |||
| j += nr; | |||
| if (nr != remaining) | |||
| j -= (first - incr); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_LOOP: | |||
| if (count >= 2) { | |||
/* Per chunk: one LINE for each consecutive vertex pair, then a LINE from
 * the last vertex back to vertex 0, followed by fetch_init() and a flush.
 * NOTE(review): the indices given to LINE and the count given to
 * fetch_init() do not include j, and the closing segment is emitted for
 * every chunk -- confirm this is intended when count > FETCH_MAX.
 */
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 1; i < end; i++) { | |||
| LINE(varray, i - 1, i); | |||
| } | |||
| LINE(varray, i - 1, 0); | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| } | |||
| break; | |||
| case PIPE_PRIM_POLYGON: | |||
| case PIPE_PRIM_TRIANGLE_FAN: | |||
/* Per chunk: one TRIANGLE (0, i - 1, i) for every i in [2, end), then
 * fetch_init() and a flush.  As above, the indices do not include j.
 */
| for (j = 0; j + first <= count; j += i) { | |||
| unsigned end = MIN2(FETCH_MAX, count - j); | |||
| end -= (end % incr); | |||
| for (i = 2; i < end; i++) { | |||
| TRIANGLE(varray, 0, i - 1, i); | |||
| } | |||
| i = end; | |||
| fetch_init(varray, end); | |||
| varray_flush(varray); | |||
| } | |||
| break; | |||
| default: | |||
| assert(0); | |||
| break; | |||
| } | |||
/* Final flush, issued for every primitive type. */
| varray_flush(varray); | |||
| } | |||
| #undef TRIANGLE | |||
| #undef QUAD | |||
| #undef POINT | |||
| #undef LINE | |||
| #undef FUNC | |||
| @@ -171,15 +171,15 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, | |||
| unsigned i2, | |||
| unsigned i3 ) | |||
| { | |||
| const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; | |||
| const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; | |||
| vcache_triangle_flags( vcache, | |||
| DRAW_PIPE_RESET_STIPPLE | omitEdge1, | |||
| vcache_triangle_flags( vcache, | |||
| ( DRAW_PIPE_RESET_STIPPLE | | |||
| DRAW_PIPE_EDGE_FLAG_0 | | |||
| DRAW_PIPE_EDGE_FLAG_2 ), | |||
| i0, i1, i3 ); | |||
| vcache_triangle_flags( vcache, | |||
| omitEdge2, | |||
| vcache_triangle_flags( vcache, | |||
| ( DRAW_PIPE_EDGE_FLAG_0 | | |||
| DRAW_PIPE_EDGE_FLAG_1 ), | |||
| i1, i2, i3 ); | |||
| } | |||
| @@ -204,19 +204,6 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, | |||
/* Lookup table with PIPE_PRIM_POLYGON + 1 entries, indexed by primitive
 * type (vcache_prepare() reads reduced_prim[prim] to choose its output
 * primitive).  Every entry is one of POINTS, LINES or TRIANGLES.
 * NOTE(review): the same diff shows that lookup being replaced by
 * draw_pt_reduced_prim(prim).
 */
| static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { | |||
| PIPE_PRIM_POINTS, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_LINES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES, | |||
| PIPE_PRIM_TRIANGLES | |||
| }; | |||
| static void vcache_prepare( struct draw_pt_front_end *frontend, | |||
| @@ -236,7 +223,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, | |||
| } | |||
| vcache->input_prim = prim; | |||
| vcache->output_prim = reduced_prim[prim]; | |||
| vcache->output_prim = draw_pt_reduced_prim(prim); | |||
| vcache->middle = middle; | |||
| middle->prepare( middle, vcache->output_prim, opt ); | |||
| @@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo); | |||
| void draw_dump_emitted_vertex(const struct vertex_info *vinfo, | |||
| const uint8_t *data); | |||
| static INLINE unsigned draw_translate_vinfo_format(unsigned format ) | |||
| { | |||
| switch (format) { | |||
| case EMIT_1F: | |||
| case EMIT_1F_PSIZE: | |||
| return PIPE_FORMAT_R32_FLOAT; | |||
| case EMIT_2F: | |||
| return PIPE_FORMAT_R32G32_FLOAT; | |||
| case EMIT_3F: | |||
| return PIPE_FORMAT_R32G32B32_FLOAT; | |||
| case EMIT_4F: | |||
| return PIPE_FORMAT_R32G32B32A32_FLOAT; | |||
| case EMIT_4UB: | |||
| return PIPE_FORMAT_R8G8B8A8_UNORM; | |||
| default: | |||
| return PIPE_FORMAT_NONE; | |||
| } | |||
| } | |||
| #endif /* DRAW_VERTEX_H */ | |||
| @@ -36,6 +36,8 @@ | |||
| #include "draw_private.h" | |||
| #include "draw_context.h" | |||
| #include "draw_vs.h" | |||
| #include "translate/translate.h" | |||
| #include "translate/translate_cache.h" | |||
| @@ -66,13 +68,13 @@ draw_bind_vertex_shader(struct draw_context *draw, | |||
| if (dvs) | |||
| { | |||
| draw->vertex_shader = dvs; | |||
| draw->num_vs_outputs = dvs->info.num_outputs; | |||
| draw->vs.vertex_shader = dvs; | |||
| draw->vs.num_vs_outputs = dvs->info.num_outputs; | |||
| dvs->prepare( dvs, draw ); | |||
| } | |||
| else { | |||
| draw->vertex_shader = NULL; | |||
| draw->num_vs_outputs = 0; | |||
| draw->vs.vertex_shader = NULL; | |||
| draw->vs.num_vs_outputs = 0; | |||
| } | |||
| } | |||
| @@ -83,3 +85,109 @@ draw_delete_vertex_shader(struct draw_context *draw, | |||
| { | |||
| dvs->delete( dvs ); | |||
| } | |||
| boolean | |||
| draw_vs_init( struct draw_context *draw ) | |||
| { | |||
| tgsi_exec_machine_init(&draw->vs.machine); | |||
| /* FIXME: give this machine thing a proper constructor: | |||
| */ | |||
| draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
| if (!draw->vs.machine.Inputs) | |||
| return FALSE; | |||
| draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); | |||
| if (!draw->vs.machine.Outputs) | |||
| return FALSE; | |||
| draw->vs.emit_cache = translate_cache_create(); | |||
| if (!draw->vs.emit_cache) | |||
| return FALSE; | |||
| draw->vs.fetch_cache = translate_cache_create(); | |||
| if (!draw->vs.fetch_cache) | |||
| return FALSE; | |||
| return TRUE; | |||
| } | |||
| void | |||
| draw_vs_destroy( struct draw_context *draw ) | |||
| { | |||
| if (draw->vs.machine.Inputs) | |||
| align_free(draw->vs.machine.Inputs); | |||
| if (draw->vs.machine.Outputs) | |||
| align_free(draw->vs.machine.Outputs); | |||
| if (draw->vs.fetch_cache) | |||
| translate_cache_destroy(draw->vs.fetch_cache); | |||
| if (draw->vs.emit_cache) | |||
| translate_cache_destroy(draw->vs.emit_cache); | |||
| tgsi_exec_machine_free_data(&draw->vs.machine); | |||
| } | |||
| struct draw_vs_varient * | |||
| draw_vs_lookup_varient( struct draw_vertex_shader *vs, | |||
| const struct draw_vs_varient_key *key ) | |||
| { | |||
| struct draw_vs_varient *varient; | |||
| unsigned i; | |||
| /* Lookup existing varient: | |||
| */ | |||
| for (i = 0; i < vs->nr_varients; i++) | |||
| if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) | |||
| return vs->varient[i]; | |||
| /* Else have to create a new one: | |||
| */ | |||
| varient = vs->create_varient( vs, key ); | |||
| if (varient == NULL) | |||
| return NULL; | |||
| /* Add it to our list: | |||
| */ | |||
| assert(vs->nr_varients < Elements(vs->varient)); | |||
| vs->varient[vs->nr_varients++] = varient; | |||
| /* Done | |||
| */ | |||
| return varient; | |||
| } | |||
| struct translate * | |||
| draw_vs_get_fetch( struct draw_context *draw, | |||
| struct translate_key *key ) | |||
| { | |||
| if (!draw->vs.fetch || | |||
| translate_key_compare(&draw->vs.fetch->key, key) != 0) | |||
| { | |||
| translate_key_sanitize(key); | |||
| draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key); | |||
| } | |||
| return draw->vs.fetch; | |||
| } | |||
| struct translate * | |||
| draw_vs_get_emit( struct draw_context *draw, | |||
| struct translate_key *key ) | |||
| { | |||
| if (!draw->vs.emit || | |||
| translate_key_compare(&draw->vs.emit->key, key) != 0) | |||
| { | |||
| translate_key_sanitize(key); | |||
| draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key); | |||
| } | |||
| return draw->vs.emit; | |||
| } | |||
| @@ -38,10 +38,84 @@ | |||
| struct draw_context; | |||
| struct pipe_shader_state; | |||
/* Input half of a varient element.  load_input() reads "format" to pick
 * the load sequence; "buffer" and "offset" presumably name the vertex
 * buffer and the attribute's offset within it -- confirm against the
 * code that fills the key.
 */
| struct draw_varient_input | |||
| { | |||
| enum pipe_format format; | |||
| unsigned buffer; | |||
| unsigned offset; | |||
| }; | |||
/* Output half of a varient element; aos_emit_outputs() reads all three
 * fields to store one shader output into the output vertex.
 * NOTE(review): "format" is declared as enum pipe_format, yet
 * aos_emit_outputs()/emit_output() compare it against EMIT_x values.
 */
| struct draw_varient_output | |||
| { | |||
| enum pipe_format format; /* output format */ | |||
| unsigned vs_output:8; /* which vertex shader output is this? */ | |||
| unsigned offset:24; /* offset into output vertex */ | |||
| }; | |||
/* One slot of a varient key: an input description paired with an output
 * description.  The key sizes its element[] use by
 * max2(nr_inputs, nr_outputs), so slot i carries input i and output i.
 */
| struct draw_varient_element { | |||
| struct draw_varient_input in; | |||
| struct draw_varient_output out; | |||
| }; | |||
/* Key identifying one varient of a vertex shader.  Keys are compared
 * with memcmp() over the first draw_vs_varient_keysize() bytes, i.e. the
 * header fields plus the first nr_elements entries of element[].
 * NOTE(review): the named bitfields add up to 31 bits, so one bit of the
 * word is unnamed; the memcmp comparison presumably relies on keys being
 * zero-filled before use -- confirm.
 */
| struct draw_vs_varient_key { | |||
| unsigned output_stride; | |||
| unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */ | |||
| unsigned nr_inputs:8; | |||
| unsigned nr_outputs:8; | |||
| unsigned viewport:1; | |||
| unsigned clip:1; | |||
| unsigned pad:5; | |||
| struct draw_varient_element element[PIPE_MAX_ATTRIBS]; | |||
| }; | |||
| struct draw_vs_varient; | |||
/* Function pointer types for the two varient entry points, both using
 * the PIPE_CDECL calling convention.  They have the same parameter lists
 * as the run_elts / run_linear members of struct draw_vs_varient.
 */

/* Indexed form: takes an index list "elts" and a count (presumably the
 * number of indices), plus the buffer that receives the output.
 */
| typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer); | |||
/* Linear form: takes a start index and a count instead of an index list. */
| typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer); | |||
/* A vertex shader specialised for one draw_vs_varient_key.  This is the
 * base "class": implementations embed it as their first member (see
 * struct draw_vs_varient_aos_sse) and fill in the function pointers.
 */
| struct draw_vs_varient { | |||
/* Key this varient was created for; draw_vs_lookup_varient() compares
 * against it.
 */
| struct draw_vs_varient_key key; | |||
| struct draw_vertex_shader *vs; | |||
/* Supply the pointer and stride for input number i. */
| void (*set_input)( struct draw_vs_varient *, | |||
| unsigned i, | |||
| const void *ptr, | |||
| unsigned stride ); | |||
| void (*set_constants)( struct draw_vs_varient *, | |||
| const float (*constants)[4] ); | |||
| void (*set_viewport)( struct draw_vs_varient *, | |||
| const struct pipe_viewport_state * ); | |||
/* Entry points; same signatures as vsv_run_linear_func and
 * vsv_run_elts_func.
 */
| void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer ); | |||
| void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer ); | |||
| void (*destroy)( struct draw_vs_varient * ); | |||
| }; | |||
| /** | |||
| * Private version of the compiled vertex_shader | |||
| */ | |||
| struct draw_vertex_shader { | |||
| struct draw_context *draw; | |||
| /* This member will disappear shortly: | |||
| */ | |||
| @@ -49,6 +123,14 @@ struct draw_vertex_shader { | |||
| struct tgsi_shader_info info; | |||
| /* | |||
| */ | |||
| struct draw_vs_varient *varient[16]; | |||
| unsigned nr_varients; | |||
| struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, | |||
| const struct draw_vs_varient_key *key ); | |||
| void (*prepare)( struct draw_vertex_shader *shader, | |||
| struct draw_context *draw ); | |||
| @@ -68,6 +150,15 @@ struct draw_vertex_shader { | |||
| }; | |||
| struct draw_vs_varient * | |||
| draw_vs_lookup_varient( struct draw_vertex_shader *base, | |||
| const struct draw_vs_varient_key *key ); | |||
| /******************************************************************************** | |||
| * Internal functions: | |||
| */ | |||
| struct draw_vertex_shader * | |||
| draw_create_vs_exec(struct draw_context *draw, | |||
| const struct pipe_shader_state *templ); | |||
| @@ -81,7 +172,52 @@ draw_create_vs_llvm(struct draw_context *draw, | |||
| const struct pipe_shader_state *templ); | |||
| struct draw_vs_varient_key; | |||
| struct draw_vertex_shader; | |||
| struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, | |||
| const struct draw_vs_varient_key *key ); | |||
| /******************************************************************************** | |||
| * Helpers for vs implementations that don't do their own fetch/emit varients. | |||
| * Means these can be shared between shaders. | |||
| */ | |||
| struct translate; | |||
| struct translate_key; | |||
| struct translate *draw_vs_get_fetch( struct draw_context *draw, | |||
| struct translate_key *key ); | |||
| struct translate *draw_vs_get_emit( struct draw_context *draw, | |||
| struct translate_key *key ); | |||
| struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, | |||
| const struct draw_vs_varient_key *key ); | |||
| static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) | |||
| { | |||
| return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element); | |||
| } | |||
| static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, | |||
| const struct draw_vs_varient_key *b ) | |||
| { | |||
| int keysize = draw_vs_varient_keysize(a); | |||
| return memcmp(a, b, keysize); | |||
| } | |||
| #define MAX_TGSI_VERTICES 4 | |||
| #endif | |||
| @@ -0,0 +1,222 @@ | |||
| /************************************************************************** | |||
| * | |||
| * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
| * All Rights Reserved. | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the | |||
| * "Software"), to deal in the Software without restriction, including | |||
| * without limitation the rights to use, copy, modify, merge, publish, | |||
| * distribute, sub license, and/or sell copies of the Software, and to | |||
| * permit persons to whom the Software is furnished to do so, subject to | |||
| * the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the | |||
| * next paragraph) shall be included in all copies or substantial portions | |||
| * of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| * | |||
| **************************************************************************/ | |||
| /* Authors: Keith Whitwell <keith@tungstengraphics.com> | |||
| */ | |||
| #ifndef DRAW_VS_AOS_H | |||
| #define DRAW_VS_AOS_H | |||
| struct tgsi_token; | |||
| struct x86_function; | |||
| #include "pipe/p_state.h" | |||
| #include "rtasm/rtasm_x86sse.h" | |||
| #define X 0 | |||
| #define Y 1 | |||
| #define Z 2 | |||
| #define W 3 | |||
| #define MAX_INPUTS PIPE_MAX_ATTRIBS | |||
| #define MAX_OUTPUTS PIPE_MAX_ATTRIBS | |||
| #define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ | |||
| #define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ | |||
| #define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ | |||
| #define MAX_INTERNALS 8 | |||
| #define AOS_FILE_INTERNAL TGSI_FILE_COUNT | |||
| #define FPU_RND_NEG 1 | |||
| #define FPU_RND_NEAREST 2 | |||
| struct aos_machine; | |||
| typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, | |||
| float *result, | |||
| const float *in, | |||
| unsigned count ); | |||
/* One table of 258 precomputed floats tagged with an exponent;
 * struct aos_machine keeps MAX_SHINE_TAB of them.  Presumably a
 * specular-power lookup table with "last_used" serving as a replacement
 * timestamp -- confirm against the code that fills it.
 */
| struct shine_tab { | |||
| float exponent; | |||
| float values[258]; | |||
| unsigned last_used; | |||
| }; | |||
/* Pairs a lit_func with the shine_tab it uses; struct aos_machine keeps
 * MAX_LIT_INFO of these.
 */
| struct lit_info { | |||
| lit_func func; | |||
| struct shine_tab *shine_tab; | |||
| }; | |||
| #define MAX_SHINE_TAB 4 | |||
| #define MAX_LIT_INFO 16 | |||
| /* This is the temporary storage used by all the aos_sse vs varients. | |||
| * Create one per context and reuse by passing a pointer in at | |||
| * vs_varient creation?? | |||
| */ | |||
| struct aos_machine { | |||
/* Register files, one float[4] per register. */
| float input [MAX_INPUTS ][4]; | |||
| float output [MAX_OUTPUTS ][4]; | |||
| float temp [MAX_TEMPS ][4]; | |||
| float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */ | |||
| float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ | |||
/* MAX_INTERNALS slots; presumably the values selected by the IMM_x
 * indices passed to aos_get_internal() -- confirm.
 */
| float internal [MAX_INTERNALS ][4]; | |||
| float scale[4]; /* viewport */ | |||
| float translate[4]; /* viewport */ | |||
| float tmp[2][4]; /* scratch space for LIT */ | |||
| struct shine_tab shine_tab[MAX_SHINE_TAB]; | |||
| struct lit_info lit_info[MAX_LIT_INFO]; | |||
| unsigned now; | |||
| ushort fpu_rnd_nearest; | |||
| ushort fpu_rnd_neg_inf; | |||
| ushort fpu_restore; | |||
| ushort fpucntl; /* one of FPU_* above */ | |||
/* Per-attribute state.  The code emitted by get_src_ptr() reads
 * input_ptr and input_stride from here at run time.
 */
| struct { | |||
| const void *input_ptr; | |||
| unsigned input_stride; | |||
| unsigned output_offset; | |||
| } attrib[PIPE_MAX_ATTRIBS]; | |||
| }; | |||
/* State carried through the translation of one shader varient into x86
 * code: the function being emitted, the owning varient, counters, the
 * bookkeeping for the eight xmm registers and the fixed roles of the
 * general-purpose registers.
 */
| struct aos_compilation { | |||
| struct x86_function *func; | |||
| struct draw_vs_varient_aos_sse *vaos; | |||
/* Incremented once per input loaded and per output stored by
 * aos_fetch_inputs() / aos_emit_outputs().
 */
| unsigned insn_counter; | |||
| unsigned num_immediates; | |||
| unsigned count; | |||
| unsigned lit_count; | |||
/* What each xmm register currently holds: a (file, idx) pair plus a
 * dirty flag, as passed to aos_adopt_xmm_reg().
 */
| struct { | |||
| unsigned idx:16; | |||
| unsigned file:8; | |||
| unsigned dirty:8; | |||
| unsigned last_used; | |||
| } xmm[8]; | |||
| boolean input_fetched[PIPE_MAX_ATTRIBS]; | |||
| unsigned output_last_write[PIPE_MAX_ATTRIBS]; | |||
| boolean have_sse2; | |||
/* Set to 1 by the ERROR() macro when translation fails. */
| boolean error; | |||
/* NOTE(review): signed short here, while aos_machine.fpucntl is ushort. */
| short fpucntl; | |||
| /* these are actually known values, but putting them in a struct | |||
| * like this is helpful to keep them in sync across the file. | |||
| */ | |||
| struct x86_reg tmp_EAX; | |||
| struct x86_reg idx_EBX; /* either start+i or &elt[i] */ | |||
| struct x86_reg outbuf_ECX; | |||
| struct x86_reg machine_EDX; | |||
| struct x86_reg count_ESI; /* decrements to zero */ | |||
| }; | |||
| struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); | |||
| void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx ); | |||
| void aos_adopt_xmm_reg( struct aos_compilation *cp, | |||
| struct x86_reg reg, | |||
| unsigned file, | |||
| unsigned idx, | |||
| unsigned dirty ); | |||
| struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, | |||
| unsigned file, | |||
| unsigned idx ); | |||
| boolean aos_fetch_inputs( struct aos_compilation *cp, | |||
| boolean linear ); | |||
| boolean aos_emit_outputs( struct aos_compilation *cp ); | |||
| #define IMM_ONES 0 /* 1, 1,1,1 */ | |||
| #define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */ | |||
| #define IMM_IDENTITY 2 /* 0, 0,0,1 */ | |||
| #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ | |||
| #define IMM_255 4 /* 255, 255, 255, 255 */ | |||
| #define IMM_NEGS 5 /* -1,-1,-1,-1 */ | |||
| #define IMM_RSQ 6 /* -.5,1.5,_,_ */ | |||
| #define IMM_PSIZE 7 /* not really an immediate - updated each run */ | |||
| struct x86_reg aos_get_internal( struct aos_compilation *cp, | |||
| unsigned imm ); | |||
| struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, | |||
| unsigned imm ); | |||
/* Report a failed x86 translation: print msg together with the name of
 * the calling function, set the error flag of the aos_compilation that
 * cp points to, then assert.  The cp argument is parenthesized so that
 * any pointer expression can be passed.
 */
#define ERROR(cp, msg)                                                  \
do {                                                                    \
   debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \
   (cp)->error = 1;                                                     \
   assert(0);                                                           \
} while (0)
/* The x86/SSE implementation of a shader varient.  "base" comes first,
 * so a pointer to this struct can stand in for a struct draw_vs_varient
 * pointer.
 */
| struct draw_vs_varient_aos_sse { | |||
| struct draw_vs_varient base; | |||
| struct draw_context *draw; | |||
| #if 0 | |||
| struct { | |||
| const void *ptr; | |||
| unsigned stride; | |||
| } attrib[PIPE_MAX_ATTRIBS]; | |||
| #endif | |||
| struct aos_machine *machine; /* XXX: temporarily unshared */ | |||
/* Generated entry points and the two x86 function objects -- presumably
 * one for the linear and one for the elts path; confirm against the
 * code that builds them.
 */
| vsv_run_linear_func gen_run_linear; | |||
| vsv_run_elts_func gen_run_elts; | |||
| struct x86_function func[2]; | |||
| }; | |||
| #endif | |||
| @@ -0,0 +1,326 @@ | |||
| /************************************************************************** | |||
| * | |||
| * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
| * All Rights Reserved. | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the | |||
| * "Software"), to deal in the Software without restriction, including | |||
| * without limitation the rights to use, copy, modify, merge, publish, | |||
| * distribute, sub license, and/or sell copies of the Software, and to | |||
| * permit persons to whom the Software is furnished to do so, subject to | |||
| * the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the | |||
| * next paragraph) shall be included in all copies or substantial portions | |||
| * of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| * | |||
| **************************************************************************/ | |||
| #include "pipe/p_util.h" | |||
| #include "pipe/p_shader_tokens.h" | |||
| #include "tgsi/util/tgsi_parse.h" | |||
| #include "tgsi/util/tgsi_util.h" | |||
| #include "tgsi/exec/tgsi_exec.h" | |||
| #include "draw_vs.h" | |||
| #include "draw_vs_aos.h" | |||
| #include "draw_vertex.h" | |||
| #include "rtasm/rtasm_x86sse.h" | |||
| #ifdef PIPE_ARCH_X86 | |||
| /* Note - don't yet have to worry about interacting with the code in | |||
| * draw_vs_aos.c as there is no intermingling of generated code... | |||
| * That may have to change, we'll see. | |||
| */ | |||
| static void emit_load_R32G32B32A32( struct aos_compilation *cp, | |||
| struct x86_reg data, | |||
| struct x86_reg src_ptr ) | |||
| { | |||
| sse_movups(cp->func, data, src_ptr); | |||
| } | |||
| static void emit_load_R32G32B32( struct aos_compilation *cp, | |||
| struct x86_reg data, | |||
| struct x86_reg src_ptr ) | |||
| { | |||
| sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); | |||
| sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); | |||
| sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); | |||
| sse_movlps(cp->func, data, src_ptr); | |||
| } | |||
| static void emit_load_R32G32( struct aos_compilation *cp, | |||
| struct x86_reg data, | |||
| struct x86_reg src_ptr ) | |||
| { | |||
| sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); | |||
| sse_movlps(cp->func, data, src_ptr); | |||
| } | |||
| static void emit_load_R32( struct aos_compilation *cp, | |||
| struct x86_reg data, | |||
| struct x86_reg src_ptr ) | |||
| { | |||
| sse_movss(cp->func, data, src_ptr); | |||
| sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); | |||
| } | |||
| static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, | |||
| struct x86_reg data, | |||
| struct x86_reg src_ptr ) | |||
| { | |||
| sse_movss(cp->func, data, src_ptr); | |||
| sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); | |||
| sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); | |||
| sse2_cvtdq2ps(cp->func, data, data); | |||
| sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); | |||
| } | |||
| static void get_src_ptr( struct x86_function *func, | |||
| struct x86_reg src, | |||
| struct x86_reg machine, | |||
| struct x86_reg elt, | |||
| unsigned a ) | |||
| { | |||
| struct x86_reg input_ptr = | |||
| x86_make_disp(machine, | |||
| Offset(struct aos_machine, attrib[a].input_ptr)); | |||
| struct x86_reg input_stride = | |||
| x86_make_disp(machine, | |||
| Offset(struct aos_machine, attrib[a].input_stride)); | |||
| /* Calculate pointer to current attrib: | |||
| */ | |||
| x86_mov(func, src, input_stride); | |||
| x86_imul(func, src, elt); | |||
| x86_add(func, src, input_ptr); | |||
| } | |||
| /* Extended swizzles? Maybe later. | |||
| */ | |||
| static void emit_swizzle( struct aos_compilation *cp, | |||
| struct x86_reg dest, | |||
| struct x86_reg src, | |||
| unsigned shuffle ) | |||
| { | |||
| sse_shufps(cp->func, dest, src, shuffle); | |||
| } | |||
| static boolean load_input( struct aos_compilation *cp, | |||
| unsigned idx, | |||
| boolean linear ) | |||
| { | |||
| unsigned format = cp->vaos->base.key.element[idx].in.format; | |||
| struct x86_reg src = cp->tmp_EAX; | |||
| struct x86_reg dataXMM = aos_get_xmm_reg(cp); | |||
| /* Figure out source pointer address: | |||
| */ | |||
| get_src_ptr(cp->func, | |||
| src, | |||
| cp->machine_EDX, | |||
| linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), | |||
| idx); | |||
| src = x86_deref(src); | |||
| aos_adopt_xmm_reg( cp, | |||
| dataXMM, | |||
| TGSI_FILE_INPUT, | |||
| idx, | |||
| TRUE ); | |||
| switch (format) { | |||
| case PIPE_FORMAT_R32_FLOAT: | |||
| emit_load_R32(cp, dataXMM, src); | |||
| break; | |||
| case PIPE_FORMAT_R32G32_FLOAT: | |||
| emit_load_R32G32(cp, dataXMM, src); | |||
| break; | |||
| case PIPE_FORMAT_R32G32B32_FLOAT: | |||
| emit_load_R32G32B32(cp, dataXMM, src); | |||
| break; | |||
| case PIPE_FORMAT_R32G32B32A32_FLOAT: | |||
| emit_load_R32G32B32A32(cp, dataXMM, src); | |||
| break; | |||
| case PIPE_FORMAT_B8G8R8A8_UNORM: | |||
| emit_load_R8G8B8A8_UNORM(cp, dataXMM, src); | |||
| emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); | |||
| break; | |||
| case PIPE_FORMAT_R8G8B8A8_UNORM: | |||
| emit_load_R8G8B8A8_UNORM(cp, dataXMM, src); | |||
| break; | |||
| default: | |||
| ERROR(cp, "unhandled input format"); | |||
| return FALSE; | |||
| } | |||
| return TRUE; | |||
| } | |||
| boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) | |||
| { | |||
| unsigned i; | |||
| for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { | |||
| if (!load_input( cp, i, linear )) | |||
| return FALSE; | |||
| cp->insn_counter++; | |||
| debug_printf("\n"); | |||
| } | |||
| return TRUE; | |||
| } | |||
| static void emit_store_R32G32B32A32( struct aos_compilation *cp, | |||
| struct x86_reg dst_ptr, | |||
| struct x86_reg dataXMM ) | |||
| { | |||
| sse_movups(cp->func, dst_ptr, dataXMM); | |||
| } | |||
| static void emit_store_R32G32B32( struct aos_compilation *cp, | |||
| struct x86_reg dst_ptr, | |||
| struct x86_reg dataXMM ) | |||
| { | |||
| sse_movlps(cp->func, dst_ptr, dataXMM); | |||
| sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ | |||
| sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM); | |||
| } | |||
| static void emit_store_R32G32( struct aos_compilation *cp, | |||
| struct x86_reg dst_ptr, | |||
| struct x86_reg dataXMM ) | |||
| { | |||
| sse_movlps(cp->func, dst_ptr, dataXMM); | |||
| } | |||
| static void emit_store_R32( struct aos_compilation *cp, | |||
| struct x86_reg dst_ptr, | |||
| struct x86_reg dataXMM ) | |||
| { | |||
| sse_movss(cp->func, dst_ptr, dataXMM); | |||
| } | |||
| static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp, | |||
| struct x86_reg dst_ptr, | |||
| struct x86_reg dataXMM ) | |||
| { | |||
| sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255)); | |||
| sse2_cvtps2dq(cp->func, dataXMM, dataXMM); | |||
| sse2_packssdw(cp->func, dataXMM, dataXMM); | |||
| sse2_packuswb(cp->func, dataXMM, dataXMM); | |||
| sse_movss(cp->func, dst_ptr, dataXMM); | |||
| } | |||
| static boolean emit_output( struct aos_compilation *cp, | |||
| struct x86_reg ptr, | |||
| struct x86_reg dataXMM, | |||
| unsigned format ) | |||
| { | |||
| switch (format) { | |||
| case EMIT_1F: | |||
| case EMIT_1F_PSIZE: | |||
| emit_store_R32(cp, ptr, dataXMM); | |||
| break; | |||
| case EMIT_2F: | |||
| emit_store_R32G32(cp, ptr, dataXMM); | |||
| break; | |||
| case EMIT_3F: | |||
| emit_store_R32G32B32(cp, ptr, dataXMM); | |||
| break; | |||
| case EMIT_4F: | |||
| emit_store_R32G32B32A32(cp, ptr, dataXMM); | |||
| break; | |||
| case EMIT_4UB: | |||
| if (1) { | |||
| emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); | |||
| emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); | |||
| } | |||
| else { | |||
| emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); | |||
| } | |||
| break; | |||
| default: | |||
| ERROR(cp, "unhandled output format"); | |||
| return FALSE; | |||
| } | |||
| return TRUE; | |||
| } | |||
| boolean aos_emit_outputs( struct aos_compilation *cp ) | |||
| { | |||
| unsigned i; | |||
| for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) { | |||
| unsigned format = cp->vaos->base.key.element[i].out.format; | |||
| unsigned offset = cp->vaos->base.key.element[i].out.offset; | |||
| unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; | |||
| struct x86_reg data; | |||
| if (format == EMIT_1F_PSIZE) { | |||
| data = aos_get_internal_xmm( cp, IMM_PSIZE ); | |||
| } | |||
| else { | |||
| data = aos_get_shader_reg( cp, | |||
| TGSI_FILE_OUTPUT, | |||
| vs_output ); | |||
| } | |||
| if (data.file != file_XMM) { | |||
| struct x86_reg tmp = aos_get_xmm_reg( cp ); | |||
| sse_movups(cp->func, tmp, data); | |||
| data = tmp; | |||
| } | |||
| if (!emit_output( cp, | |||
| x86_make_disp( cp->outbuf_ECX, offset ), | |||
| data, | |||
| format )) | |||
| return FALSE; | |||
| aos_release_xmm_reg( cp, data.idx ); | |||
| cp->insn_counter++; | |||
| debug_printf("\n"); | |||
| } | |||
| return TRUE; | |||
| } | |||
| #endif | |||
| @@ -179,10 +179,12 @@ draw_create_vs_exec(struct draw_context *draw, | |||
| tgsi_scan_shader(state->tokens, &vs->base.info); | |||
| vs->base.draw = draw; | |||
| vs->base.prepare = vs_exec_prepare; | |||
| vs->base.run_linear = vs_exec_run_linear; | |||
| vs->base.delete = vs_exec_delete; | |||
| vs->machine = &draw->machine; | |||
| vs->base.create_varient = draw_vs_varient_generic; | |||
| vs->machine = &draw->vs.machine; | |||
| return &vs->base; | |||
| } | |||
| @@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw, | |||
| tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); | |||
| vs->base.draw = draw; | |||
| vs->base.prepare = vs_llvm_prepare; | |||
| vs->base.create_varient = draw_vs_varient_generic; | |||
| vs->base.run_linear = vs_llvm_run_linear; | |||
| vs->base.delete = vs_llvm_delete; | |||
| vs->machine = &draw->machine; | |||
| @@ -47,9 +47,7 @@ | |||
| #include "tgsi/util/tgsi_parse.h" | |||
| #define SSE_MAX_VERTICES 4 | |||
| #define SSE_SWIZZLES 1 | |||
| #if SSE_SWIZZLES | |||
| typedef void (XSTDCALL *codegen_function) ( | |||
| const struct tgsi_exec_vector *input, /* 1 */ | |||
| struct tgsi_exec_vector *output, /* 2 */ | |||
| @@ -62,14 +60,6 @@ typedef void (XSTDCALL *codegen_function) ( | |||
| float (*aos_output)[4], /* 9 */ | |||
| uint num_outputs, /* 10 */ | |||
| uint output_stride ); /* 11 */ | |||
| #else | |||
| typedef void (XSTDCALL *codegen_function) ( | |||
| const struct tgsi_exec_vector *input, | |||
| struct tgsi_exec_vector *output, | |||
| float (*constant)[4], | |||
| struct tgsi_exec_vector *temporary, | |||
| float (*immediates)[4] ); | |||
| #endif | |||
| struct draw_sse_vertex_shader { | |||
| struct draw_vertex_shader base; | |||
| @@ -111,7 +101,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, | |||
| for (i = 0; i < count; i += MAX_TGSI_VERTICES) { | |||
| unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); | |||
| #if SSE_SWIZZLES | |||
| /* run compiled shader | |||
| */ | |||
| shader->func(machine->Inputs, | |||
| @@ -128,43 +117,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, | |||
| input = (const float (*)[4])((const char *)input + input_stride * max_vertices); | |||
| output = (float (*)[4])((char *)output + output_stride * max_vertices); | |||
| #else | |||
| unsigned int j, slot; | |||
| /* Swizzle inputs. | |||
| */ | |||
| for (j = 0; j < max_vertices; j++) { | |||
| for (slot = 0; slot < base->info.num_inputs; slot++) { | |||
| machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; | |||
| machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; | |||
| machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; | |||
| machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; | |||
| } | |||
| input = (const float (*)[4])((const char *)input + input_stride); | |||
| } | |||
| /* run compiled shader | |||
| */ | |||
| shader->func(machine->Inputs, | |||
| machine->Outputs, | |||
| (float (*)[4])constants, | |||
| machine->Temps, | |||
| shader->immediates); | |||
| /* Unswizzle all output results. | |||
| */ | |||
| for (j = 0; j < max_vertices; j++) { | |||
| for (slot = 0; slot < base->info.num_outputs; slot++) { | |||
| output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; | |||
| output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; | |||
| output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; | |||
| output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; | |||
| } | |||
| output = (float (*)[4])((char *)output + output_stride); | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| @@ -203,15 +155,18 @@ draw_create_vs_sse(struct draw_context *draw, | |||
| tgsi_scan_shader(templ->tokens, &vs->base.info); | |||
| vs->base.draw = draw; | |||
| vs->base.create_varient = draw_vs_varient_aos_sse; | |||
| // vs->base.create_varient = draw_vs_varient_generic; | |||
| vs->base.prepare = vs_sse_prepare; | |||
| vs->base.run_linear = vs_sse_run_linear; | |||
| vs->base.delete = vs_sse_delete; | |||
| vs->machine = &draw->machine; | |||
| vs->machine = &draw->vs.machine; | |||
| x86_init_func( &vs->sse2_program ); | |||
| if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, | |||
| &vs->sse2_program, vs->immediates, SSE_SWIZZLES )) | |||
| &vs->sse2_program, vs->immediates, TRUE )) | |||
| goto fail; | |||
| vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); | |||
| @@ -0,0 +1,326 @@ | |||
| /************************************************************************** | |||
| * | |||
| * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | |||
| * All Rights Reserved. | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the | |||
| * "Software"), to deal in the Software without restriction, including | |||
| * without limitation the rights to use, copy, modify, merge, publish, | |||
| * distribute, sub license, and/or sell copies of the Software, and to | |||
| * permit persons to whom the Software is furnished to do so, subject to | |||
| * the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the | |||
| * next paragraph) shall be included in all copies or substantial portions | |||
| * of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| * | |||
| **************************************************************************/ | |||
| /* | |||
| * Authors: | |||
| * Keith Whitwell <keith@tungstengraphics.com> | |||
| */ | |||
| #include "pipe/p_util.h" | |||
| #include "draw/draw_context.h" | |||
| #include "draw/draw_private.h" | |||
| #include "draw/draw_vbuf.h" | |||
| #include "draw/draw_vertex.h" | |||
| #include "draw/draw_vs.h" | |||
| #include "translate/translate.h" | |||
| #include "translate/translate_cache.h" | |||
| /* A first pass at incorporating vertex fetch/emit functionality into | |||
| * the vertex shader varient. | |||
| */ | |||
| struct draw_vs_varient_generic { | |||
| struct draw_vs_varient base; | |||
| struct pipe_viewport_state viewport; | |||
| struct draw_vertex_shader *shader; | |||
| struct draw_context *draw; | |||
| /* Basic plan is to run these two translate functions before/after | |||
| * the vertex shader's existing run_linear() routine to simulate | |||
| * the inclusion of this functionality into the shader... | |||
| * | |||
| * Next will look at actually including it. | |||
| */ | |||
| struct translate *fetch; | |||
| struct translate *emit; | |||
| const float (*constants)[4]; | |||
| }; | |||
| static void vsvg_set_constants( struct draw_vs_varient *varient, | |||
| const float (*constants)[4] ) | |||
| { | |||
| struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
| vsvg->constants = constants; | |||
| } | |||
| static void vsvg_set_input( struct draw_vs_varient *varient, | |||
| unsigned buffer, | |||
| const void *ptr, | |||
| unsigned stride ) | |||
| { | |||
| struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
| vsvg->fetch->set_buffer(vsvg->fetch, | |||
| buffer, | |||
| ptr, | |||
| stride); | |||
| } | |||
| /* Mainly for debug at this stage: | |||
| */ | |||
| static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| { | |||
| char *ptr = (char *)output_buffer; | |||
| const float *scale = vsvg->viewport.scale; | |||
| const float *trans = vsvg->viewport.translate; | |||
| unsigned stride = vsvg->base.key.output_stride; | |||
| unsigned j; | |||
| for (j = 0; j < count; j++, ptr += stride) { | |||
| float *data = (float *)ptr; | |||
| float w = 1.0f / data[3]; | |||
| data[0] = data[0] * w * scale[0] + trans[0]; | |||
| data[1] = data[1] * w * scale[1] + trans[1]; | |||
| data[2] = data[2] * w * scale[2] + trans[2]; | |||
| data[3] = w; | |||
| } | |||
| } | |||
| static void do_viewport( struct draw_vs_varient_generic *vsvg, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| { | |||
| char *ptr = (char *)output_buffer; | |||
| const float *scale = vsvg->viewport.scale; | |||
| const float *trans = vsvg->viewport.translate; | |||
| unsigned stride = vsvg->base.key.output_stride; | |||
| unsigned j; | |||
| for (j = 0; j < count; j++, ptr += stride) { | |||
| float *data = (float *)ptr; | |||
| data[0] = data[0] * scale[0] + trans[0]; | |||
| data[1] = data[1] * scale[1] + trans[1]; | |||
| data[2] = data[2] * scale[2] + trans[2]; | |||
| } | |||
| } | |||
| static void vsvg_run_elts( struct draw_vs_varient *varient, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer) | |||
| { | |||
| struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
| /* Want to do this in small batches for cache locality? | |||
| */ | |||
| vsvg->fetch->run_elts( vsvg->fetch, | |||
| elts, | |||
| count, | |||
| output_buffer ); | |||
| //if (!vsvg->base.vs->is_passthrough) | |||
| { | |||
| vsvg->base.vs->run_linear( vsvg->base.vs, | |||
| output_buffer, | |||
| output_buffer, | |||
| vsvg->constants, | |||
| count, | |||
| vsvg->base.key.output_stride, | |||
| vsvg->base.key.output_stride); | |||
| if (vsvg->base.key.clip) { | |||
| /* not really handling clipping, just do the rhw so we can | |||
| * see the results... | |||
| */ | |||
| do_rhw_viewport( vsvg, | |||
| count, | |||
| output_buffer ); | |||
| } | |||
| else if (vsvg->base.key.viewport) { | |||
| do_viewport( vsvg, | |||
| count, | |||
| output_buffer ); | |||
| } | |||
| //if (!vsvg->already_in_emit_format) | |||
| vsvg->emit->set_buffer( vsvg->emit, | |||
| 0, | |||
| output_buffer, | |||
| vsvg->base.key.output_stride ); | |||
| vsvg->emit->run( vsvg->emit, | |||
| 0, count, | |||
| output_buffer ); | |||
| } | |||
| } | |||
| static void vsvg_run_linear( struct draw_vs_varient *varient, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| { | |||
| struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
| //debug_printf("%s %d %d\n", __FUNCTION__, start, count); | |||
| vsvg->fetch->run( vsvg->fetch, | |||
| start, | |||
| count, | |||
| output_buffer ); | |||
| //if (!vsvg->base.vs->is_passthrough) | |||
| { | |||
| vsvg->base.vs->run_linear( vsvg->base.vs, | |||
| output_buffer, | |||
| output_buffer, | |||
| vsvg->constants, | |||
| count, | |||
| vsvg->base.key.output_stride, | |||
| vsvg->base.key.output_stride); | |||
| if (vsvg->base.key.clip) { | |||
| /* not really handling clipping, just do the rhw so we can | |||
| * see the results... | |||
| */ | |||
| do_rhw_viewport( vsvg, | |||
| count, | |||
| output_buffer ); | |||
| } | |||
| else if (vsvg->base.key.viewport) { | |||
| do_viewport( vsvg, | |||
| count, | |||
| output_buffer ); | |||
| } | |||
| //if (!vsvg->already_in_emit_format) | |||
| vsvg->emit->set_buffer( vsvg->emit, | |||
| 0, | |||
| output_buffer, | |||
| vsvg->base.key.output_stride ); | |||
| vsvg->emit->set_buffer( vsvg->emit, | |||
| 1, | |||
| &vsvg->draw->rasterizer->point_size, | |||
| 0); | |||
| vsvg->emit->run( vsvg->emit, | |||
| 0, count, | |||
| output_buffer ); | |||
| } | |||
| } | |||
| static void vsvg_set_viewport( struct draw_vs_varient *varient, | |||
| const struct pipe_viewport_state *viewport ) | |||
| { | |||
| struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; | |||
| vsvg->viewport = *viewport; | |||
| } | |||
/**
 * Free the varient structure itself.  The fetch and emit translate
 * objects are not released here.
 */
static void vsvg_destroy( struct draw_vs_varient *varient )
{
   FREE( varient );
}
| struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, | |||
| const struct draw_vs_varient_key *key ) | |||
| { | |||
| unsigned i; | |||
| struct translate_key fetch, emit; | |||
| struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); | |||
| if (vsvg == NULL) | |||
| return NULL; | |||
| vsvg->base.key = *key; | |||
| vsvg->base.vs = vs; | |||
| vsvg->base.set_input = vsvg_set_input; | |||
| vsvg->base.set_constants = vsvg_set_constants; | |||
| vsvg->base.set_viewport = vsvg_set_viewport; | |||
| vsvg->base.run_elts = vsvg_run_elts; | |||
| vsvg->base.run_linear = vsvg_run_linear; | |||
| vsvg->base.destroy = vsvg_destroy; | |||
| /* Build free-standing fetch and emit functions: | |||
| */ | |||
| fetch.nr_elements = key->nr_inputs; | |||
| fetch.output_stride = 0; | |||
| for (i = 0; i < key->nr_inputs; i++) { | |||
| fetch.element[i].input_format = key->element[i].in.format; | |||
| fetch.element[i].input_buffer = key->element[i].in.buffer; | |||
| fetch.element[i].input_offset = key->element[i].in.offset; | |||
| fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; | |||
| fetch.element[i].output_offset = fetch.output_stride; | |||
| fetch.output_stride += 4 * sizeof(float); | |||
| } | |||
| emit.nr_elements = key->nr_outputs; | |||
| emit.output_stride = key->output_stride; | |||
| for (i = 0; i < key->nr_outputs; i++) { | |||
| if (key->element[i].out.format != EMIT_1F_PSIZE) | |||
| { | |||
| emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; | |||
| emit.element[i].input_buffer = 0; | |||
| emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); | |||
| emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); | |||
| emit.element[i].output_offset = key->element[i].out.offset; | |||
| } | |||
| else { | |||
| emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; | |||
| emit.element[i].input_buffer = 1; | |||
| emit.element[i].input_offset = 0; | |||
| emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; | |||
| emit.element[i].output_offset = key->element[i].out.offset; | |||
| } | |||
| } | |||
| vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); | |||
| vsvg->emit = draw_vs_get_emit( vs->draw, &emit ); | |||
| return &vsvg->base; | |||
| } | |||
| @@ -36,11 +36,8 @@ | |||
| #define DUMP_SSE 0 | |||
| #if DUMP_SSE | |||
| static void | |||
| _print_reg( | |||
| struct x86_reg reg ) | |||
| void x86_print_reg( struct x86_reg reg ) | |||
| { | |||
| if (reg.mod != mod_REG) | |||
| debug_printf( "[" ); | |||
| @@ -77,6 +74,7 @@ _print_reg( | |||
| debug_printf( "]" ); | |||
| } | |||
| #if DUMP_SSE | |||
| #define DUMP_START() debug_printf( "\n" ) | |||
| #define DUMP_END() debug_printf( "\n" ) | |||
| @@ -87,7 +85,7 @@ _print_reg( | |||
| foo++; \ | |||
| if (*foo) \ | |||
| foo++; \ | |||
| debug_printf( "\n% 15s ", foo ); \ | |||
| debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ | |||
| } while (0) | |||
| #define DUMP_I( I ) do { \ | |||
| @@ -97,27 +95,27 @@ _print_reg( | |||
| #define DUMP_R( R0 ) do { \ | |||
| DUMP(); \ | |||
| _print_reg( R0 ); \ | |||
| x86_print_reg( R0 ); \ | |||
| } while( 0 ) | |||
| #define DUMP_RR( R0, R1 ) do { \ | |||
| DUMP(); \ | |||
| _print_reg( R0 ); \ | |||
| x86_print_reg( R0 ); \ | |||
| debug_printf( ", " ); \ | |||
| _print_reg( R1 ); \ | |||
| x86_print_reg( R1 ); \ | |||
| } while( 0 ) | |||
| #define DUMP_RI( R0, I ) do { \ | |||
| DUMP(); \ | |||
| _print_reg( R0 ); \ | |||
| x86_print_reg( R0 ); \ | |||
| debug_printf( ", %u", I ); \ | |||
| } while( 0 ) | |||
| #define DUMP_RRI( R0, R1, I ) do { \ | |||
| DUMP(); \ | |||
| _print_reg( R0 ); \ | |||
| x86_print_reg( R0 ); \ | |||
| debug_printf( ", " ); \ | |||
| _print_reg( R1 ); \ | |||
| x86_print_reg( R1 ); \ | |||
| debug_printf( ", %u", I ); \ | |||
| } while( 0 ) | |||
| @@ -220,6 +218,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1 | |||
| /* Build a modRM byte + possible displacement. No treatment of SIB | |||
| * indexing. BZZT - no way to encode an absolute address. | |||
| * | |||
| * This is the "/r" field in the x86 manuals... | |||
| */ | |||
| static void emit_modrm( struct x86_function *p, | |||
| struct x86_reg reg, | |||
| @@ -258,7 +258,8 @@ static void emit_modrm( struct x86_function *p, | |||
| } | |||
| } | |||
| /* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. | |||
| */ | |||
| static void emit_modrm_noreg( struct x86_function *p, | |||
| unsigned op, | |||
| struct x86_reg regmem ) | |||
| @@ -367,8 +368,7 @@ void x86_jcc( struct x86_function *p, | |||
| DUMP_I(cc); | |||
| if (offset < 0) { | |||
| int amt = p->csr - p->store; | |||
| assert(amt > -offset); | |||
| assert(p->csr - p->store > -offset); | |||
| } | |||
| if (offset <= 127 && offset >= -128) { | |||
| @@ -445,6 +445,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) | |||
| emit_1i(p, imm); | |||
| } | |||
| void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) | |||
| { | |||
| DUMP_RI( dst, imm ); | |||
| assert(dst.mod == mod_REG); | |||
| emit_1ub(p, 0x80); | |||
| emit_modrm_noreg(p, 0, dst); | |||
| emit_1ub(p, imm); | |||
| } | |||
| void x86_push( struct x86_function *p, | |||
| struct x86_reg reg ) | |||
| { | |||
| @@ -461,6 +471,17 @@ void x86_push( struct x86_function *p, | |||
| p->stack_offset += 4; | |||
| } | |||
| void x86_push_imm32( struct x86_function *p, | |||
| int imm32 ) | |||
| { | |||
| DUMP_I( imm32 ); | |||
| emit_1ub(p, 0x68); | |||
| emit_1i(p, imm32); | |||
| p->stack_offset += 4; | |||
| } | |||
| void x86_pop( struct x86_function *p, | |||
| struct x86_reg reg ) | |||
| { | |||
| @@ -988,6 +1009,24 @@ void sse2_movd( struct x86_function *p, | |||
| /*********************************************************************** | |||
| * x87 instructions | |||
| */ | |||
| static void note_x87_pop( struct x86_function *p ) | |||
| { | |||
| p->x87_stack--; | |||
| assert(p->x87_stack >= 0); | |||
| } | |||
| static void note_x87_push( struct x86_function *p ) | |||
| { | |||
| p->x87_stack++; | |||
| assert(p->x87_stack <= 7); | |||
| } | |||
| void x87_assert_stack_empty( struct x86_function *p ) | |||
| { | |||
| assert (p->x87_stack == 0); | |||
| } | |||
| void x87_fist( struct x86_function *p, struct x86_reg dst ) | |||
| { | |||
| DUMP_R( dst ); | |||
| @@ -1000,6 +1039,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst ) | |||
| DUMP_R( dst ); | |||
| emit_1ub(p, 0xdb); | |||
| emit_modrm_noreg(p, 3, dst); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fild( struct x86_function *p, struct x86_reg arg ) | |||
| @@ -1007,12 +1047,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg ) | |||
| DUMP_R( arg ); | |||
| emit_1ub(p, 0xdf); | |||
| emit_modrm_noreg(p, 0, arg); | |||
| note_x87_push(p); | |||
| } | |||
| void x87_fldz( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xd9, 0xee); | |||
| note_x87_push(p); | |||
| } | |||
| @@ -1029,18 +1071,21 @@ void x87_fld1( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xd9, 0xe8); | |||
| note_x87_push(p); | |||
| } | |||
| void x87_fldl2e( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xd9, 0xea); | |||
| note_x87_push(p); | |||
| } | |||
| void x87_fldln2( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xd9, 0xed); | |||
| note_x87_push(p); | |||
| } | |||
| void x87_fwait( struct x86_function *p ) | |||
| @@ -1061,6 +1106,49 @@ void x87_fclex( struct x86_function *p ) | |||
| x87_fnclex(p); | |||
| } | |||
| void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xda, 0xc0+arg.idx); | |||
| } | |||
| void x87_fcmove( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xda, 0xc8+arg.idx); | |||
| } | |||
| void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xda, 0xd0+arg.idx); | |||
| } | |||
| void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xdb, 0xc0+arg.idx); | |||
| } | |||
| void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xdb, 0xc8+arg.idx); | |||
| } | |||
| void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xdb, 0xd0+arg.idx); | |||
| } | |||
| static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, | |||
| unsigned char dst0ub0, | |||
| @@ -1148,6 +1236,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) | |||
| assert(dst.file == file_x87); | |||
| assert(dst.idx >= 1); | |||
| emit_2ub(p, 0xde, 0xc8+dst.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fsubp( struct x86_function *p, struct x86_reg dst ) | |||
| @@ -1156,6 +1245,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) | |||
| assert(dst.file == file_x87); | |||
| assert(dst.idx >= 1); | |||
| emit_2ub(p, 0xde, 0xe8+dst.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) | |||
| @@ -1164,6 +1254,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) | |||
| assert(dst.file == file_x87); | |||
| assert(dst.idx >= 1); | |||
| emit_2ub(p, 0xde, 0xe0+dst.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_faddp( struct x86_function *p, struct x86_reg dst ) | |||
| @@ -1172,6 +1263,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) | |||
| assert(dst.file == file_x87); | |||
| assert(dst.idx >= 1); | |||
| emit_2ub(p, 0xde, 0xc0+dst.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fdivp( struct x86_function *p, struct x86_reg dst ) | |||
| @@ -1180,6 +1272,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) | |||
| assert(dst.file == file_x87); | |||
| assert(dst.idx >= 1); | |||
| emit_2ub(p, 0xde, 0xf8+dst.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) | |||
| @@ -1188,6 +1281,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) | |||
| assert(dst.file == file_x87); | |||
| assert(dst.idx >= 1); | |||
| emit_2ub(p, 0xde, 0xf0+dst.idx); | |||
| note_x87_pop(p); | |||
| } | |||
/**
 * Emit FTST (bytes D9 E4), which compares st(0) against 0.0.
 * The tracked stack depth does not change.
 */
void x87_ftst( struct x86_function *p )
{
   const unsigned char ftst_bytes[2] = { 0xd9, 0xe4 };

   DUMP();
   emit_2ub(p, ftst_bytes[0], ftst_bytes[1]);
}
| void x87_fucom( struct x86_function *p, struct x86_reg arg ) | |||
| @@ -1202,12 +1302,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg ) | |||
| DUMP_R( arg ); | |||
| assert(arg.file == file_x87); | |||
| emit_2ub(p, 0xdd, 0xe8+arg.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fucompp( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xda, 0xe9); | |||
| note_x87_pop(p); /* pop twice */ | |||
| note_x87_pop(p); /* pop twice */ | |||
| } | |||
| void x87_fxch( struct x86_function *p, struct x86_reg arg ) | |||
| @@ -1289,6 +1392,7 @@ void x87_fyl2x( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xd9, 0xf1); | |||
| note_x87_pop(p); | |||
| } | |||
| /* st1 = st1 * log2(st0 + 1.0); | |||
| @@ -1300,6 +1404,7 @@ void x87_fyl2xp1( struct x86_function *p ) | |||
| { | |||
| DUMP(); | |||
| emit_2ub(p, 0xd9, 0xf9); | |||
| note_x87_pop(p); | |||
| } | |||
| @@ -1312,6 +1417,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) | |||
| emit_1ub(p, 0xd9); | |||
| emit_modrm_noreg(p, 0, arg); | |||
| } | |||
| note_x87_push(p); | |||
| } | |||
| void x87_fst( struct x86_function *p, struct x86_reg dst ) | |||
| @@ -1334,8 +1440,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) | |||
| emit_1ub(p, 0xd9); | |||
| emit_modrm_noreg(p, 3, dst); | |||
| } | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fpop( struct x86_function *p ) | |||
| { | |||
| x87_fstp( p, x86_make_reg( file_x87, 0 )); | |||
| } | |||
| void x87_fcom( struct x86_function *p, struct x86_reg dst ) | |||
| { | |||
| DUMP_R( dst ); | |||
| @@ -1347,6 +1460,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) | |||
| } | |||
| } | |||
| void x87_fcomp( struct x86_function *p, struct x86_reg dst ) | |||
| { | |||
| DUMP_R( dst ); | |||
| @@ -1356,6 +1470,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) | |||
| emit_1ub(p, 0xd8); | |||
| emit_modrm_noreg(p, 3, dst); | |||
| } | |||
| note_x87_pop(p); | |||
| } | |||
| void x87_fcomi( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| emit_2ub(p, 0xdb, 0xf0+arg.idx); | |||
| } | |||
| void x87_fcomip( struct x86_function *p, struct x86_reg arg ) | |||
| { | |||
| DUMP_R( arg ); | |||
| emit_2ub(p, 0xdb, 0xf0+arg.idx); | |||
| note_x87_pop(p); | |||
| } | |||
| @@ -1374,6 +1502,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) | |||
| } | |||
| void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) | |||
| { | |||
| DUMP_R( dst ); | |||
| assert(dst.file == file_REG32); | |||
| emit_1ub(p, 0x9b); /* WAIT -- needed? */ | |||
| emit_1ub(p, 0xd9); | |||
| emit_modrm_noreg(p, 7, dst); | |||
| } | |||
| /*********************************************************************** | |||
| @@ -1442,6 +1581,21 @@ void mmx_movq( struct x86_function *p, | |||
| */ | |||
| void x86_cdecl_caller_push_regs( struct x86_function *p ) | |||
| { | |||
| x86_push(p, x86_make_reg(file_REG32, reg_AX)); | |||
| x86_push(p, x86_make_reg(file_REG32, reg_CX)); | |||
| x86_push(p, x86_make_reg(file_REG32, reg_DX)); | |||
| } | |||
| void x86_cdecl_caller_pop_regs( struct x86_function *p ) | |||
| { | |||
| x86_pop(p, x86_make_reg(file_REG32, reg_DX)); | |||
| x86_pop(p, x86_make_reg(file_REG32, reg_CX)); | |||
| x86_pop(p, x86_make_reg(file_REG32, reg_AX)); | |||
| } | |||
| /* Retrieve a reference to one of the function arguments, taking into | |||
| * account any push/pop activity: | |||
| */ | |||
| @@ -41,10 +41,12 @@ struct x86_function { | |||
| unsigned size; | |||
| unsigned char *store; | |||
| unsigned char *csr; | |||
| unsigned stack_offset; | |||
| int need_emms; | |||
| unsigned stack_offset:16; | |||
| unsigned need_emms:8; | |||
| int x87_stack:8; | |||
| unsigned char error_overflow[4]; | |||
| const char *fn; | |||
| }; | |||
| enum x86_reg_file { | |||
| @@ -107,6 +109,9 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ); | |||
| void x86_release_func( struct x86_function *p ); | |||
| void (*x86_get_func( struct x86_function *p ))( void ); | |||
| /* Debugging: | |||
| */ | |||
| void x86_print_reg( struct x86_reg reg ); | |||
| /* Create and manipulate registers and regmem values: | |||
| @@ -150,6 +155,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg); | |||
| * I load the immediate into general purpose register and use it. | |||
| */ | |||
| void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); | |||
| void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); | |||
| /* Macro for sse_shufps() and sse2_pshufd(): | |||
| @@ -220,6 +226,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
| void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
| void x86_pop( struct x86_function *p, struct x86_reg reg ); | |||
| void x86_push( struct x86_function *p, struct x86_reg reg ); | |||
| void x86_push_imm32( struct x86_function *p, int imm ); | |||
| void x86_ret( struct x86_function *p ); | |||
| void x86_retw( struct x86_function *p, unsigned short imm ); | |||
| void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
| @@ -227,13 +234,27 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
| void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); | |||
| void x86_sahf( struct x86_function *p ); | |||
| void x86_cdecl_caller_push_regs( struct x86_function *p ); | |||
| void x86_cdecl_caller_pop_regs( struct x86_function *p ); | |||
| void x87_assert_stack_empty( struct x86_function *p ); | |||
| void x87_f2xm1( struct x86_function *p ); | |||
| void x87_fabs( struct x86_function *p ); | |||
| void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
| void x87_faddp( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fchs( struct x86_function *p ); | |||
| void x87_fclex( struct x86_function *p ); | |||
| void x87_fcmovb( struct x86_function *p, struct x86_reg src ); | |||
| void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); | |||
| void x87_fcmove( struct x86_function *p, struct x86_reg src ); | |||
| void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); | |||
| void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); | |||
| void x87_fcmovne( struct x86_function *p, struct x86_reg src ); | |||
| void x87_fcom( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fcomi( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fcomip( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fcomp( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fcos( struct x86_function *p ); | |||
| void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
| @@ -253,6 +274,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
| void x87_fmulp( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fnclex( struct x86_function *p ); | |||
| void x87_fprndint( struct x86_function *p ); | |||
| void x87_fpop( struct x86_function *p ); | |||
| void x87_fscale( struct x86_function *p ); | |||
| void x87_fsin( struct x86_function *p ); | |||
| void x87_fsincos( struct x86_function *p ); | |||
| @@ -263,11 +285,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
| void x87_fsubp( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); | |||
| void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_ftst( struct x86_function *p ); | |||
| void x87_fxch( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fxtract( struct x86_function *p ); | |||
| void x87_fyl2x( struct x86_function *p ); | |||
| void x87_fyl2xp1( struct x86_function *p ); | |||
| void x87_fwait( struct x86_function *p ); | |||
| void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); | |||
| void x87_fucompp( struct x86_function *p ); | |||
| void x87_fucomp( struct x86_function *p, struct x86_reg arg ); | |||
| @@ -539,9 +539,9 @@ static const char *TGSI_MODULATES[] = | |||
| "MODULATE_EIGHTH" | |||
| }; | |||
| static void | |||
| dump_declaration_short( | |||
| struct tgsi_full_declaration *decl ) | |||
| void | |||
| tgsi_dump_declaration( | |||
| const struct tgsi_full_declaration *decl ) | |||
| { | |||
| TXT( "\nDCL " ); | |||
| ENM( decl->Declaration.File, TGSI_FILES_SHORT ); | |||
| @@ -672,9 +672,9 @@ dump_declaration_verbose( | |||
| } | |||
| } | |||
| static void | |||
| dump_immediate_short( | |||
| struct tgsi_full_immediate *imm ) | |||
| void | |||
| tgsi_dump_immediate( | |||
| const struct tgsi_full_immediate *imm ) | |||
| { | |||
| unsigned i; | |||
| @@ -727,9 +727,9 @@ dump_immediate_verbose( | |||
| } | |||
| } | |||
| static void | |||
| dump_instruction_short( | |||
| struct tgsi_full_instruction *inst, | |||
| void | |||
| tgsi_dump_instruction( | |||
| const struct tgsi_full_instruction *inst, | |||
| unsigned instno ) | |||
| { | |||
| unsigned i; | |||
| @@ -1281,17 +1281,17 @@ tgsi_dump( | |||
| switch( parse.FullToken.Token.Type ) { | |||
| case TGSI_TOKEN_TYPE_DECLARATION: | |||
| dump_declaration_short( | |||
| tgsi_dump_declaration( | |||
| &parse.FullToken.FullDeclaration ); | |||
| break; | |||
| case TGSI_TOKEN_TYPE_IMMEDIATE: | |||
| dump_immediate_short( | |||
| tgsi_dump_immediate( | |||
| &parse.FullToken.FullImmediate ); | |||
| break; | |||
| case TGSI_TOKEN_TYPE_INSTRUCTION: | |||
| dump_instruction_short( | |||
| tgsi_dump_instruction( | |||
| &parse.FullToken.FullInstruction, | |||
| instno ); | |||
| instno++; | |||
| @@ -14,6 +14,24 @@ tgsi_dump( | |||
| const struct tgsi_token *tokens, | |||
| unsigned flags ); | |||
| struct tgsi_full_immediate; | |||
| struct tgsi_full_instruction; | |||
| struct tgsi_full_declaration; | |||
| void | |||
| tgsi_dump_immediate( | |||
| const struct tgsi_full_immediate *imm ); | |||
| void | |||
| tgsi_dump_instruction( | |||
| const struct tgsi_full_instruction *inst, | |||
| unsigned instno ); | |||
| void | |||
| tgsi_dump_declaration( | |||
| const struct tgsi_full_declaration *decl ); | |||
| #if defined __cplusplus | |||
| } | |||
| #endif | |||
| @@ -71,15 +71,15 @@ struct translate { | |||
| const void *ptr, | |||
| unsigned stride ); | |||
| void (*run_elts)( struct translate *, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer); | |||
| void (*run)( struct translate *, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer); | |||
| void (PIPE_CDECL *run_elts)( struct translate *, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer); | |||
| void (PIPE_CDECL *run)( struct translate *, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer); | |||
| }; | |||
| @@ -541,10 +541,10 @@ static emit_func get_emit_func( enum pipe_format format ) | |||
| /** | |||
| * Fetch vertex attributes for 'count' vertices. | |||
| */ | |||
| static void generic_run_elts( struct translate *translate, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| static void PIPE_CDECL generic_run_elts( struct translate *translate, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| { | |||
| struct translate_generic *tg = translate_generic(translate); | |||
| char *vert = output_buffer; | |||
| @@ -580,10 +580,10 @@ static void generic_run_elts( struct translate *translate, | |||
| static void generic_run( struct translate *translate, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| static void PIPE_CDECL generic_run( struct translate *translate, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer ) | |||
| { | |||
| struct translate_generic *tg = translate_generic(translate); | |||
| char *vert = output_buffer; | |||
| @@ -45,22 +45,16 @@ | |||
| #define W 3 | |||
| #ifdef WIN32 | |||
| #define RTASM __cdecl | |||
| #else | |||
| #define RTASM | |||
| #endif | |||
| typedef void (RTASM *run_func)( struct translate *translate, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer ); | |||
| typedef void (RTASM *run_elts_func)( struct translate *translate, | |||
| const unsigned *elts, | |||
| typedef void (PIPE_CDECL *run_func)( struct translate *translate, | |||
| unsigned start, | |||
| unsigned count, | |||
| void *output_buffer ); | |||
| typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, | |||
| const unsigned *elts, | |||
| unsigned count, | |||
| void *output_buffer ); | |||
| struct translate_sse { | |||
| @@ -472,13 +466,7 @@ static boolean build_vertex_emit( struct translate_sse *p, | |||
| x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); | |||
| /* Incr index | |||
| */ /* Emit code for each of the attributes. Currently routes | |||
| * everything through SSE registers, even when it might be more | |||
| * efficient to stick with regular old x86. No optimization or | |||
| * other tricks - enough new ground to cover here just getting | |||
| * things working. | |||
| */ | |||
| */ | |||
| if (linear) { | |||
| x86_inc(p->func, idxEBX); | |||
| } | |||
| @@ -88,7 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe ) | |||
| struct pipe_winsys *ws = pipe->winsys; | |||
| uint i; | |||
| draw_destroy( softpipe->draw ); | |||
| if (softpipe->draw) | |||
| draw_destroy( softpipe->draw ); | |||
| softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); | |||
| softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); | |||
| @@ -216,17 +217,23 @@ softpipe_create( struct pipe_screen *screen, | |||
| * Create drawing context and plug our rendering stage into it. | |||
| */ | |||
| softpipe->draw = draw_create(); | |||
| assert(softpipe->draw); | |||
| if (!softpipe->draw) | |||
| goto fail; | |||
| softpipe->setup = sp_draw_render_stage(softpipe); | |||
| if (!softpipe->setup) | |||
| goto fail; | |||
| if (GETENV( "SP_NO_RAST" ) != NULL) | |||
| softpipe->no_rast = TRUE; | |||
| if (GETENV( "SP_VBUF" ) != NULL) { | |||
| sp_init_vbuf(softpipe); | |||
| if (GETENV( "SP_NO_VBUF" ) != NULL) { | |||
| /* Deprecated path -- vbuf is the intended interface to the draw module: | |||
| */ | |||
| draw_set_rasterize_stage(softpipe->draw, softpipe->setup); | |||
| } | |||
| else { | |||
| draw_set_rasterize_stage(softpipe->draw, softpipe->setup); | |||
| sp_init_vbuf(softpipe); | |||
| } | |||
| /* plug in AA line/point stages */ | |||
| @@ -241,4 +248,8 @@ softpipe_create( struct pipe_screen *screen, | |||
| sp_init_surface_functions(softpipe); | |||
| return &softpipe->pipe; | |||
| fail: | |||
| softpipe_destroy(&softpipe->pipe); | |||
| return NULL; | |||
| } | |||
| @@ -64,16 +64,17 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) | |||
| } | |||
| typedef const float (*cptrf4)[4]; | |||
| static void | |||
| do_tri(struct draw_stage *stage, struct prim_header *prim) | |||
| { | |||
| struct setup_stage *setup = setup_stage( stage ); | |||
| setup_tri( setup->setup, | |||
| prim->v[0]->data, | |||
| prim->v[1]->data, | |||
| prim->v[2]->data ); | |||
| (cptrf4)prim->v[0]->data, | |||
| (cptrf4)prim->v[1]->data, | |||
| (cptrf4)prim->v[2]->data ); | |||
| } | |||
| static void | |||
| @@ -82,8 +83,8 @@ do_line(struct draw_stage *stage, struct prim_header *prim) | |||
| struct setup_stage *setup = setup_stage( stage ); | |||
| setup_line( setup->setup, | |||
| prim->v[0]->data, | |||
| prim->v[1]->data ); | |||
| (cptrf4)prim->v[0]->data, | |||
| (cptrf4)prim->v[1]->data ); | |||
| } | |||
| static void | |||
| @@ -92,7 +93,7 @@ do_point(struct draw_stage *stage, struct prim_header *prim) | |||
| struct setup_stage *setup = setup_stage( stage ); | |||
| setup_point( setup->setup, | |||
| prim->v[0]->data ); | |||
| (cptrf4)prim->v[0]->data ); | |||
| } | |||
| @@ -116,30 +116,28 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) | |||
| if (prim == PIPE_PRIM_TRIANGLES || | |||
| prim == PIPE_PRIM_LINES || | |||
| prim == PIPE_PRIM_POINTS) { | |||
| cvbr->prim = prim; | |||
| return TRUE; | |||
| } | |||
| else { | |||
| return FALSE; | |||
| } | |||
| cvbr->prim = prim; | |||
| return TRUE; | |||
| } | |||
| /** | |||
|  * Return the address of vertex number 'index' inside 'vertex_buffer', | |||
|  * where consecutive vertices are 'stride' bytes apart.  The address is | |||
|  * returned as a cptrf4 so it can be handed directly to setup_point(), | |||
|  * setup_line() and setup_tri(). | |||
|  */ | |||
| static INLINE cptrf4 get_vert( const void *vertex_buffer, | |||
|                                int index, | |||
|                                int stride ) | |||
| { | |||
|    /* Byte-offset arithmetic; keep the const qualifier of the buffer | |||
|     * instead of casting it away on the way to the const result type. | |||
|     */ | |||
|    return (cptrf4)((const char *)vertex_buffer + index * stride); | |||
| } | |||
| static void | |||
| sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) | |||
| sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) | |||
| { | |||
| struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); | |||
| struct softpipe_context *softpipe = cvbr->softpipe; | |||
| unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
| unsigned i, j; | |||
| void *vertex_buffer = cvbr->vertex_buffer; | |||
| cptrf4 v[3]; | |||
| unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
| unsigned i; | |||
| const void *vertex_buffer = cvbr->vertex_buffer; | |||
| /* XXX: break this dependency - make setup_context live under | |||
| * softpipe, rename the old "setup" draw stage to something else. | |||
| @@ -149,40 +147,98 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) | |||
| switch (cvbr->prim) { | |||
| case PIPE_PRIM_TRIANGLES: | |||
| for (i = 0; i < nr_indices; i += 3) { | |||
| for (j = 0; j < 3; j++) | |||
| v[j] = (cptrf4)((char *)vertex_buffer + | |||
| indices[i+j] * vertex_size); | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2]); | |||
| case PIPE_PRIM_POINTS: | |||
| for (i = 0; i < nr; i++) { | |||
| setup_point( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-0], stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINES: | |||
| for (i = 0; i < nr_indices; i += 2) { | |||
| for (j = 0; j < 2; j++) | |||
| v[j] = (cptrf4)((char *)vertex_buffer + | |||
| indices[i+j] * vertex_size); | |||
| for (i = 1; i < nr; i += 2) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| for (i = 1; i < nr; i ++) { | |||
| setup_line( setup_ctx, | |||
| v[0], | |||
| v[1] ); | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_POINTS: | |||
| for (i = 0; i < nr_indices; i++) { | |||
| v[0] = (cptrf4)((char *)vertex_buffer + | |||
| indices[i] * vertex_size); | |||
| case PIPE_PRIM_LINE_LOOP: | |||
| for (i = 1; i < nr; i ++) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride) ); | |||
| } | |||
| if (nr) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, indices[nr-1], stride), | |||
| get_vert(vertex_buffer, indices[0], stride) ); | |||
| } | |||
| break; | |||
| setup_point( setup_ctx, | |||
| v[0] ); | |||
| case PIPE_PRIM_TRIANGLES: | |||
| for (i = 2; i < nr; i += 3) { | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-2], stride), | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| for (i = 2; i < nr; i += 1) { | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i+(i&1)-2], stride), | |||
| get_vert(vertex_buffer, indices[i-(i&1)-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_FAN: | |||
| case PIPE_PRIM_POLYGON: | |||
| for (i = 2; i < nr; i += 1) { | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[0], stride), | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUADS: | |||
| for (i = 3; i < nr; i += 4) { | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-3], stride), | |||
| get_vert(vertex_buffer, indices[i-2], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-2], stride), | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
| for (i = 3; i < nr; i += 2) { | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-3], stride), | |||
| get_vert(vertex_buffer, indices[i-2], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| setup_tri( setup_ctx, | |||
| get_vert(vertex_buffer, indices[i-1], stride), | |||
| get_vert(vertex_buffer, indices[i-3], stride), | |||
| get_vert(vertex_buffer, indices[i-0], stride)); | |||
| } | |||
| break; | |||
| default: | |||
| assert(0); | |||
| } | |||
| /* XXX: why are we calling this??? If we had to call something, it | |||
| @@ -202,131 +258,107 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) | |||
| struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); | |||
| struct softpipe_context *softpipe = cvbr->softpipe; | |||
| struct draw_stage *setup = softpipe->setup; | |||
| const void *vertex_buffer = cvbr->vertex_buffer; | |||
| const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
| const void *vertex_buffer = NULL; | |||
| const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); | |||
| unsigned i; | |||
| struct setup_context *setup_ctx = sp_draw_setup_context(setup); | |||
| cptrf4 v[3]; | |||
| #define VERTEX(I) \ | |||
| (cptrf4) ((char *) vertex_buffer + (I) * vertex_size) | |||
| vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride); | |||
| switch (cvbr->prim) { | |||
| case PIPE_PRIM_POINTS: | |||
| for (i = 0; i < nr; i++) { | |||
| v[0] = VERTEX(i); | |||
| setup_point( setup_ctx, v[0] ); | |||
| setup_point( setup_ctx, | |||
| get_vert(vertex_buffer, i-0, stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINES: | |||
| assert(nr % 2 == 0); | |||
| for (i = 0; i < nr; i += 2) { | |||
| v[0] = VERTEX(i); | |||
| v[1] = VERTEX(i + 1); | |||
| setup_line( setup_ctx, v[0], v[1] ); | |||
| for (i = 1; i < nr; i += 2) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_STRIP: | |||
| for (i = 1; i < nr; i++) { | |||
| v[0] = VERTEX(i - 1); | |||
| v[1] = VERTEX(i); | |||
| setup_line( setup_ctx, v[0], v[1] ); | |||
| for (i = 1; i < nr; i ++) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_LINE_LOOP: | |||
| for (i = 1; i < nr; i ++) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride) ); | |||
| } | |||
| if (nr) { | |||
| setup_line( setup_ctx, | |||
| get_vert(vertex_buffer, nr-1, stride), | |||
| get_vert(vertex_buffer, 0, stride) ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLES: | |||
| assert(nr % 3 == 0); | |||
| for (i = 0; i < nr; i += 3) { | |||
| v[0] = VERTEX(i + 0); | |||
| v[1] = VERTEX(i + 1); | |||
| v[2] = VERTEX(i + 2); | |||
| for (i = 2; i < nr; i += 3) { | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, i-2, stride), | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_STRIP: | |||
| assert(nr >= 3); | |||
| for (i = 2; i < nr; i++) { | |||
| v[0] = VERTEX(i - 2); | |||
| v[1] = VERTEX(i - 1); | |||
| v[2] = VERTEX(i); | |||
| for (i = 2; i < nr; i += 1) { | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, i+(i&1)-2, stride), | |||
| get_vert(vertex_buffer, i-(i&1)-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_TRIANGLE_FAN: | |||
| assert(nr >= 3); | |||
| for (i = 2; i < nr; i++) { | |||
| v[0] = VERTEX(0); | |||
| v[1] = VERTEX(i - 1); | |||
| v[2] = VERTEX(i); | |||
| case PIPE_PRIM_POLYGON: | |||
| for (i = 2; i < nr; i += 1) { | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, 0, stride), | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUADS: | |||
| assert(nr % 4 == 0); | |||
| for (i = 0; i < nr; i += 4) { | |||
| v[0] = VERTEX(i + 0); | |||
| v[1] = VERTEX(i + 1); | |||
| v[2] = VERTEX(i + 2); | |||
| for (i = 3; i < nr; i += 4) { | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, i-3, stride), | |||
| get_vert(vertex_buffer, i-2, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| v[0] = VERTEX(i + 0); | |||
| v[1] = VERTEX(i + 2); | |||
| v[2] = VERTEX(i + 3); | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, i-2, stride), | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_QUAD_STRIP: | |||
| assert(nr >= 4); | |||
| for (i = 2; i < nr; i += 2) { | |||
| v[0] = VERTEX(i - 2); | |||
| v[1] = VERTEX(i); | |||
| v[2] = VERTEX(i + 1); | |||
| for (i = 3; i < nr; i += 2) { | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, i-3, stride), | |||
| get_vert(vertex_buffer, i-2, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| v[0] = VERTEX(i - 2); | |||
| v[1] = VERTEX(i + 1); | |||
| v[2] = VERTEX(i - 1); | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| } | |||
| break; | |||
| case PIPE_PRIM_POLYGON: | |||
| /* draw as tri fan */ | |||
| for (i = 2; i < nr; i++) { | |||
| v[0] = VERTEX(0); | |||
| v[1] = VERTEX(i - 1); | |||
| v[2] = VERTEX(i); | |||
| setup_tri( setup_ctx, | |||
| v[0], | |||
| v[1], | |||
| v[2] ); | |||
| get_vert(vertex_buffer, i-1, stride), | |||
| get_vert(vertex_buffer, i-3, stride), | |||
| get_vert(vertex_buffer, i-0, stride)); | |||
| } | |||
| break; | |||
| default: | |||
| /* XXX finish remaining prim types */ | |||
| assert(0); | |||
| } | |||
| #undef VERTEX | |||
| } | |||
| @@ -103,6 +103,17 @@ typedef unsigned int uintptr_t; | |||
| #endif | |||
| /* Expands to __cdecl when WIN32 is defined and to nothing otherwise: this should match linux gcc cdecl semantics everywhere, so that we | |||
| * just codegen one calling convention on all platforms. | |||
| */ | |||
| #ifdef WIN32 | |||
| #define PIPE_CDECL __cdecl | |||
| #else | |||
| #define PIPE_CDECL | |||
| #endif | |||
| #if defined __GNUC__ | |||
| #define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) | |||
| #define ALIGN16_ASSIGN(NAME) NAME##___aligned | |||
| @@ -115,12 +126,16 @@ typedef unsigned int uintptr_t; | |||
| /** For calling code-gen'd functions */ | |||
| /** | |||
| * For calling code-gen'd functions, phase out in favor of | |||
| * PIPE_CDECL, above, which really means cdecl on all platforms, not | |||
| * like the below... | |||
| */ | |||
| #if !defined(XSTDCALL) | |||
| #if defined(WIN32) | |||
| #define XSTDCALL __stdcall | |||
| #define XSTDCALL __stdcall /* phase this out */ | |||
| #else | |||
| #define XSTDCALL | |||
| #define XSTDCALL /* XXX: NOTE! not STDCALL! */ | |||
| #endif | |||
| #endif | |||
| @@ -797,8 +797,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) | |||
| pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v)); | |||
| } | |||
| if (pipe == NULL) | |||
| goto fail; | |||
| c->st = st_create_context(pipe, &v->mesa_visual, | |||
| share_list ? share_list->st : NULL); | |||
| if (c->st == NULL) | |||
| goto fail; | |||
| mesaCtx = c->st->ctx; | |||
| c->st->ctx->DriverCtx = c; | |||
| @@ -818,6 +824,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) | |||
| #endif | |||
| return c; | |||
| fail: | |||
| if (c->st) | |||
| st_destroy_context(c->st); | |||
| if (pipe) | |||
| pipe->destroy(pipe); | |||
| FREE(c); | |||
| return NULL; | |||
| } | |||
| @@ -53,7 +53,9 @@ struct state_key { | |||
| unsigned light_color_material:1; | |||
| unsigned light_color_material_mask:12; | |||
| unsigned light_material_mask:12; | |||
| unsigned material_shininess_is_zero:1; | |||
| unsigned need_eye_coords:1; | |||
| unsigned normalize:1; | |||
| unsigned rescale_normals:1; | |||
| unsigned fog_source_is_depth:1; | |||
| @@ -154,6 +156,26 @@ tnl_get_per_vertex_fog(GLcontext *ctx) | |||
| #endif | |||
| } | |||
| /** | |||
|  * Tell whether the shininess material attribute of one side must be | |||
|  * treated as active. | |||
|  * | |||
|  * \param ctx   context whose current material values are inspected | |||
|  * \param key   state key providing the two material bit masks | |||
|  * \param side  offset added to MAT_ATTRIB_FRONT_SHININESS (callers in | |||
|  *              this file pass 0 or 1) | |||
|  * \return GL_TRUE if the attribute's bit is set in either mask of the | |||
|  *         key, or if its current value in the context is non-zero; | |||
|  *         GL_FALSE otherwise. | |||
|  */ | |||
| static GLboolean check_active_shininess( GLcontext *ctx, | |||
|                                          const struct state_key *key, | |||
|                                          GLuint side ) | |||
| { | |||
|    const GLuint attrib = MAT_ATTRIB_FRONT_SHININESS + side; | |||
|    const GLuint bit = 1 << attrib; | |||
|    /* A set bit in either mask is enough; the stored value is only | |||
|     * consulted when neither mask mentions the attribute. | |||
|     */ | |||
|    if ((key->light_color_material_mask & bit) || | |||
|        (key->light_material_mask & bit)) | |||
|       return GL_TRUE; | |||
|    return (ctx->Light.Material.Attrib[attrib][0] != 0.0F) ? GL_TRUE : GL_FALSE; | |||
| } | |||
| static struct state_key *make_state_key( GLcontext *ctx ) | |||
| { | |||
| @@ -167,6 +189,8 @@ static struct state_key *make_state_key( GLcontext *ctx ) | |||
| */ | |||
| assert(fp); | |||
| key->need_eye_coords = ctx->_NeedEyeCoords; | |||
| key->fragprog_inputs_read = fp->Base.InputsRead; | |||
| if (ctx->RenderMode == GL_FEEDBACK) { | |||
| @@ -211,6 +235,17 @@ static struct state_key *make_state_key( GLcontext *ctx ) | |||
| key->unit[i].light_attenuated = 1; | |||
| } | |||
| } | |||
| if (check_active_shininess(ctx, key, 0)) { | |||
| key->material_shininess_is_zero = 0; | |||
| } | |||
| else if (key->light_twoside && | |||
| check_active_shininess(ctx, key, 1)) { | |||
| key->material_shininess_is_zero = 0; | |||
| } | |||
| else { | |||
| key->material_shininess_is_zero = 1; | |||
| } | |||
| } | |||
| if (ctx->Transform.Normalize) | |||
| @@ -270,7 +305,7 @@ static struct state_key *make_state_key( GLcontext *ctx ) | |||
| * generated program with line/function references for each | |||
| * instruction back into this file: | |||
| */ | |||
| /* Keep the annotated listing opt-in through the MESA_VERBOSE debug | |||
|  * flags rather than hard-wiring it on for every build. | |||
|  */ | |||
| #define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM) | |||
| /* Should be tunable by the driver - do we want to do matrix | |||
| * multiplications with DP4's or with MUL/MAD's? SSE works better | |||
| @@ -309,8 +344,9 @@ struct tnl_program { | |||
| GLuint temp_reserved; | |||
| struct ureg eye_position; | |||
| struct ureg eye_position_z; | |||
| struct ureg eye_position_normalized; | |||
| struct ureg eye_normal; | |||
| struct ureg transformed_normal; | |||
| struct ureg identity; | |||
| GLuint materials; | |||
| @@ -653,9 +689,9 @@ static void emit_normalize_vec3( struct tnl_program *p, | |||
| struct ureg src ) | |||
| { | |||
| struct ureg tmp = get_temp(p); | |||
| emit_op2(p, OPCODE_DP3, tmp, 0, src, src); | |||
| emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); | |||
| emit_op2(p, OPCODE_MUL, dest, 0, src, tmp); | |||
| emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); | |||
| emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); | |||
| emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); | |||
| release_temp(p, tmp); | |||
| } | |||
| @@ -693,6 +729,28 @@ static struct ureg get_eye_position( struct tnl_program *p ) | |||
| } | |||
| /** | |||
|  * Return a register holding the eye-space Z of the vertex position. | |||
|  * | |||
|  * If the full eye position has already been generated, its Z component | |||
|  * is reused.  Otherwise a single DP4 of the input position with element | |||
|  * [2] of the registered modelview matrix rows is emitted into a reserved | |||
|  * temporary, which is cached in p->eye_position_z for later calls. | |||
|  */ | |||
| static struct ureg get_eye_position_z( struct tnl_program *p ) | |||
| { | |||
|    struct ureg mv_rows[4]; | |||
|    struct ureg vertex_pos; | |||
|    /* Full eye position is available: just select its Z. */ | |||
|    if (!is_undef(p->eye_position)) | |||
|       return swizzle1(p->eye_position, Z); | |||
|    /* Already computed by an earlier call. */ | |||
|    if (!is_undef(p->eye_position_z)) | |||
|       return p->eye_position_z; | |||
|    vertex_pos = register_input( p, VERT_ATTRIB_POS ); | |||
|    p->eye_position_z = reserve_temp(p); | |||
|    register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, | |||
|                            0, mv_rows ); | |||
|    emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, vertex_pos, mv_rows[2]); | |||
|    return p->eye_position_z; | |||
| } | |||
| static struct ureg get_eye_position_normalized( struct tnl_program *p ) | |||
| { | |||
| if (is_undef(p->eye_position_normalized)) { | |||
| @@ -705,36 +763,52 @@ static struct ureg get_eye_position_normalized( struct tnl_program *p ) | |||
| } | |||
| static struct ureg get_eye_normal( struct tnl_program *p ) | |||
| static struct ureg get_transformed_normal( struct tnl_program *p ) | |||
| { | |||
| if (is_undef(p->eye_normal)) { | |||
| if (is_undef(p->transformed_normal) && | |||
| !p->state->need_eye_coords && | |||
| !p->state->normalize && | |||
| !(p->state->need_eye_coords == p->state->rescale_normals)) | |||
| { | |||
| p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); | |||
| } | |||
| else if (is_undef(p->transformed_normal)) | |||
| { | |||
| struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); | |||
| struct ureg mvinv[3]; | |||
| struct ureg transformed_normal = reserve_temp(p); | |||
| register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, | |||
| STATE_MATRIX_INVTRANS, mvinv ); | |||
| p->eye_normal = reserve_temp(p); | |||
| if (p->state->need_eye_coords) { | |||
| register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, | |||
| STATE_MATRIX_INVTRANS, mvinv ); | |||
| /* Transform to eye space: | |||
| */ | |||
| emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); | |||
| /* Transform to eye space: | |||
| */ | |||
| emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); | |||
| normal = transformed_normal; | |||
| } | |||
| /* Normalize/Rescale: | |||
| */ | |||
| if (p->state->normalize) { | |||
| emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); | |||
| emit_normalize_vec3( p, transformed_normal, normal ); | |||
| normal = transformed_normal; | |||
| } | |||
| else if (p->state->rescale_normals) { | |||
| else if (p->state->need_eye_coords == p->state->rescale_normals) { | |||
| /* This is already adjusted for eye/non-eye rendering: | |||
| */ | |||
| struct ureg rescale = register_param2(p, STATE_INTERNAL, | |||
| STATE_NORMAL_SCALE); | |||
| STATE_NORMAL_SCALE); | |||
| emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal, | |||
| swizzle1(rescale, X)); | |||
| emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); | |||
| normal = transformed_normal; | |||
| } | |||
| assert(normal.file == PROGRAM_TEMPORARY); | |||
| p->transformed_normal = normal; | |||
| } | |||
| return p->eye_normal; | |||
| return p->transformed_normal; | |||
| } | |||
| @@ -856,7 +930,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, | |||
| */ | |||
| if (!p->state->unit[i].light_spotcutoff_is_180) { | |||
| struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, | |||
| STATE_SPOT_DIR_NORMALIZED, i); | |||
| STATE_LIGHT_SPOT_DIR_NORMALIZED, i); | |||
| struct ureg spot = get_temp(p); | |||
| struct ureg slt = get_temp(p); | |||
| @@ -895,7 +969,26 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, | |||
| } | |||
| /** | |||
|  * Emit a two-instruction substitute for OPCODE_LIT.  build_lighting() | |||
|  * uses it when the state key has material_shininess_is_zero set and no | |||
|  * attenuation factor is applied. | |||
|  * | |||
|  * \param p     program being built | |||
|  * \param lit   destination register; only .y and .z are consumed | |||
|  * \param dots  source register whose components all hold the same | |||
|  *              dot product | |||
|  */ | |||
| static void emit_degenerate_lit( struct tnl_program *p, | |||
|                                  struct ureg lit, | |||
|                                  struct ureg dots ) | |||
| { | |||
|    /* NOTE(review): assumes the identity parameter is {0,0,0,1}, i.e. | |||
|     * id.y == id.z == 0 -- confirm against get_identity_param(). | |||
|     */ | |||
|    struct ureg id = get_identity_param(p); | |||
|    /* Note that result.x & result.w will not be examined.  Note also that | |||
|     * dots.xyzw == dots.xxxx. | |||
|     */ | |||
|    /* result[1] = MAX2(in, 0) | |||
|     */ | |||
|    emit_op2(p, OPCODE_MAX, lit, 0, id, dots); | |||
|    /* result[2] = (in > 0 ? 1 : 0) | |||
|     * | |||
|     * The zero operand has to come from the constant: 'lit' was just | |||
|     * overwritten by the MAX above and holds MAX2(in, 0), which is never | |||
|     * less than 'in', so comparing against it would always produce 0. | |||
|     */ | |||
|    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, | |||
|             swizzle1(id, Z),   /* 0 */ | |||
|             dots);             /* in[0] */ | |||
| } | |||
| /* Need to add some additional parameters to allow lighting in object | |||
| @@ -907,7 +1000,7 @@ static void build_lighting( struct tnl_program *p ) | |||
| const GLboolean twoside = p->state->light_twoside; | |||
| const GLboolean separate = p->state->separate_specular; | |||
| GLuint nr_lights = 0, count = 0; | |||
| struct ureg normal = get_eye_normal(p); | |||
| struct ureg normal = get_transformed_normal(p); | |||
| struct ureg lit = get_temp(p); | |||
| struct ureg dots = get_temp(p); | |||
| struct ureg _col0 = undef, _col1 = undef; | |||
| @@ -921,9 +1014,11 @@ static void build_lighting( struct tnl_program *p ) | |||
| set_material_flags(p); | |||
| { | |||
| struct ureg shininess = get_material(p, 0, STATE_SHININESS); | |||
| emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); | |||
| release_temp(p, shininess); | |||
| if (!p->state->material_shininess_is_zero) { | |||
| struct ureg shininess = get_material(p, 0, STATE_SHININESS); | |||
| emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); | |||
| release_temp(p, shininess); | |||
| } | |||
| _col0 = make_temp(p, get_scenecolor(p, 0)); | |||
| if (separate) | |||
| @@ -934,10 +1029,12 @@ static void build_lighting( struct tnl_program *p ) | |||
| } | |||
| if (twoside) { | |||
| struct ureg shininess = get_material(p, 1, STATE_SHININESS); | |||
| emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, | |||
| negate(swizzle1(shininess,X))); | |||
| release_temp(p, shininess); | |||
| if (!p->state->material_shininess_is_zero) { | |||
| struct ureg shininess = get_material(p, 1, STATE_SHININESS); | |||
| emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, | |||
| negate(swizzle1(shininess,X))); | |||
| release_temp(p, shininess); | |||
| } | |||
| _bfc0 = make_temp(p, get_scenecolor(p, 1)); | |||
| if (separate) | |||
| @@ -984,25 +1081,28 @@ static void build_lighting( struct tnl_program *p ) | |||
| /* Can use precomputed constants in this case. | |||
| * Attenuation never applies to infinite lights. | |||
| */ | |||
| VPpli = register_param3(p, STATE_LIGHT, i, | |||
| STATE_POSITION_NORMALIZED); | |||
| if (p->state->light_local_viewer) { | |||
| struct ureg eye_hat = get_eye_position_normalized(p); | |||
| half = get_temp(p); | |||
| emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
| emit_normalize_vec3(p, half, half); | |||
| } else { | |||
| half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR); | |||
| VPpli = register_param3(p, STATE_INTERNAL, | |||
| STATE_LIGHT_POSITION_NORMALIZED, i); | |||
| if (!p->state->material_shininess_is_zero) { | |||
| if (p->state->light_local_viewer) { | |||
| struct ureg eye_hat = get_eye_position_normalized(p); | |||
| half = get_temp(p); | |||
| emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
| emit_normalize_vec3(p, half, half); | |||
| } else { | |||
| half = register_param3(p, STATE_INTERNAL, | |||
| STATE_LIGHT_HALF_VECTOR, i); | |||
| } | |||
| } | |||
| } | |||
| else { | |||
| struct ureg Ppli = register_param3(p, STATE_LIGHT, i, | |||
| STATE_POSITION); | |||
| struct ureg Ppli = register_param3(p, STATE_INTERNAL, | |||
| STATE_LIGHT_POSITION, i); | |||
| struct ureg V = get_eye_position(p); | |||
| struct ureg dist = get_temp(p); | |||
| VPpli = get_temp(p); | |||
| half = get_temp(p); | |||
| /* Calculate VPpli vector | |||
| */ | |||
| @@ -1024,24 +1124,33 @@ static void build_lighting( struct tnl_program *p ) | |||
| /* Calculate viewer direction, or use infinite viewer: | |||
| */ | |||
| if (p->state->light_local_viewer) { | |||
| struct ureg eye_hat = get_eye_position_normalized(p); | |||
| emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
| } | |||
| else { | |||
| struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); | |||
| emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); | |||
| } | |||
| emit_normalize_vec3(p, half, half); | |||
| if (!p->state->material_shininess_is_zero) { | |||
| half = get_temp(p); | |||
| if (p->state->light_local_viewer) { | |||
| struct ureg eye_hat = get_eye_position_normalized(p); | |||
| emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); | |||
| } | |||
| else { | |||
| struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); | |||
| emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); | |||
| } | |||
| emit_normalize_vec3(p, half, half); | |||
| } | |||
| release_temp(p, dist); | |||
| } | |||
| /* Calculate dot products: | |||
| */ | |||
| emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); | |||
| emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); | |||
| if (p->state->material_shininess_is_zero) { | |||
| emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); | |||
| } | |||
| else { | |||
| emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); | |||
| emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); | |||
| } | |||
| /* Front face lighting: | |||
| */ | |||
| @@ -1052,11 +1161,6 @@ static void build_lighting( struct tnl_program *p ) | |||
| struct ureg res0, res1; | |||
| GLuint mask0, mask1; | |||
| emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
| if (!is_undef(att)) | |||
| emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
| if (count == nr_lights) { | |||
| if (separate) { | |||
| @@ -1078,7 +1182,21 @@ static void build_lighting( struct tnl_program *p ) | |||
| res1 = _col1; | |||
| } | |||
| emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); | |||
| if (!is_undef(att)) { | |||
| emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
| emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
| emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); | |||
| } | |||
| else if (!p->state->material_shininess_is_zero) { | |||
| emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
| emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); | |||
| } | |||
| else { | |||
| emit_degenerate_lit(p, lit, dots); | |||
| emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); | |||
| } | |||
| emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); | |||
| emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); | |||
| @@ -1096,11 +1214,6 @@ static void build_lighting( struct tnl_program *p ) | |||
| struct ureg res0, res1; | |||
| GLuint mask0, mask1; | |||
| emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z))); | |||
| if (!is_undef(att)) | |||
| emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
| if (count == nr_lights) { | |||
| if (separate) { | |||
| mask0 = WRITEMASK_XYZ; | |||
| @@ -1121,7 +1234,23 @@ static void build_lighting( struct tnl_program *p ) | |||
| mask1 = 0; | |||
| } | |||
| emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); | |||
| /* The second-side terms use the negated dot products, with Z and W | |||
|  * exchanged by the same swizzle. | |||
|  * NOTE(review): this rebinds the function-scope 'dots' handle -- confirm | |||
|  * that no later code expects the un-negated register. | |||
|  */ | |||
| dots = negate(swizzle(dots,X,Y,W,Z)); | |||
| /* Accumulate this light's ambient term into _bfc0 exactly once, | |||
|  * mirroring the front-face code that accumulates into _col0.  The | |||
|  * non-attenuated branches must not touch _col0 here, and no further | |||
|  * unconditional ADD may follow or ambient would be counted twice. | |||
|  */ | |||
| if (!is_undef(att)) { | |||
|    emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
|    emit_op2(p, OPCODE_MUL, lit, 0, lit, att); | |||
|    emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); | |||
| } | |||
| else if (!p->state->material_shininess_is_zero) { | |||
|    emit_op1(p, OPCODE_LIT, lit, 0, dots); | |||
|    emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); | |||
| } | |||
| else { | |||
|    emit_degenerate_lit(p, lit, dots); | |||
|    emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); | |||
| } | |||
| emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); | |||
| emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); | |||
| @@ -1146,7 +1275,7 @@ static void build_fog( struct tnl_program *p ) | |||
| struct ureg input; | |||
| if (p->state->fog_source_is_depth) { | |||
| input = swizzle1(get_eye_position(p), Z); | |||
| input = get_eye_position_z(p); | |||
| } | |||
| else { | |||
| input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); | |||
| @@ -1201,7 +1330,7 @@ static void build_reflect_texgen( struct tnl_program *p, | |||
| struct ureg dest, | |||
| GLuint writemask ) | |||
| { | |||
| struct ureg normal = get_eye_normal(p); | |||
| struct ureg normal = get_transformed_normal(p); | |||
| struct ureg eye_hat = get_eye_position_normalized(p); | |||
| struct ureg tmp = get_temp(p); | |||
| @@ -1219,7 +1348,7 @@ static void build_sphere_texgen( struct tnl_program *p, | |||
| struct ureg dest, | |||
| GLuint writemask ) | |||
| { | |||
| struct ureg normal = get_eye_normal(p); | |||
| struct ureg normal = get_transformed_normal(p); | |||
| struct ureg eye_hat = get_eye_position_normalized(p); | |||
| struct ureg tmp = get_temp(p); | |||
| struct ureg half = register_scalar_const(p, .5); | |||
| @@ -1338,7 +1467,7 @@ static void build_texture_transform( struct tnl_program *p ) | |||
| } | |||
| if (normal_mask) { | |||
| struct ureg normal = get_eye_normal(p); | |||
| struct ureg normal = get_transformed_normal(p); | |||
| emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); | |||
| } | |||
| @@ -1376,7 +1505,7 @@ static void build_texture_transform( struct tnl_program *p ) | |||
| static void build_pointsize( struct tnl_program *p ) | |||
| { | |||
| struct ureg eye = get_eye_position(p); | |||
| struct ureg eye = get_eye_position_z(p); | |||
| struct ureg state_size = register_param1(p, STATE_POINT_SIZE); | |||
| struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); | |||
| struct ureg out = register_output(p, VERT_RESULT_PSIZ); | |||
| @@ -1474,8 +1603,9 @@ create_new_program( const struct state_key *key, | |||
| p.state = key; | |||
| p.program = program; | |||
| p.eye_position = undef; | |||
| p.eye_position_z = undef; | |||
| p.eye_position_normalized = undef; | |||
| p.eye_normal = undef; | |||
| p.transformed_normal = undef; | |||
| p.identity = undef; | |||
| p.temp_in_use = 0; | |||
| @@ -1357,6 +1357,7 @@ _mesa_init_lighting( GLcontext *ctx ) | |||
| /* Miscellaneous */ | |||
| ctx->Light._NeedEyeCoords = GL_FALSE; | |||
| ctx->_NeedEyeCoords = GL_FALSE; | |||
| ctx->_ForceEyeCoords = GL_TRUE; | |||
| ctx->_ModelViewInvScale = 1.0; | |||
| } | |||
| @@ -1209,18 +1209,6 @@ _mesa_update_state_locked( GLcontext *ctx ) | |||
| | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR)) | |||
| update_tricaps( ctx, new_state ); | |||
| if (ctx->FragmentProgram._MaintainTexEnvProgram) { | |||
| prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); | |||
| } | |||
| if (ctx->VertexProgram._MaintainTnlProgram) { | |||
| prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX | | |||
| _NEW_TRANSFORM | _NEW_POINT | | |||
| _NEW_FOG | _NEW_LIGHT); | |||
| } | |||
| if (new_state & prog_flags) | |||
| update_program( ctx ); | |||
| /* ctx->_NeedEyeCoords is now up to date. | |||
| * | |||
| * If the truth value of this variable has changed, update for the | |||
| @@ -1233,6 +1221,20 @@ _mesa_update_state_locked( GLcontext *ctx ) | |||
| if (new_state & _MESA_NEW_NEED_EYE_COORDS) | |||
| _mesa_update_tnl_spaces( ctx, new_state ); | |||
| if (ctx->FragmentProgram._MaintainTexEnvProgram) { | |||
| prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); | |||
| } | |||
| if (ctx->VertexProgram._MaintainTnlProgram) { | |||
| prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX | | |||
| _NEW_TRANSFORM | _NEW_POINT | | |||
| _NEW_FOG | _NEW_LIGHT | | |||
| _MESA_NEW_NEED_EYE_COORDS); | |||
| } | |||
| if (new_state & prog_flags) | |||
| update_program( ctx ); | |||
| /* | |||
| * Give the driver a chance to act upon the new_state flags. | |||
| * The driver might plug in different span functions, for example. | |||
| @@ -134,10 +134,6 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], | |||
| value[3] = 1.0; | |||
| } | |||
| return; | |||
| case STATE_POSITION_NORMALIZED: | |||
| COPY_4V(value, ctx->Light.Light[ln].EyePosition); | |||
| NORMALIZE_3FV( value ); | |||
| return; | |||
| default: | |||
| _mesa_problem(ctx, "Invalid light state in fetch_state"); | |||
| return; | |||
| @@ -401,7 +397,11 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], | |||
| case STATE_INTERNAL: | |||
| switch (state[1]) { | |||
| case STATE_NORMAL_SCALE: | |||
| ASSIGN_4V(value, ctx->_ModelViewInvScale, 0, 0, 1); | |||
| ASSIGN_4V(value, | |||
| ctx->_ModelViewInvScale, | |||
| ctx->_ModelViewInvScale, | |||
| ctx->_ModelViewInvScale, | |||
| 1); | |||
| return; | |||
| case STATE_TEXRECT_SCALE: | |||
| { | |||
| @@ -431,15 +431,46 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], | |||
| value[2] = ctx->Fog.Density * ONE_DIV_LN2; | |||
| value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2; | |||
| return; | |||
| case STATE_SPOT_DIR_NORMALIZED: { | |||
| case STATE_LIGHT_SPOT_DIR_NORMALIZED: { | |||
| /* here, state[2] is the light number */ | |||
| /* pre-normalize spot dir */ | |||
| const GLuint ln = (GLuint) state[2]; | |||
| COPY_3V(value, ctx->Light.Light[ln].EyeDirection); | |||
| NORMALIZE_3FV(value); | |||
| COPY_3V(value, ctx->Light.Light[ln]._NormDirection); | |||
| value[3] = ctx->Light.Light[ln]._CosCutoff; | |||
| return; | |||
| } | |||
| case STATE_LIGHT_POSITION: { | |||
| const GLuint ln = (GLuint) state[2]; | |||
| COPY_4V(value, ctx->Light.Light[ln]._Position); | |||
| return; | |||
| } | |||
| case STATE_LIGHT_POSITION_NORMALIZED: { | |||
| const GLuint ln = (GLuint) state[2]; | |||
| COPY_4V(value, ctx->Light.Light[ln]._Position); | |||
| NORMALIZE_3FV( value ); | |||
| return; | |||
| } | |||
| case STATE_LIGHT_HALF_VECTOR: { | |||
| const GLuint ln = (GLuint) state[2]; | |||
| GLfloat p[3]; | |||
| /* Compute infinite half angle vector: | |||
| * halfVector = normalize(normalize(lightPos) + (0, 0, 1)) | |||
| * light.EyePosition.w should be 0 for infinite lights. | |||
| */ | |||
| COPY_3V(p, ctx->Light.Light[ln]._Position); | |||
| NORMALIZE_3FV(p); | |||
| ADD_3V(value, p, ctx->_EyeZDir); | |||
| NORMALIZE_3FV(value); | |||
| value[3] = 1.0; | |||
| return; | |||
| } | |||
| case STATE_PT_SCALE: | |||
| value[0] = ctx->Pixel.RedScale; | |||
| value[1] = ctx->Pixel.GreenScale; | |||
| @@ -696,7 +727,6 @@ append_token(char *dst, gl_state_index k) | |||
| append(dst, "normalScale"); | |||
| break; | |||
| case STATE_INTERNAL: | |||
| case STATE_POSITION_NORMALIZED: | |||
| append(dst, "(internal)"); | |||
| break; | |||
| case STATE_PT_SCALE: | |||
| @@ -106,9 +106,11 @@ typedef enum gl_state_index_ { | |||
| STATE_INTERNAL, /* Mesa additions */ | |||
| STATE_NORMAL_SCALE, | |||
| STATE_TEXRECT_SCALE, | |||
| STATE_POSITION_NORMALIZED, /* normalized light position */ | |||
| STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */ | |||
| STATE_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */ | |||
| STATE_LIGHT_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */ | |||
| STATE_LIGHT_POSITION, /* object vs eye space */ | |||
| STATE_LIGHT_POSITION_NORMALIZED, /* object vs eye space */ | |||
| STATE_LIGHT_HALF_VECTOR, /* object vs eye space */ | |||
| STATE_PT_SCALE, /**< Pixel transfer RGBA scale */ | |||
| STATE_PT_BIAS, /**< Pixel transfer RGBA bias */ | |||
| STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ | |||