| 
				
			 | 
			
			 | 
			@@ -31,6 +31,8 @@ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			  */ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#include <spu_mfcio.h> | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#include <transpose_matrix4x4.h> | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#include "pipe/p_util.h" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#include "pipe/p_state.h" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#include "pipe/p_shader_tokens.h" | 
		
		
	
	
		
			
			| 
				
			 | 
			
			 | 
			@@ -308,61 +310,6 @@ static spu_fetch_func get_fetch_func( enum pipe_format format ) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			void | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			spu_transpose_4x4(qword *out, const qword *in) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			{ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   static const qword masks[8] = { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      {  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      {  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      {  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      {  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      {  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      }, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   }; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[0] = si_shufb(in[0], in[1], masks[0]); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[0] = si_or(out[0], si_shufb(in[2], in[3], masks[1])); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[1] = si_shufb(in[0], in[1], masks[2]); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[1] = si_or(out[1], si_shufb(in[2], in[3], masks[3])); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[2] = si_shufb(in[0], in[1], masks[4]); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[2] = si_or(out[2], si_shufb(in[2], in[3], masks[5])); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[3] = si_shufb(in[0], in[1], masks[6]); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   out[3] = si_or(out[3], si_shufb(in[2], in[3], masks[7])); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			/** | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * Fetch vertex attributes for 'count' vertices. | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 */ | 
		
		
	
	
		
			
			| 
				
			 | 
			
			 | 
			@@ -427,7 +374,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			       * excessive number of fetch functions, but we could at least | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			       * minimize the transpose step: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			       */ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      spu_transpose_4x4(&machine->Inputs[attr].xyzw[0].q, p); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			      _transpose_matrix4x4(&machine->Inputs[attr].xyzw[0].q, p); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			   } | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  |