Clone of mesa.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

spu_vertex_fetch.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. /**************************************************************************
  2. *
  3. * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4. * All Rights Reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sub license, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice (including the
  15. * next paragraph) shall be included in all copies or substantial portions
  16. * of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25. *
  26. **************************************************************************/
  27. /*
  28. * Authors:
  29. * Keith Whitwell <keith@tungstengraphics.com>
  30. */
  31. #include <spu_mfcio.h>
  32. #include "pipe/p_util.h"
  33. #include "pipe/p_state.h"
  34. #include "pipe/p_shader_tokens.h"
  35. #include "spu_exec.h"
  36. #include "spu_vertex_shader.h"
  37. #include "spu_main.h"
  38. #define DRAW_DBG 0
  39. static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 };
  40. /**
  41. * Fetch a float[4] vertex attribute from memory, doing format/type
  42. * conversion as needed.
  43. *
  44. * This is probably needed/dupliocated elsewhere, eg format
  45. * conversion, texture sampling etc.
  46. */
  47. #define FETCH_ATTRIB( NAME, SZ, CVT ) \
  48. static qword \
  49. fetch_##NAME(const void *ptr) \
  50. { \
  51. vec_float4 attrib = defaults; \
  52. int i; \
  53. \
  54. for (i = 0; i < SZ; i++) { \
  55. attrib = spu_insert(CVT, attrib, i); \
  56. } \
  57. return (qword) attrib; \
  58. }
  59. #define CVT_64_FLOAT (float) ((double *) ptr)[i]
  60. #define CVT_32_FLOAT ((float *) ptr)[i]
  61. #define CVT_8_USCALED (float) ((unsigned char *) ptr)[i]
  62. #define CVT_16_USCALED (float) ((unsigned short *) ptr)[i]
  63. #define CVT_32_USCALED (float) ((unsigned int *) ptr)[i]
  64. #define CVT_8_SSCALED (float) ((char *) ptr)[i]
  65. #define CVT_16_SSCALED (float) ((short *) ptr)[i]
  66. #define CVT_32_SSCALED (float) ((int *) ptr)[i]
  67. #define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f
  68. #define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f
  69. #define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f
  70. #define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f
  71. #define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f
  72. #define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f
  73. FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
  74. FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
  75. FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
  76. FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
  77. FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
  78. FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
  79. FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
  80. FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
  81. FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
  82. FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
  83. FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
  84. FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
  85. FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
  86. FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
  87. FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
  88. FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
  89. FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
  90. FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
  91. FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
  92. FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
  93. FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
  94. FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
  95. FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
  96. FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
  97. FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
  98. FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
  99. FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
  100. FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
  101. FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
  102. FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
  103. FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
  104. FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
  105. FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
  106. FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
  107. FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
  108. FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
  109. FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
  110. FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
  111. FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
  112. FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
  113. FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
  114. FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
  115. FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
  116. FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
  117. FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
  118. FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
  119. FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
  120. FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
  121. FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
  122. FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
  123. FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
  124. FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
  125. FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
  126. FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
  127. FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
  128. FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
  129. FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
  130. //FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
  131. static spu_fetch_func get_fetch_func( enum pipe_format format )
  132. {
  133. #if 0
  134. {
  135. char tmp[80];
  136. pf_sprint_name(tmp, format);
  137. _mesa_printf("%s: %s\n", __FUNCTION__, tmp);
  138. }
  139. #endif
  140. switch (format) {
  141. case PIPE_FORMAT_R64_FLOAT:
  142. return fetch_R64_FLOAT;
  143. case PIPE_FORMAT_R64G64_FLOAT:
  144. return fetch_R64G64_FLOAT;
  145. case PIPE_FORMAT_R64G64B64_FLOAT:
  146. return fetch_R64G64B64_FLOAT;
  147. case PIPE_FORMAT_R64G64B64A64_FLOAT:
  148. return fetch_R64G64B64A64_FLOAT;
  149. case PIPE_FORMAT_R32_FLOAT:
  150. return fetch_R32_FLOAT;
  151. case PIPE_FORMAT_R32G32_FLOAT:
  152. return fetch_R32G32_FLOAT;
  153. case PIPE_FORMAT_R32G32B32_FLOAT:
  154. return fetch_R32G32B32_FLOAT;
  155. case PIPE_FORMAT_R32G32B32A32_FLOAT:
  156. return fetch_R32G32B32A32_FLOAT;
  157. case PIPE_FORMAT_R32_UNORM:
  158. return fetch_R32_UNORM;
  159. case PIPE_FORMAT_R32G32_UNORM:
  160. return fetch_R32G32_UNORM;
  161. case PIPE_FORMAT_R32G32B32_UNORM:
  162. return fetch_R32G32B32_UNORM;
  163. case PIPE_FORMAT_R32G32B32A32_UNORM:
  164. return fetch_R32G32B32A32_UNORM;
  165. case PIPE_FORMAT_R32_USCALED:
  166. return fetch_R32_USCALED;
  167. case PIPE_FORMAT_R32G32_USCALED:
  168. return fetch_R32G32_USCALED;
  169. case PIPE_FORMAT_R32G32B32_USCALED:
  170. return fetch_R32G32B32_USCALED;
  171. case PIPE_FORMAT_R32G32B32A32_USCALED:
  172. return fetch_R32G32B32A32_USCALED;
  173. case PIPE_FORMAT_R32_SNORM:
  174. return fetch_R32_SNORM;
  175. case PIPE_FORMAT_R32G32_SNORM:
  176. return fetch_R32G32_SNORM;
  177. case PIPE_FORMAT_R32G32B32_SNORM:
  178. return fetch_R32G32B32_SNORM;
  179. case PIPE_FORMAT_R32G32B32A32_SNORM:
  180. return fetch_R32G32B32A32_SNORM;
  181. case PIPE_FORMAT_R32_SSCALED:
  182. return fetch_R32_SSCALED;
  183. case PIPE_FORMAT_R32G32_SSCALED:
  184. return fetch_R32G32_SSCALED;
  185. case PIPE_FORMAT_R32G32B32_SSCALED:
  186. return fetch_R32G32B32_SSCALED;
  187. case PIPE_FORMAT_R32G32B32A32_SSCALED:
  188. return fetch_R32G32B32A32_SSCALED;
  189. case PIPE_FORMAT_R16_UNORM:
  190. return fetch_R16_UNORM;
  191. case PIPE_FORMAT_R16G16_UNORM:
  192. return fetch_R16G16_UNORM;
  193. case PIPE_FORMAT_R16G16B16_UNORM:
  194. return fetch_R16G16B16_UNORM;
  195. case PIPE_FORMAT_R16G16B16A16_UNORM:
  196. return fetch_R16G16B16A16_UNORM;
  197. case PIPE_FORMAT_R16_USCALED:
  198. return fetch_R16_USCALED;
  199. case PIPE_FORMAT_R16G16_USCALED:
  200. return fetch_R16G16_USCALED;
  201. case PIPE_FORMAT_R16G16B16_USCALED:
  202. return fetch_R16G16B16_USCALED;
  203. case PIPE_FORMAT_R16G16B16A16_USCALED:
  204. return fetch_R16G16B16A16_USCALED;
  205. case PIPE_FORMAT_R16_SNORM:
  206. return fetch_R16_SNORM;
  207. case PIPE_FORMAT_R16G16_SNORM:
  208. return fetch_R16G16_SNORM;
  209. case PIPE_FORMAT_R16G16B16_SNORM:
  210. return fetch_R16G16B16_SNORM;
  211. case PIPE_FORMAT_R16G16B16A16_SNORM:
  212. return fetch_R16G16B16A16_SNORM;
  213. case PIPE_FORMAT_R16_SSCALED:
  214. return fetch_R16_SSCALED;
  215. case PIPE_FORMAT_R16G16_SSCALED:
  216. return fetch_R16G16_SSCALED;
  217. case PIPE_FORMAT_R16G16B16_SSCALED:
  218. return fetch_R16G16B16_SSCALED;
  219. case PIPE_FORMAT_R16G16B16A16_SSCALED:
  220. return fetch_R16G16B16A16_SSCALED;
  221. case PIPE_FORMAT_R8_UNORM:
  222. return fetch_R8_UNORM;
  223. case PIPE_FORMAT_R8G8_UNORM:
  224. return fetch_R8G8_UNORM;
  225. case PIPE_FORMAT_R8G8B8_UNORM:
  226. return fetch_R8G8B8_UNORM;
  227. case PIPE_FORMAT_R8G8B8A8_UNORM:
  228. return fetch_R8G8B8A8_UNORM;
  229. case PIPE_FORMAT_R8_USCALED:
  230. return fetch_R8_USCALED;
  231. case PIPE_FORMAT_R8G8_USCALED:
  232. return fetch_R8G8_USCALED;
  233. case PIPE_FORMAT_R8G8B8_USCALED:
  234. return fetch_R8G8B8_USCALED;
  235. case PIPE_FORMAT_R8G8B8A8_USCALED:
  236. return fetch_R8G8B8A8_USCALED;
  237. case PIPE_FORMAT_R8_SNORM:
  238. return fetch_R8_SNORM;
  239. case PIPE_FORMAT_R8G8_SNORM:
  240. return fetch_R8G8_SNORM;
  241. case PIPE_FORMAT_R8G8B8_SNORM:
  242. return fetch_R8G8B8_SNORM;
  243. case PIPE_FORMAT_R8G8B8A8_SNORM:
  244. return fetch_R8G8B8A8_SNORM;
  245. case PIPE_FORMAT_R8_SSCALED:
  246. return fetch_R8_SSCALED;
  247. case PIPE_FORMAT_R8G8_SSCALED:
  248. return fetch_R8G8_SSCALED;
  249. case PIPE_FORMAT_R8G8B8_SSCALED:
  250. return fetch_R8G8B8_SSCALED;
  251. case PIPE_FORMAT_R8G8B8A8_SSCALED:
  252. return fetch_R8G8B8A8_SSCALED;
  253. case PIPE_FORMAT_A8R8G8B8_UNORM:
  254. return fetch_A8R8G8B8_UNORM;
  255. case 0:
  256. return NULL; /* not sure why this is needed */
  257. default:
  258. assert(0);
  259. return NULL;
  260. }
  261. }
  262. void
  263. spu_transpose_4x4(qword *out, const qword *in)
  264. {
  265. static const qword masks[8] = {
  266. {
  267. 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
  268. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  269. },
  270. {
  271. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  272. 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
  273. },
  274. {
  275. 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
  276. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  277. },
  278. {
  279. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  280. 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
  281. },
  282. {
  283. 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
  284. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  285. },
  286. {
  287. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  288. 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
  289. },
  290. {
  291. 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f,
  292. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  293. },
  294. {
  295. 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  296. 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f,
  297. },
  298. };
  299. out[0] = si_shufb(in[0], in[1], masks[0]);
  300. out[0] = si_or(out[0], si_shufb(in[2], in[3], masks[1]));
  301. out[1] = si_shufb(in[0], in[1], masks[2]);
  302. out[1] = si_or(out[1], si_shufb(in[2], in[3], masks[3]));
  303. out[2] = si_shufb(in[0], in[1], masks[4]);
  304. out[2] = si_or(out[2], si_shufb(in[2], in[3], masks[5]));
  305. out[3] = si_shufb(in[0], in[1], masks[6]);
  306. out[3] = si_or(out[3], si_shufb(in[2], in[3], masks[7]));
  307. }
  308. /**
  309. * Fetch vertex attributes for 'count' vertices.
  310. */
  311. static void generic_vertex_fetch(struct spu_vs_context *draw,
  312. struct spu_exec_machine *machine,
  313. const unsigned *elts,
  314. unsigned count)
  315. {
  316. unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
  317. unsigned attr;
  318. assert(count <= 4);
  319. wait_on_mask(1 << TAG_VERTEX_BUFFER);
  320. // _mesa_printf("%s %d\n", __FUNCTION__, count);
  321. /* loop over vertex attributes (vertex shader inputs)
  322. */
  323. for (attr = 0; attr < nr_attrs; attr++) {
  324. const unsigned pitch = draw->vertex_fetch.pitch[attr];
  325. const uint64_t src = draw->vertex_fetch.src_ptr[attr];
  326. const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr];
  327. unsigned i;
  328. qword p[4];
  329. /* Fetch four attributes for four vertices.
  330. *
  331. * Could fetch directly into AOS format, but this is meant to be
  332. * a prototype for an sse implementation, which would have
  333. * difficulties doing that.
  334. */
  335. for (i = 0; i < count; i++) {
  336. uint8_t buffer[32] ALIGN16_ATTRIB;
  337. const uint64_t addr = src + (elts[i] * pitch);
  338. const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32;
  339. mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0);
  340. wait_on_mask(1 << TAG_VERTEX_BUFFER);
  341. p[i] = (*fetch)(buffer + (addr & 0x0f));
  342. }
  343. /* Be nice and zero out any missing vertices:
  344. */
  345. for (/* empty */; i < 4; i++)
  346. p[i] = si_xor(p[i], p[i]);
  347. /* Transpose/swizzle into vector-friendly format. Currently
  348. * assuming that all vertex shader inputs are float[4], but this
  349. * isn't true -- if the vertex shader only wants tex0.xy, we
  350. * could optimize for that.
  351. *
  352. * To do so fully without codegen would probably require an
  353. * excessive number of fetch functions, but we could at least
  354. * minimize the transpose step:
  355. */
  356. spu_transpose_4x4(&machine->Inputs[attr].xyzw[0].q, p);
  357. }
  358. }
  359. void spu_update_vertex_fetch( struct spu_vs_context *draw )
  360. {
  361. unsigned i;
  362. for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
  363. draw->vertex_fetch.fetch[i] =
  364. get_fetch_func(draw->vertex_fetch.format[i]);
  365. }
  366. draw->vertex_fetch.fetch_func = generic_vertex_fetch;
  367. }