Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

codegen.c 23KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824
  1. /*
  2. * Copyright (c) 2017 Lima Project
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sub license,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the
  12. * next paragraph) shall be included in all copies or substantial portions
  13. * of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21. * DEALINGS IN THE SOFTWARE.
  22. *
  23. */
  24. #include "util/ralloc.h"
  25. #include "util/u_half.h"
  26. #include "util/bitscan.h"
  27. #include "ppir.h"
  28. #include "codegen.h"
  29. #include "lima_context.h"
  /*
   * Pack a four-entry swizzle into consecutive two-bit fields.
   *
   * Each entry is offset by `shift`, truncated to two bits, and stored at
   * bit position (lane + dest_shift) * 2 of the returned value.
   */
  static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
  {
     unsigned packed = 0;
     int lane = 0;

     while (lane < 4) {
        const unsigned selector = (swizzle[lane] + shift) & 0x3;

        packed |= selector << ((lane + dest_shift) * 2);
        lane++;
     }

     return packed;
  }
  37. static int get_scl_reg_index(ppir_src *src, int component)
  38. {
  39. int ret = ppir_target_get_src_reg_index(src);
  40. ret += src->swizzle[component];
  41. return ret;
  42. }
  43. static void ppir_codegen_encode_varying(ppir_node *node, void *code)
  44. {
  45. ppir_codegen_field_varying *f = code;
  46. ppir_load_node *load = ppir_node_to_load(node);
  47. ppir_dest *dest = &load->dest;
  48. int index = ppir_target_get_dest_reg_index(dest);
  49. int num_components = load->num_components;
  50. if (node->op != ppir_op_load_coords_reg) {
  51. assert(node->op == ppir_op_load_varying ||
  52. node->op == ppir_op_load_coords ||
  53. node->op == ppir_op_load_fragcoord ||
  54. node->op == ppir_op_load_pointcoord ||
  55. node->op == ppir_op_load_frontface);
  56. f->imm.dest = index >> 2;
  57. f->imm.mask = dest->write_mask << (index & 0x3);
  58. int alignment = num_components == 3 ? 3 : num_components - 1;
  59. f->imm.alignment = alignment;
  60. if (load->num_src) {
  61. index = ppir_target_get_src_reg_index(&load->src);
  62. f->imm.offset_vector = index >> 2;
  63. f->imm.offset_scalar = index & 0x3;
  64. } else
  65. f->imm.offset_vector = 0xf;
  66. if (alignment == 3)
  67. f->imm.index = load->index >> 2;
  68. else
  69. f->imm.index = load->index >> alignment;
  70. switch (node->op) {
  71. case ppir_op_load_fragcoord:
  72. f->imm.source_type = 2;
  73. f->imm.perspective = 3;
  74. break;
  75. case ppir_op_load_pointcoord:
  76. f->imm.source_type = 3;
  77. break;
  78. case ppir_op_load_frontface:
  79. f->imm.source_type = 3;
  80. f->imm.perspective = 1;
  81. break;
  82. case ppir_op_load_coords:
  83. /* num_components == 3 implies cubemap as we don't support 3D textures */
  84. f->imm.source_type = num_components == 3 ? 2 : 0;
  85. break;
  86. default:
  87. break;
  88. }
  89. }
  90. else { /* node->op == ppir_op_load_coords_reg */
  91. f->reg.dest = index >> 2;
  92. f->reg.mask = dest->write_mask << (index & 0x3);
  93. if (load->num_src) {
  94. /* num_components == 3 implies cubemap as we don't support 3D textures */
  95. if (num_components == 3) {
  96. f->reg.source_type = 2;
  97. f->reg.perspective = 1;
  98. } else {
  99. f->reg.source_type = 1;
  100. }
  101. ppir_src *src = &load->src;
  102. index = ppir_target_get_src_reg_index(src);
  103. f->reg.source = index >> 2;
  104. f->reg.negate = src->negate;
  105. f->reg.absolute = src->absolute;
  106. f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
  107. }
  108. }
  109. }
  110. static void ppir_codegen_encode_texld(ppir_node *node, void *code)
  111. {
  112. ppir_codegen_field_sampler *f = code;
  113. ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
  114. f->index = ldtex->sampler;
  115. f->lod_bias_en = ldtex->lod_bias_en;
  116. f->explicit_lod = ldtex->explicit_lod;
  117. if (ldtex->lod_bias_en)
  118. ppir_target_get_src_reg_index(&ldtex->lod_bias);
  119. switch (ldtex->sampler_dim) {
  120. case GLSL_SAMPLER_DIM_2D:
  121. case GLSL_SAMPLER_DIM_RECT:
  122. case GLSL_SAMPLER_DIM_EXTERNAL:
  123. f->type = ppir_codegen_sampler_type_2d;
  124. break;
  125. case GLSL_SAMPLER_DIM_CUBE:
  126. f->type = ppir_codegen_sampler_type_cube;
  127. break;
  128. default:
  129. break;
  130. }
  131. f->offset_en = 0;
  132. f->unknown_2 = 0x39001;
  133. }
  134. static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
  135. {
  136. ppir_codegen_field_uniform *f = code;
  137. ppir_load_node *load = ppir_node_to_load(node);
  138. switch (node->op) {
  139. case ppir_op_load_uniform:
  140. f->source = ppir_codegen_uniform_src_uniform;
  141. break;
  142. case ppir_op_load_temp:
  143. f->source = ppir_codegen_uniform_src_temporary;
  144. break;
  145. default:
  146. assert(0);
  147. }
  148. /* Uniforms are always aligned to vec4 boundary */
  149. f->alignment = 2;
  150. f->index = load->index;
  151. if (load->num_src) {
  152. f->offset_en = 1;
  153. f->offset_reg = ppir_target_get_src_reg_index(&load->src);
  154. }
  155. }
  /*
   * Map a shift amount in [-3, 3] to an opcode value: non-negative shifts
   * map to themselves, negative shifts map to shift + 8 (so -1 -> 7,
   * -3 -> 5).
   */
  static unsigned shift_to_op(int shift)
  {
     assert(shift >= -3 && shift <= 3);

     if (shift >= 0)
        return shift;

     return shift + 8;
  }
  161. static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
  162. {
  163. ppir_codegen_field_vec4_mul *f = code;
  164. ppir_alu_node *alu = ppir_node_to_alu(node);
  165. ppir_dest *dest = &alu->dest;
  166. int dest_shift = 0;
  167. if (dest->type != ppir_target_pipeline) {
  168. int index = ppir_target_get_dest_reg_index(dest);
  169. dest_shift = index & 0x3;
  170. f->dest = index >> 2;
  171. f->mask = dest->write_mask << dest_shift;
  172. }
  173. f->dest_modifier = dest->modifier;
  174. switch (node->op) {
  175. case ppir_op_mul:
  176. f->op = shift_to_op(alu->shift);
  177. break;
  178. case ppir_op_mov:
  179. case ppir_op_store_color:
  180. f->op = ppir_codegen_vec4_mul_op_mov;
  181. break;
  182. case ppir_op_max:
  183. f->op = ppir_codegen_vec4_mul_op_max;
  184. break;
  185. case ppir_op_min:
  186. f->op = ppir_codegen_vec4_mul_op_min;
  187. break;
  188. case ppir_op_and:
  189. f->op = ppir_codegen_vec4_mul_op_and;
  190. break;
  191. case ppir_op_or:
  192. f->op = ppir_codegen_vec4_mul_op_or;
  193. break;
  194. case ppir_op_xor:
  195. f->op = ppir_codegen_vec4_mul_op_xor;
  196. break;
  197. case ppir_op_gt:
  198. f->op = ppir_codegen_vec4_mul_op_gt;
  199. break;
  200. case ppir_op_ge:
  201. f->op = ppir_codegen_vec4_mul_op_ge;
  202. break;
  203. case ppir_op_eq:
  204. f->op = ppir_codegen_vec4_mul_op_eq;
  205. break;
  206. case ppir_op_ne:
  207. f->op = ppir_codegen_vec4_mul_op_ne;
  208. break;
  209. case ppir_op_not:
  210. f->op = ppir_codegen_vec4_mul_op_not;
  211. break;
  212. default:
  213. break;
  214. }
  215. ppir_src *src = alu->src;
  216. int index = ppir_target_get_src_reg_index(src);
  217. f->arg0_source = index >> 2;
  218. f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
  219. f->arg0_absolute = src->absolute;
  220. f->arg0_negate = src->negate;
  221. if (alu->num_src == 2) {
  222. src = alu->src + 1;
  223. index = ppir_target_get_src_reg_index(src);
  224. f->arg1_source = index >> 2;
  225. f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
  226. f->arg1_absolute = src->absolute;
  227. f->arg1_negate = src->negate;
  228. }
  229. }
  230. static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
  231. {
  232. ppir_codegen_field_float_mul *f = code;
  233. ppir_alu_node *alu = ppir_node_to_alu(node);
  234. ppir_dest *dest = &alu->dest;
  235. int dest_component = ffs(dest->write_mask) - 1;
  236. assert(dest_component >= 0);
  237. if (dest->type != ppir_target_pipeline) {
  238. f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
  239. f->output_en = true;
  240. }
  241. f->dest_modifier = dest->modifier;
  242. switch (node->op) {
  243. case ppir_op_mul:
  244. f->op = shift_to_op(alu->shift);
  245. break;
  246. case ppir_op_mov:
  247. f->op = ppir_codegen_float_mul_op_mov;
  248. break;
  249. case ppir_op_sel_cond:
  250. f->op = ppir_codegen_float_mul_op_mov;
  251. break;
  252. case ppir_op_max:
  253. f->op = ppir_codegen_float_mul_op_max;
  254. break;
  255. case ppir_op_min:
  256. f->op = ppir_codegen_float_mul_op_min;
  257. break;
  258. case ppir_op_and:
  259. f->op = ppir_codegen_float_mul_op_and;
  260. break;
  261. case ppir_op_or:
  262. f->op = ppir_codegen_float_mul_op_or;
  263. break;
  264. case ppir_op_xor:
  265. f->op = ppir_codegen_float_mul_op_xor;
  266. break;
  267. case ppir_op_gt:
  268. f->op = ppir_codegen_float_mul_op_gt;
  269. break;
  270. case ppir_op_ge:
  271. f->op = ppir_codegen_float_mul_op_ge;
  272. break;
  273. case ppir_op_eq:
  274. f->op = ppir_codegen_float_mul_op_eq;
  275. break;
  276. case ppir_op_ne:
  277. f->op = ppir_codegen_float_mul_op_ne;
  278. break;
  279. case ppir_op_not:
  280. f->op = ppir_codegen_float_mul_op_not;
  281. break;
  282. default:
  283. break;
  284. }
  285. ppir_src *src = alu->src;
  286. f->arg0_source = get_scl_reg_index(src, dest_component);
  287. f->arg0_absolute = src->absolute;
  288. f->arg0_negate = src->negate;
  289. if (alu->num_src == 2) {
  290. src = alu->src + 1;
  291. f->arg1_source = get_scl_reg_index(src, dest_component);
  292. f->arg1_absolute = src->absolute;
  293. f->arg1_negate = src->negate;
  294. }
  295. }
  296. static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
  297. {
  298. ppir_codegen_field_vec4_acc *f = code;
  299. ppir_alu_node *alu = ppir_node_to_alu(node);
  300. ppir_dest *dest = &alu->dest;
  301. int index = ppir_target_get_dest_reg_index(dest);
  302. int dest_shift = index & 0x3;
  303. f->dest = index >> 2;
  304. f->mask = dest->write_mask << dest_shift;
  305. f->dest_modifier = dest->modifier;
  306. switch (node->op) {
  307. case ppir_op_add:
  308. f->op = ppir_codegen_vec4_acc_op_add;
  309. break;
  310. case ppir_op_mov:
  311. case ppir_op_store_color:
  312. f->op = ppir_codegen_vec4_acc_op_mov;
  313. break;
  314. case ppir_op_sum3:
  315. f->op = ppir_codegen_vec4_acc_op_sum3;
  316. dest_shift = 0;
  317. break;
  318. case ppir_op_sum4:
  319. f->op = ppir_codegen_vec4_acc_op_sum4;
  320. dest_shift = 0;
  321. break;
  322. case ppir_op_floor:
  323. f->op = ppir_codegen_vec4_acc_op_floor;
  324. break;
  325. case ppir_op_ceil:
  326. f->op = ppir_codegen_vec4_acc_op_ceil;
  327. break;
  328. case ppir_op_fract:
  329. f->op = ppir_codegen_vec4_acc_op_fract;
  330. break;
  331. case ppir_op_gt:
  332. f->op = ppir_codegen_vec4_acc_op_gt;
  333. break;
  334. case ppir_op_ge:
  335. f->op = ppir_codegen_vec4_acc_op_ge;
  336. break;
  337. case ppir_op_eq:
  338. f->op = ppir_codegen_vec4_acc_op_eq;
  339. break;
  340. case ppir_op_ne:
  341. f->op = ppir_codegen_vec4_acc_op_ne;
  342. break;
  343. case ppir_op_select:
  344. f->op = ppir_codegen_vec4_acc_op_sel;
  345. break;
  346. case ppir_op_max:
  347. f->op = ppir_codegen_vec4_acc_op_max;
  348. break;
  349. case ppir_op_min:
  350. f->op = ppir_codegen_vec4_acc_op_min;
  351. break;
  352. case ppir_op_ddx:
  353. f->op = ppir_codegen_vec4_acc_op_dFdx;
  354. break;
  355. case ppir_op_ddy:
  356. f->op = ppir_codegen_vec4_acc_op_dFdy;
  357. break;
  358. default:
  359. break;
  360. }
  361. ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
  362. index = ppir_target_get_src_reg_index(src);
  363. if (src->type == ppir_target_pipeline &&
  364. src->pipeline == ppir_pipeline_reg_vmul)
  365. f->mul_in = true;
  366. else
  367. f->arg0_source = index >> 2;
  368. f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
  369. f->arg0_absolute = src->absolute;
  370. f->arg0_negate = src->negate;
  371. if (++src < alu->src + alu->num_src) {
  372. index = ppir_target_get_src_reg_index(src);
  373. f->arg1_source = index >> 2;
  374. f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
  375. f->arg1_absolute = src->absolute;
  376. f->arg1_negate = src->negate;
  377. }
  378. }
  379. static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
  380. {
  381. ppir_codegen_field_float_acc *f = code;
  382. ppir_alu_node *alu = ppir_node_to_alu(node);
  383. ppir_dest *dest = &alu->dest;
  384. int dest_component = ffs(dest->write_mask) - 1;
  385. assert(dest_component >= 0);
  386. f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
  387. f->output_en = true;
  388. f->dest_modifier = dest->modifier;
  389. switch (node->op) {
  390. case ppir_op_add:
  391. f->op = shift_to_op(alu->shift);
  392. break;
  393. case ppir_op_mov:
  394. f->op = ppir_codegen_float_acc_op_mov;
  395. break;
  396. case ppir_op_max:
  397. f->op = ppir_codegen_float_acc_op_max;
  398. break;
  399. case ppir_op_min:
  400. f->op = ppir_codegen_float_acc_op_min;
  401. break;
  402. case ppir_op_floor:
  403. f->op = ppir_codegen_float_acc_op_floor;
  404. break;
  405. case ppir_op_ceil:
  406. f->op = ppir_codegen_float_acc_op_ceil;
  407. break;
  408. case ppir_op_fract:
  409. f->op = ppir_codegen_float_acc_op_fract;
  410. break;
  411. case ppir_op_gt:
  412. f->op = ppir_codegen_float_acc_op_gt;
  413. break;
  414. case ppir_op_ge:
  415. f->op = ppir_codegen_float_acc_op_ge;
  416. break;
  417. case ppir_op_eq:
  418. f->op = ppir_codegen_float_acc_op_eq;
  419. break;
  420. case ppir_op_ne:
  421. f->op = ppir_codegen_float_acc_op_ne;
  422. break;
  423. case ppir_op_select:
  424. f->op = ppir_codegen_float_acc_op_sel;
  425. break;
  426. case ppir_op_ddx:
  427. f->op = ppir_codegen_float_acc_op_dFdx;
  428. break;
  429. case ppir_op_ddy:
  430. f->op = ppir_codegen_float_acc_op_dFdy;
  431. break;
  432. default:
  433. break;
  434. }
  435. ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
  436. if (src->type == ppir_target_pipeline &&
  437. src->pipeline == ppir_pipeline_reg_fmul)
  438. f->mul_in = true;
  439. else
  440. f->arg0_source = get_scl_reg_index(src, dest_component);
  441. f->arg0_absolute = src->absolute;
  442. f->arg0_negate = src->negate;
  443. if (++src < alu->src + alu->num_src) {
  444. f->arg1_source = get_scl_reg_index(src, dest_component);
  445. f->arg1_absolute = src->absolute;
  446. f->arg1_negate = src->negate;
  447. }
  448. }
  449. static void ppir_codegen_encode_combine(ppir_node *node, void *code)
  450. {
  451. ppir_codegen_field_combine *f = code;
  452. ppir_alu_node *alu = ppir_node_to_alu(node);
  453. switch (node->op) {
  454. case ppir_op_rsqrt:
  455. case ppir_op_log2:
  456. case ppir_op_exp2:
  457. case ppir_op_rcp:
  458. case ppir_op_sqrt:
  459. case ppir_op_sin:
  460. case ppir_op_cos:
  461. {
  462. f->scalar.dest_vec = false;
  463. f->scalar.arg1_en = false;
  464. ppir_dest *dest = &alu->dest;
  465. int dest_component = ffs(dest->write_mask) - 1;
  466. assert(dest_component >= 0);
  467. f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
  468. f->scalar.dest_modifier = dest->modifier;
  469. ppir_src *src = alu->src;
  470. f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
  471. f->scalar.arg0_absolute = src->absolute;
  472. f->scalar.arg0_negate = src->negate;
  473. switch (node->op) {
  474. case ppir_op_rsqrt:
  475. f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
  476. break;
  477. case ppir_op_log2:
  478. f->scalar.op = ppir_codegen_combine_scalar_op_log2;
  479. break;
  480. case ppir_op_exp2:
  481. f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
  482. break;
  483. case ppir_op_rcp:
  484. f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
  485. break;
  486. case ppir_op_sqrt:
  487. f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
  488. break;
  489. case ppir_op_sin:
  490. f->scalar.op = ppir_codegen_combine_scalar_op_sin;
  491. break;
  492. case ppir_op_cos:
  493. f->scalar.op = ppir_codegen_combine_scalar_op_cos;
  494. break;
  495. default:
  496. break;
  497. }
  498. }
  499. default:
  500. break;
  501. }
  502. }
  503. static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
  504. {
  505. assert(node->op == ppir_op_store_temp);
  506. ppir_codegen_field_temp_write *f = code;
  507. ppir_store_node *snode = ppir_node_to_store(node);
  508. int num_components = snode->num_components;
  509. f->temp_write.dest = 0x03; // 11 - temporary
  510. f->temp_write.source = snode->src.reg->index;
  511. int alignment = num_components == 4 ? 2 : num_components - 1;
  512. f->temp_write.alignment = alignment;
  513. f->temp_write.index = snode->index << (2 - alignment);
  514. f->temp_write.offset_reg = snode->index >> 2;
  515. }
  516. static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
  517. {
  518. for (int i = 0; i < constant->num; i++)
  519. code[i] = util_float_to_half(constant->value[i].f);
  520. }
  521. static void ppir_codegen_encode_discard(ppir_node *node, void *code)
  522. {
  523. ppir_codegen_field_branch *b = code;
  524. assert(node->op == ppir_op_discard);
  525. b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
  526. b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
  527. b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
  528. }
  529. static void ppir_codegen_encode_branch(ppir_node *node, void *code)
  530. {
  531. ppir_codegen_field_branch *b = code;
  532. ppir_branch_node *branch;
  533. ppir_instr *target_instr;
  534. ppir_block *target;
  535. if (node->op == ppir_op_discard) {
  536. ppir_codegen_encode_discard(node, code);
  537. return;
  538. }
  539. assert(node->op == ppir_op_branch);
  540. branch = ppir_node_to_branch(node);
  541. b->branch.unknown_0 = 0x0;
  542. b->branch.unknown_1 = 0x0;
  543. if (branch->num_src == 2) {
  544. b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
  545. b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
  546. b->branch.cond_gt = branch->cond_gt;
  547. b->branch.cond_eq = branch->cond_eq;
  548. b->branch.cond_lt = branch->cond_lt;
  549. } else if (branch->num_src == 0) {
  550. /* Unconditional branch */
  551. b->branch.arg0_source = 0;
  552. b->branch.arg1_source = 0;
  553. b->branch.cond_gt = true;
  554. b->branch.cond_eq = true;
  555. b->branch.cond_lt = true;
  556. } else {
  557. assert(false);
  558. }
  559. target = branch->target;
  560. while (list_is_empty(&target->instr_list)) {
  561. if (!target->list.next)
  562. break;
  563. target = LIST_ENTRY(ppir_block, target->list.next, list);
  564. }
  565. assert(!list_is_empty(&target->instr_list));
  566. target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
  567. b->branch.target = target_instr->offset - node->instr->offset;
  568. b->branch.next_count = target_instr->encode_size;
  569. }
  570. typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
  571. static const ppir_codegen_instr_slot_encode_func
  572. ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
  573. [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
  574. [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
  575. [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
  576. [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
  577. [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
  578. [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
  579. [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
  580. [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
  581. [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
  582. [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
  583. };
  /*
   * Size in bits of each encoded field, indexed by instruction slot number.
   * NOTE(review): entries are positional; presumably they follow the
   * PPIR_INSTR_SLOT_* enum order - confirm against ppir.h.
   */
  static const int ppir_codegen_field_size[] = {
     34, /* slot 0 */
     62, /* slot 1 */
     41, /* slot 2 */
     43, /* slot 3 */
     30, /* slot 4 */
     44, /* slot 5 */
     31, /* slot 6 */
     30, /* slot 7 */
     41, /* slot 8 */
     73, /* slot 9 */
  };
  /* Number of 32-bit words needed to hold `size` bits (rounds up). */
  static inline int align_to_word(int size)
  {
     const int rounded = size + 0x1f;

     return rounded >> 5;
  }
  591. static int get_instr_encode_size(ppir_instr *instr)
  592. {
  593. int size = 0;
  594. for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
  595. if (instr->slots[i])
  596. size += ppir_codegen_field_size[i];
  597. }
  598. for (int i = 0; i < 2; i++) {
  599. if (instr->constant[i].num)
  600. size += 64;
  601. }
  602. return align_to_word(size) + 1;
  603. }
  /*
   * OR `src_size` bits from `src` into `dst`, starting at bit `dst_offset`.
   *
   * Both buffers are accessed as arrays of 32-bit words. When the
   * destination offset is word aligned the data is copied with memcpy()
   * (whole words, rounded up); otherwise each source word is split across
   * two neighbouring destination words and OR-ed in, so the destination
   * bits are expected to be zero beforehand.
   */
  static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
  {
     uint32_t *out = (uint32_t *)dst + (dst_offset >> 5);
     uint32_t *in = src;
     const int low_shift = dst_offset & 0x1f;

     if (!low_shift) {
        memcpy(out, in, align_to_word(src_size) * 4);
        return;
     }

     const int high_shift = 32 - low_shift;
     int copied = 0;

     for (;;) {
        /* Low part of the source word fills the rest of this word. */
        *out |= *in << low_shift;
        out++;

        copied += high_shift;
        if (copied >= src_size)
           break;

        /* Remaining high part starts the next destination word. */
        *out |= *in >> high_shift;
        in++;

        copied += low_shift;
        if (copied >= src_size)
           break;
     }
  }
  628. static int encode_instr(ppir_instr *instr, void *code, void *last_code)
  629. {
  630. int size = 0;
  631. ppir_codegen_ctrl *ctrl = code;
  632. for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
  633. if (instr->slots[i]) {
  634. /* max field size (73), align to dword */
  635. uint8_t output[12] = {0};
  636. ppir_codegen_encode_slot[i](instr->slots[i], output);
  637. bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
  638. size += ppir_codegen_field_size[i];
  639. ctrl->fields |= 1 << i;
  640. }
  641. }
  642. if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
  643. ctrl->sync = true;
  644. if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
  645. ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
  646. if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
  647. ctrl->sync = true;
  648. }
  649. if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
  650. ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
  651. if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
  652. ctrl->sync = true;
  653. }
  654. for (int i = 0; i < 2; i++) {
  655. if (instr->constant[i].num) {
  656. uint16_t output[4] = {0};
  657. ppir_codegen_encode_const(instr->constant + i, output);
  658. bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
  659. size += 64;
  660. ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
  661. }
  662. }
  663. size = align_to_word(size) + 1;
  664. ctrl->count = size;
  665. if (instr->is_end)
  666. ctrl->stop = true;
  667. if (last_code) {
  668. ppir_codegen_ctrl *last_ctrl = last_code;
  669. last_ctrl->next_count = size;
  670. last_ctrl->prefetch = true;
  671. }
  672. return size;
  673. }
  674. static void ppir_codegen_print_prog(ppir_compiler *comp)
  675. {
  676. uint32_t *prog = comp->prog->shader;
  677. unsigned offset = 0;
  678. printf("========ppir codegen========\n");
  679. list_for_each_entry(ppir_block, block, &comp->block_list, list) {
  680. list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
  681. printf("%03d (@%6d): ", instr->index, instr->offset);
  682. int n = prog[0] & 0x1f;
  683. for (int i = 0; i < n; i++) {
  684. if (i && i % 6 == 0)
  685. printf("\n ");
  686. printf("%08x ", prog[i]);
  687. }
  688. printf("\n");
  689. ppir_disassemble_instr(prog, offset);
  690. prog += n;
  691. offset += n;
  692. }
  693. }
  694. printf("-----------------------\n");
  695. }
  696. bool ppir_codegen_prog(ppir_compiler *comp)
  697. {
  698. int size = 0;
  699. list_for_each_entry(ppir_block, block, &comp->block_list, list) {
  700. list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
  701. instr->offset = size;
  702. instr->encode_size = get_instr_encode_size(instr);
  703. size += instr->encode_size;
  704. }
  705. }
  706. uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
  707. if (!prog)
  708. return false;
  709. uint32_t *code = prog, *last_code = NULL;
  710. list_for_each_entry(ppir_block, block, &comp->block_list, list) {
  711. list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
  712. int offset = encode_instr(instr, code, last_code);
  713. last_code = code;
  714. code += offset;
  715. }
  716. }
  717. comp->prog->shader = prog;
  718. comp->prog->shader_size = size * sizeof(uint32_t);
  719. if (lima_debug & LIMA_DEBUG_PP)
  720. ppir_codegen_print_prog(comp);
  721. return true;
  722. }