Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

anv_batch_chain.c 60KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669
  1. /*
  2. * Copyright © 2015 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. */
  23. #include <assert.h>
  24. #include <stdbool.h>
  25. #include <string.h>
  26. #include <unistd.h>
  27. #include <fcntl.h>
  28. #include "anv_private.h"
  29. #include "genxml/gen8_pack.h"
  30. #include "util/debug.h"
  31. /** \file anv_batch_chain.c
  32. *
  33. * This file contains functions related to anv_cmd_buffer as a data
  34. * structure. This involves everything required to create and destroy
  35. * the actual batch buffers as well as link them together and handle
  36. * relocations and surface state. It specifically does *not* contain any
  37. * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
  38. */
  39. /*-----------------------------------------------------------------------*
  40. * Functions related to anv_reloc_list
  41. *-----------------------------------------------------------------------*/
  42. static VkResult
  43. anv_reloc_list_init_clone(struct anv_reloc_list *list,
  44. const VkAllocationCallbacks *alloc,
  45. const struct anv_reloc_list *other_list)
  46. {
  47. if (other_list) {
  48. list->num_relocs = other_list->num_relocs;
  49. list->array_length = other_list->array_length;
  50. } else {
  51. list->num_relocs = 0;
  52. list->array_length = 256;
  53. }
  54. list->relocs =
  55. vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
  56. VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  57. if (list->relocs == NULL)
  58. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  59. list->reloc_bos =
  60. vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
  61. VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  62. if (list->reloc_bos == NULL) {
  63. vk_free(alloc, list->relocs);
  64. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  65. }
  66. if (other_list) {
  67. memcpy(list->relocs, other_list->relocs,
  68. list->array_length * sizeof(*list->relocs));
  69. memcpy(list->reloc_bos, other_list->reloc_bos,
  70. list->array_length * sizeof(*list->reloc_bos));
  71. }
  72. return VK_SUCCESS;
  73. }
  74. VkResult
  75. anv_reloc_list_init(struct anv_reloc_list *list,
  76. const VkAllocationCallbacks *alloc)
  77. {
  78. return anv_reloc_list_init_clone(list, alloc, NULL);
  79. }
  80. void
  81. anv_reloc_list_finish(struct anv_reloc_list *list,
  82. const VkAllocationCallbacks *alloc)
  83. {
  84. vk_free(alloc, list->relocs);
  85. vk_free(alloc, list->reloc_bos);
  86. }
  87. static VkResult
  88. anv_reloc_list_grow(struct anv_reloc_list *list,
  89. const VkAllocationCallbacks *alloc,
  90. size_t num_additional_relocs)
  91. {
  92. if (list->num_relocs + num_additional_relocs <= list->array_length)
  93. return VK_SUCCESS;
  94. size_t new_length = list->array_length * 2;
  95. while (new_length < list->num_relocs + num_additional_relocs)
  96. new_length *= 2;
  97. struct drm_i915_gem_relocation_entry *new_relocs =
  98. vk_alloc(alloc, new_length * sizeof(*list->relocs), 8,
  99. VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  100. if (new_relocs == NULL)
  101. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  102. struct anv_bo **new_reloc_bos =
  103. vk_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
  104. VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  105. if (new_reloc_bos == NULL) {
  106. vk_free(alloc, new_relocs);
  107. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  108. }
  109. memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
  110. memcpy(new_reloc_bos, list->reloc_bos,
  111. list->num_relocs * sizeof(*list->reloc_bos));
  112. vk_free(alloc, list->relocs);
  113. vk_free(alloc, list->reloc_bos);
  114. list->array_length = new_length;
  115. list->relocs = new_relocs;
  116. list->reloc_bos = new_reloc_bos;
  117. return VK_SUCCESS;
  118. }
  119. VkResult
  120. anv_reloc_list_add(struct anv_reloc_list *list,
  121. const VkAllocationCallbacks *alloc,
  122. uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
  123. {
  124. struct drm_i915_gem_relocation_entry *entry;
  125. int index;
  126. VkResult result = anv_reloc_list_grow(list, alloc, 1);
  127. if (result != VK_SUCCESS)
  128. return result;
  129. /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
  130. index = list->num_relocs++;
  131. list->reloc_bos[index] = target_bo;
  132. entry = &list->relocs[index];
  133. entry->target_handle = target_bo->gem_handle;
  134. entry->delta = delta;
  135. entry->offset = offset;
  136. entry->presumed_offset = target_bo->offset;
  137. entry->read_domains = 0;
  138. entry->write_domain = 0;
  139. VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
  140. return VK_SUCCESS;
  141. }
  142. static VkResult
  143. anv_reloc_list_append(struct anv_reloc_list *list,
  144. const VkAllocationCallbacks *alloc,
  145. struct anv_reloc_list *other, uint32_t offset)
  146. {
  147. VkResult result = anv_reloc_list_grow(list, alloc, other->num_relocs);
  148. if (result != VK_SUCCESS)
  149. return result;
  150. memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
  151. other->num_relocs * sizeof(other->relocs[0]));
  152. memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
  153. other->num_relocs * sizeof(other->reloc_bos[0]));
  154. for (uint32_t i = 0; i < other->num_relocs; i++)
  155. list->relocs[i + list->num_relocs].offset += offset;
  156. list->num_relocs += other->num_relocs;
  157. return VK_SUCCESS;
  158. }
  159. /*-----------------------------------------------------------------------*
  160. * Functions related to anv_batch
  161. *-----------------------------------------------------------------------*/
  162. void *
  163. anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
  164. {
  165. if (batch->next + num_dwords * 4 > batch->end) {
  166. VkResult result = batch->extend_cb(batch, batch->user_data);
  167. if (result != VK_SUCCESS) {
  168. anv_batch_set_error(batch, result);
  169. return NULL;
  170. }
  171. }
  172. void *p = batch->next;
  173. batch->next += num_dwords * 4;
  174. assert(batch->next <= batch->end);
  175. return p;
  176. }
  177. uint64_t
  178. anv_batch_emit_reloc(struct anv_batch *batch,
  179. void *location, struct anv_bo *bo, uint32_t delta)
  180. {
  181. VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
  182. location - batch->start, bo, delta);
  183. if (result != VK_SUCCESS) {
  184. anv_batch_set_error(batch, result);
  185. return 0;
  186. }
  187. return bo->offset + delta;
  188. }
  189. void
  190. anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
  191. {
  192. uint32_t size, offset;
  193. size = other->next - other->start;
  194. assert(size % 4 == 0);
  195. if (batch->next + size > batch->end) {
  196. VkResult result = batch->extend_cb(batch, batch->user_data);
  197. if (result != VK_SUCCESS) {
  198. anv_batch_set_error(batch, result);
  199. return;
  200. }
  201. }
  202. assert(batch->next + size <= batch->end);
  203. VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
  204. memcpy(batch->next, other->start, size);
  205. offset = batch->next - batch->start;
  206. VkResult result = anv_reloc_list_append(batch->relocs, batch->alloc,
  207. other->relocs, offset);
  208. if (result != VK_SUCCESS) {
  209. anv_batch_set_error(batch, result);
  210. return;
  211. }
  212. batch->next += size;
  213. }
  214. /*-----------------------------------------------------------------------*
  215. * Functions related to anv_batch_bo
  216. *-----------------------------------------------------------------------*/
  217. static VkResult
  218. anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
  219. struct anv_batch_bo **bbo_out)
  220. {
  221. VkResult result;
  222. struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
  223. 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  224. if (bbo == NULL)
  225. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  226. result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
  227. ANV_CMD_BUFFER_BATCH_SIZE);
  228. if (result != VK_SUCCESS)
  229. goto fail_alloc;
  230. result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
  231. if (result != VK_SUCCESS)
  232. goto fail_bo_alloc;
  233. *bbo_out = bbo;
  234. return VK_SUCCESS;
  235. fail_bo_alloc:
  236. anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
  237. fail_alloc:
  238. vk_free(&cmd_buffer->pool->alloc, bbo);
  239. return result;
  240. }
  241. static VkResult
  242. anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
  243. const struct anv_batch_bo *other_bbo,
  244. struct anv_batch_bo **bbo_out)
  245. {
  246. VkResult result;
  247. struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
  248. 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  249. if (bbo == NULL)
  250. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  251. result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
  252. other_bbo->bo.size);
  253. if (result != VK_SUCCESS)
  254. goto fail_alloc;
  255. result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
  256. &other_bbo->relocs);
  257. if (result != VK_SUCCESS)
  258. goto fail_bo_alloc;
  259. bbo->length = other_bbo->length;
  260. memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
  261. *bbo_out = bbo;
  262. return VK_SUCCESS;
  263. fail_bo_alloc:
  264. anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
  265. fail_alloc:
  266. vk_free(&cmd_buffer->pool->alloc, bbo);
  267. return result;
  268. }
  269. static void
  270. anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
  271. size_t batch_padding)
  272. {
  273. batch->next = batch->start = bbo->bo.map;
  274. batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
  275. batch->relocs = &bbo->relocs;
  276. bbo->relocs.num_relocs = 0;
  277. }
  278. static void
  279. anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
  280. size_t batch_padding)
  281. {
  282. batch->start = bbo->bo.map;
  283. batch->next = bbo->bo.map + bbo->length;
  284. batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
  285. batch->relocs = &bbo->relocs;
  286. }
  287. static void
  288. anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
  289. {
  290. assert(batch->start == bbo->bo.map);
  291. bbo->length = batch->next - batch->start;
  292. VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
  293. }
  294. static VkResult
  295. anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
  296. struct anv_batch *batch, size_t aditional,
  297. size_t batch_padding)
  298. {
  299. assert(batch->start == bbo->bo.map);
  300. bbo->length = batch->next - batch->start;
  301. size_t new_size = bbo->bo.size;
  302. while (new_size <= bbo->length + aditional + batch_padding)
  303. new_size *= 2;
  304. if (new_size == bbo->bo.size)
  305. return VK_SUCCESS;
  306. struct anv_bo new_bo;
  307. VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
  308. &new_bo, new_size);
  309. if (result != VK_SUCCESS)
  310. return result;
  311. memcpy(new_bo.map, bbo->bo.map, bbo->length);
  312. anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
  313. bbo->bo = new_bo;
  314. anv_batch_bo_continue(bbo, batch, batch_padding);
  315. return VK_SUCCESS;
  316. }
  317. static void
  318. anv_batch_bo_destroy(struct anv_batch_bo *bbo,
  319. struct anv_cmd_buffer *cmd_buffer)
  320. {
  321. anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
  322. anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
  323. vk_free(&cmd_buffer->pool->alloc, bbo);
  324. }
  325. static VkResult
  326. anv_batch_bo_list_clone(const struct list_head *list,
  327. struct anv_cmd_buffer *cmd_buffer,
  328. struct list_head *new_list)
  329. {
  330. VkResult result = VK_SUCCESS;
  331. list_inithead(new_list);
  332. struct anv_batch_bo *prev_bbo = NULL;
  333. list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
  334. struct anv_batch_bo *new_bbo = NULL;
  335. result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
  336. if (result != VK_SUCCESS)
  337. break;
  338. list_addtail(&new_bbo->link, new_list);
  339. if (prev_bbo) {
  340. /* As we clone this list of batch_bo's, they chain one to the
  341. * other using MI_BATCH_BUFFER_START commands. We need to fix up
  342. * those relocations as we go. Fortunately, this is pretty easy
  343. * as it will always be the last relocation in the list.
  344. */
  345. uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
  346. assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
  347. prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
  348. }
  349. prev_bbo = new_bbo;
  350. }
  351. if (result != VK_SUCCESS) {
  352. list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
  353. anv_batch_bo_destroy(bbo, cmd_buffer);
  354. }
  355. return result;
  356. }
  357. /*-----------------------------------------------------------------------*
  358. * Functions related to anv_cmd_buffer
  359. *-----------------------------------------------------------------------*/
  360. static inline struct anv_batch_bo *
  361. anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
  362. {
  363. return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
  364. }
  365. struct anv_address
  366. anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
  367. {
  368. struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
  369. return (struct anv_address) {
  370. .bo = &cmd_buffer->device->surface_state_pool.block_pool.bo,
  371. .offset = bt_block->offset,
  372. };
  373. }
  374. static void
  375. emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
  376. struct anv_bo *bo, uint32_t offset)
  377. {
  378. /* In gen8+ the address field grew to two dwords to accomodate 48 bit
  379. * offsets. The high 16 bits are in the last dword, so we can use the gen8
  380. * version in either case, as long as we set the instruction length in the
  381. * header accordingly. This means that we always emit three dwords here
  382. * and all the padding and adjustment we do in this file works for all
  383. * gens.
  384. */
  385. #define GEN7_MI_BATCH_BUFFER_START_length 2
  386. #define GEN7_MI_BATCH_BUFFER_START_length_bias 2
  387. const uint32_t gen7_length =
  388. GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
  389. const uint32_t gen8_length =
  390. GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;
  391. anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
  392. bbs.DWordLength = cmd_buffer->device->info.gen < 8 ?
  393. gen7_length : gen8_length;
  394. bbs._2ndLevelBatchBuffer = _1stlevelbatch;
  395. bbs.AddressSpaceIndicator = ASI_PPGTT;
  396. bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset };
  397. }
  398. }
  399. static void
  400. cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
  401. struct anv_batch_bo *bbo)
  402. {
  403. struct anv_batch *batch = &cmd_buffer->batch;
  404. struct anv_batch_bo *current_bbo =
  405. anv_cmd_buffer_current_batch_bo(cmd_buffer);
  406. /* We set the end of the batch a little short so we would be sure we
  407. * have room for the chaining command. Since we're about to emit the
  408. * chaining command, let's set it back where it should go.
  409. */
  410. batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
  411. assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);
  412. emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);
  413. anv_batch_bo_finish(current_bbo, batch);
  414. }
  415. static VkResult
  416. anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
  417. {
  418. struct anv_cmd_buffer *cmd_buffer = _data;
  419. struct anv_batch_bo *new_bbo;
  420. VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
  421. if (result != VK_SUCCESS)
  422. return result;
  423. struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
  424. if (seen_bbo == NULL) {
  425. anv_batch_bo_destroy(new_bbo, cmd_buffer);
  426. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  427. }
  428. *seen_bbo = new_bbo;
  429. cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);
  430. list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
  431. anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
  432. return VK_SUCCESS;
  433. }
  434. static VkResult
  435. anv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data)
  436. {
  437. struct anv_cmd_buffer *cmd_buffer = _data;
  438. struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
  439. anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096,
  440. GEN8_MI_BATCH_BUFFER_START_length * 4);
  441. return VK_SUCCESS;
  442. }
  443. /** Allocate a binding table
  444. *
  445. * This function allocates a binding table. This is a bit more complicated
  446. * than one would think due to a combination of Vulkan driver design and some
  447. * unfortunate hardware restrictions.
  448. *
  449. * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for
  450. * the binding table pointer which means that all binding tables need to live
  451. * in the bottom 64k of surface state base address. The way the GL driver has
  452. * classically dealt with this restriction is to emit all surface states
  453. * on-the-fly into the batch and have a batch buffer smaller than 64k. This
  454. * isn't really an option in Vulkan for a couple of reasons:
  455. *
  456. * 1) In Vulkan, we have growing (or chaining) batches so surface states have
  457. * to live in their own buffer and we have to be able to re-emit
  458. * STATE_BASE_ADDRESS as needed which requires a full pipeline stall. In
  459. * order to avoid emitting STATE_BASE_ADDRESS any more often than needed
  460. * (it's not that hard to hit 64k of just binding tables), we allocate
  461. * surface state objects up-front when VkImageView is created. In order
  462. * for this to work, surface state objects need to be allocated from a
  463. * global buffer.
  464. *
  465. * 2) We tried to design the surface state system in such a way that it's
  466. * already ready for bindless texturing. The way bindless texturing works
  467. * on our hardware is that you have a big pool of surface state objects
  468. * (with its own state base address) and the bindless handles are simply
  469. * offsets into that pool. With the architecture we chose, we already
  470. * have that pool and it's exactly the same pool that we use for regular
  471. * surface states so we should already be ready for bindless.
  472. *
  473. * 3) For render targets, we need to be able to fill out the surface states
  474. * later in vkBeginRenderPass so that we can assign clear colors
  475. * correctly. One way to do this would be to just create the surface
  476. * state data and then repeatedly copy it into the surface state BO every
  477. * time we have to re-emit STATE_BASE_ADDRESS. While this works, it's
  478. * rather annoying and just being able to allocate them up-front and
  479. * re-use them for the entire render pass.
  480. *
  481. * While none of these are technically blockers for emitting state on the fly
  482. * like we do in GL, the ability to have a single surface state pool is
  483. * simplifies things greatly. Unfortunately, it comes at a cost...
  484. *
  485. * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't
  486. * place the binding tables just anywhere in surface state base address.
  487. * Because 64k isn't a whole lot of space, we can't simply restrict the
  488. * surface state buffer to 64k, we have to be more clever. The solution we've
  489. * chosen is to have a block pool with a maximum size of 2G that starts at
  490. * zero and grows in both directions. All surface states are allocated from
  491. * the top of the pool (positive offsets) and we allocate blocks (< 64k) of
  492. * binding tables from the bottom of the pool (negative offsets). Every time
  493. * we allocate a new binding table block, we set surface state base address to
  494. * point to the bottom of the binding table block. This way all of the
  495. * binding tables in the block are in the bottom 64k of surface state base
  496. * address. When we fill out the binding table, we add the distance between
  497. * the bottom of our binding table block and zero of the block pool to the
  498. * surface state offsets so that they are correct relative to out new surface
  499. * state base address at the bottom of the binding table block.
  500. *
  501. * \see adjust_relocations_from_block_pool()
  502. * \see adjust_relocations_too_block_pool()
  503. *
  504. * \param[in] entries The number of surface state entries the binding
  505. * table should be able to hold.
  506. *
  507. * \param[out] state_offset The offset surface surface state base address
  508. * where the surface states live. This must be
  509. * added to the surface state offset when it is
  510. * written into the binding table entry.
  511. *
  512. * \return An anv_state representing the binding table
  513. */
  514. struct anv_state
  515. anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
  516. uint32_t entries, uint32_t *state_offset)
  517. {
  518. struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
  519. struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
  520. struct anv_state state;
  521. state.alloc_size = align_u32(entries * 4, 32);
  522. if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
  523. return (struct anv_state) { 0 };
  524. state.offset = cmd_buffer->bt_next;
  525. state.map = state_pool->block_pool.map + bt_block->offset + state.offset;
  526. cmd_buffer->bt_next += state.alloc_size;
  527. assert(bt_block->offset < 0);
  528. *state_offset = -bt_block->offset;
  529. return state;
  530. }
  531. struct anv_state
  532. anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
  533. {
  534. struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
  535. return anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
  536. isl_dev->ss.size, isl_dev->ss.align);
  537. }
  538. struct anv_state
  539. anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
  540. uint32_t size, uint32_t alignment)
  541. {
  542. return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
  543. size, alignment);
  544. }
  545. VkResult
  546. anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
  547. {
  548. struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
  549. struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states);
  550. if (bt_block == NULL) {
  551. anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
  552. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  553. }
  554. *bt_block = anv_state_pool_alloc_back(state_pool);
  555. cmd_buffer->bt_next = 0;
  556. return VK_SUCCESS;
  557. }
  558. VkResult
  559. anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
  560. {
  561. struct anv_batch_bo *batch_bo;
  562. VkResult result;
  563. list_inithead(&cmd_buffer->batch_bos);
  564. result = anv_batch_bo_create(cmd_buffer, &batch_bo);
  565. if (result != VK_SUCCESS)
  566. return result;
  567. list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);
  568. cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
  569. cmd_buffer->batch.user_data = cmd_buffer;
  570. if (cmd_buffer->device->can_chain_batches) {
  571. cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
  572. } else {
  573. cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch;
  574. }
  575. anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
  576. GEN8_MI_BATCH_BUFFER_START_length * 4);
  577. int success = u_vector_init(&cmd_buffer->seen_bbos,
  578. sizeof(struct anv_bo *),
  579. 8 * sizeof(struct anv_bo *));
  580. if (!success)
  581. goto fail_batch_bo;
  582. *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo;
  583. /* u_vector requires power-of-two size elements */
  584. unsigned pow2_state_size = util_next_power_of_two(sizeof(struct anv_state));
  585. success = u_vector_init(&cmd_buffer->bt_block_states,
  586. pow2_state_size, 8 * pow2_state_size);
  587. if (!success)
  588. goto fail_seen_bbos;
  589. result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
  590. &cmd_buffer->pool->alloc);
  591. if (result != VK_SUCCESS)
  592. goto fail_bt_blocks;
  593. cmd_buffer->last_ss_pool_center = 0;
  594. result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
  595. if (result != VK_SUCCESS)
  596. goto fail_bt_blocks;
  597. return VK_SUCCESS;
  598. fail_bt_blocks:
  599. u_vector_finish(&cmd_buffer->bt_block_states);
  600. fail_seen_bbos:
  601. u_vector_finish(&cmd_buffer->seen_bbos);
  602. fail_batch_bo:
  603. anv_batch_bo_destroy(batch_bo, cmd_buffer);
  604. return result;
  605. }
  606. void
  607. anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
  608. {
  609. struct anv_state *bt_block;
  610. u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
  611. anv_state_pool_free(&cmd_buffer->device->surface_state_pool, *bt_block);
  612. u_vector_finish(&cmd_buffer->bt_block_states);
  613. anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);
  614. u_vector_finish(&cmd_buffer->seen_bbos);
  615. /* Destroy all of the batch buffers */
  616. list_for_each_entry_safe(struct anv_batch_bo, bbo,
  617. &cmd_buffer->batch_bos, link) {
  618. anv_batch_bo_destroy(bbo, cmd_buffer);
  619. }
  620. }
  621. void
  622. anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
  623. {
  624. /* Delete all but the first batch bo */
  625. assert(!list_empty(&cmd_buffer->batch_bos));
  626. while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
  627. struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
  628. list_del(&bbo->link);
  629. anv_batch_bo_destroy(bbo, cmd_buffer);
  630. }
  631. assert(!list_empty(&cmd_buffer->batch_bos));
  632. anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
  633. &cmd_buffer->batch,
  634. GEN8_MI_BATCH_BUFFER_START_length * 4);
  635. while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
  636. struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states);
  637. anv_state_pool_free(&cmd_buffer->device->surface_state_pool, *bt_block);
  638. }
  639. assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
  640. cmd_buffer->bt_next = 0;
  641. cmd_buffer->surface_relocs.num_relocs = 0;
  642. cmd_buffer->last_ss_pool_center = 0;
  643. /* Reset the list of seen buffers */
  644. cmd_buffer->seen_bbos.head = 0;
  645. cmd_buffer->seen_bbos.tail = 0;
  646. *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) =
  647. anv_cmd_buffer_current_batch_bo(cmd_buffer);
  648. }
  649. void
  650. anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
  651. {
  652. struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
  653. if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
  654. /* When we start a batch buffer, we subtract a certain amount of
  655. * padding from the end to ensure that we always have room to emit a
  656. * BATCH_BUFFER_START to chain to the next BO. We need to remove
  657. * that padding before we end the batch; otherwise, we may end up
  658. * with our BATCH_BUFFER_END in another BO.
  659. */
  660. cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
  661. assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
  662. anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe);
  663. /* Round batch up to an even number of dwords. */
  664. if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
  665. anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP, noop);
  666. cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
  667. }
  668. anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
  669. if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
  670. /* If this is a secondary command buffer, we need to determine the
  671. * mode in which it will be executed with vkExecuteCommands. We
  672. * determine this statically here so that this stays in sync with the
  673. * actual ExecuteCommands implementation.
  674. */
  675. if (!cmd_buffer->device->can_chain_batches) {
  676. cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
  677. } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
  678. (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
  679. /* If the secondary has exactly one batch buffer in its list *and*
  680. * that batch buffer is less than half of the maximum size, we're
  681. * probably better of simply copying it into our batch.
  682. */
  683. cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
  684. } else if (!(cmd_buffer->usage_flags &
  685. VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
  686. cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;
  687. /* When we chain, we need to add an MI_BATCH_BUFFER_START command
  688. * with its relocation. In order to handle this we'll increment here
  689. * so we can unconditionally decrement right before adding the
  690. * MI_BATCH_BUFFER_START command.
  691. */
  692. batch_bo->relocs.num_relocs++;
  693. cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4;
  694. } else {
  695. cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
  696. }
  697. }
  698. }
  699. static inline VkResult
  700. anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
  701. struct list_head *list)
  702. {
  703. list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
  704. struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos);
  705. if (bbo_ptr == NULL)
  706. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  707. *bbo_ptr = bbo;
  708. }
  709. return VK_SUCCESS;
  710. }
  711. void
  712. anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
  713. struct anv_cmd_buffer *secondary)
  714. {
  715. switch (secondary->exec_mode) {
  716. case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
  717. anv_batch_emit_batch(&primary->batch, &secondary->batch);
  718. break;
  719. case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: {
  720. struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary);
  721. unsigned length = secondary->batch.end - secondary->batch.start;
  722. anv_batch_bo_grow(primary, bbo, &primary->batch, length,
  723. GEN8_MI_BATCH_BUFFER_START_length * 4);
  724. anv_batch_emit_batch(&primary->batch, &secondary->batch);
  725. break;
  726. }
  727. case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
  728. struct anv_batch_bo *first_bbo =
  729. list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
  730. struct anv_batch_bo *last_bbo =
  731. list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
  732. emit_batch_buffer_start(primary, &first_bbo->bo, 0);
  733. struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
  734. assert(primary->batch.start == this_bbo->bo.map);
  735. uint32_t offset = primary->batch.next - primary->batch.start;
  736. const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;
  737. /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
  738. * can emit a new command and relocation for the current splice. In
  739. * order to handle the initial-use case, we incremented next and
  740. * num_relocs in end_batch_buffer() so we can alyways just subtract
  741. * here.
  742. */
  743. last_bbo->relocs.num_relocs--;
  744. secondary->batch.next -= inst_size;
  745. emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
  746. anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
  747. /* After patching up the secondary buffer, we need to clflush the
  748. * modified instruction in case we're on a !llc platform. We use a
  749. * little loop to handle the case where the instruction crosses a cache
  750. * line boundary.
  751. */
  752. if (!primary->device->info.has_llc) {
  753. void *inst = secondary->batch.next - inst_size;
  754. void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
  755. __builtin_ia32_mfence();
  756. while (p < secondary->batch.next) {
  757. __builtin_ia32_clflush(p);
  758. p += CACHELINE_SIZE;
  759. }
  760. }
  761. break;
  762. }
  763. case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
  764. struct list_head copy_list;
  765. VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
  766. secondary,
  767. &copy_list);
  768. if (result != VK_SUCCESS)
  769. return; /* FIXME */
  770. anv_cmd_buffer_add_seen_bbos(primary, &copy_list);
  771. struct anv_batch_bo *first_bbo =
  772. list_first_entry(&copy_list, struct anv_batch_bo, link);
  773. struct anv_batch_bo *last_bbo =
  774. list_last_entry(&copy_list, struct anv_batch_bo, link);
  775. cmd_buffer_chain_to_batch_bo(primary, first_bbo);
  776. list_splicetail(&copy_list, &primary->batch_bos);
  777. anv_batch_bo_continue(last_bbo, &primary->batch,
  778. GEN8_MI_BATCH_BUFFER_START_length * 4);
  779. break;
  780. }
  781. default:
  782. assert(!"Invalid execution mode");
  783. }
  784. anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
  785. &secondary->surface_relocs, 0);
  786. }
  787. struct anv_execbuf {
  788. struct drm_i915_gem_execbuffer2 execbuf;
  789. struct drm_i915_gem_exec_object2 * objects;
  790. uint32_t bo_count;
  791. struct anv_bo ** bos;
  792. /* Allocated length of the 'objects' and 'bos' arrays */
  793. uint32_t array_length;
  794. uint32_t fence_count;
  795. uint32_t fence_array_length;
  796. struct drm_i915_gem_exec_fence * fences;
  797. struct anv_syncobj ** syncobjs;
  798. };
  799. static void
  800. anv_execbuf_init(struct anv_execbuf *exec)
  801. {
  802. memset(exec, 0, sizeof(*exec));
  803. }
  804. static void
  805. anv_execbuf_finish(struct anv_execbuf *exec,
  806. const VkAllocationCallbacks *alloc)
  807. {
  808. vk_free(alloc, exec->objects);
  809. vk_free(alloc, exec->bos);
  810. vk_free(alloc, exec->fences);
  811. vk_free(alloc, exec->syncobjs);
  812. }
  813. static VkResult
  814. anv_execbuf_add_bo(struct anv_execbuf *exec,
  815. struct anv_bo *bo,
  816. struct anv_reloc_list *relocs,
  817. uint32_t extra_flags,
  818. const VkAllocationCallbacks *alloc)
  819. {
  820. struct drm_i915_gem_exec_object2 *obj = NULL;
  821. if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
  822. obj = &exec->objects[bo->index];
  823. if (obj == NULL) {
  824. /* We've never seen this one before. Add it to the list and assign
  825. * an id that we can use later.
  826. */
  827. if (exec->bo_count >= exec->array_length) {
  828. uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
  829. struct drm_i915_gem_exec_object2 *new_objects =
  830. vk_alloc(alloc, new_len * sizeof(*new_objects),
  831. 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
  832. if (new_objects == NULL)
  833. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  834. struct anv_bo **new_bos =
  835. vk_alloc(alloc, new_len * sizeof(*new_bos),
  836. 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
  837. if (new_bos == NULL) {
  838. vk_free(alloc, new_objects);
  839. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  840. }
  841. if (exec->objects) {
  842. memcpy(new_objects, exec->objects,
  843. exec->bo_count * sizeof(*new_objects));
  844. memcpy(new_bos, exec->bos,
  845. exec->bo_count * sizeof(*new_bos));
  846. }
  847. vk_free(alloc, exec->objects);
  848. vk_free(alloc, exec->bos);
  849. exec->objects = new_objects;
  850. exec->bos = new_bos;
  851. exec->array_length = new_len;
  852. }
  853. assert(exec->bo_count < exec->array_length);
  854. bo->index = exec->bo_count++;
  855. obj = &exec->objects[bo->index];
  856. exec->bos[bo->index] = bo;
  857. obj->handle = bo->gem_handle;
  858. obj->relocation_count = 0;
  859. obj->relocs_ptr = 0;
  860. obj->alignment = 0;
  861. obj->offset = bo->offset;
  862. obj->flags = bo->flags | extra_flags;
  863. obj->rsvd1 = 0;
  864. obj->rsvd2 = 0;
  865. }
  866. if (relocs != NULL && obj->relocation_count == 0) {
  867. /* This is the first time we've ever seen a list of relocations for
  868. * this BO. Go ahead and set the relocations and then walk the list
  869. * of relocations and add them all.
  870. */
  871. obj->relocation_count = relocs->num_relocs;
  872. obj->relocs_ptr = (uintptr_t) relocs->relocs;
  873. for (size_t i = 0; i < relocs->num_relocs; i++) {
  874. VkResult result;
  875. /* A quick sanity check on relocations */
  876. assert(relocs->relocs[i].offset < bo->size);
  877. result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL,
  878. extra_flags, alloc);
  879. if (result != VK_SUCCESS)
  880. return result;
  881. }
  882. }
  883. return VK_SUCCESS;
  884. }
  885. static VkResult
  886. anv_execbuf_add_syncobj(struct anv_execbuf *exec,
  887. uint32_t handle, uint32_t flags,
  888. const VkAllocationCallbacks *alloc)
  889. {
  890. assert(flags != 0);
  891. if (exec->fence_count >= exec->fence_array_length) {
  892. uint32_t new_len = MAX2(exec->fence_array_length * 2, 64);
  893. exec->fences = vk_realloc(alloc, exec->fences,
  894. new_len * sizeof(*exec->fences),
  895. 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
  896. if (exec->fences == NULL)
  897. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  898. exec->fence_array_length = new_len;
  899. }
  900. exec->fences[exec->fence_count] = (struct drm_i915_gem_exec_fence) {
  901. .handle = handle,
  902. .flags = flags,
  903. };
  904. exec->fence_count++;
  905. return VK_SUCCESS;
  906. }
  907. static void
  908. anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
  909. struct anv_reloc_list *list)
  910. {
  911. for (size_t i = 0; i < list->num_relocs; i++)
  912. list->relocs[i].target_handle = list->reloc_bos[i]->index;
  913. }
  914. static void
  915. write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
  916. {
  917. unsigned reloc_size = 0;
  918. if (device->info.gen >= 8) {
  919. /* From the Broadwell PRM Vol. 2a, MI_LOAD_REGISTER_MEM::MemoryAddress:
  920. *
  921. * "This field specifies the address of the memory location where the
  922. * register value specified in the DWord above will read from. The
  923. * address specifies the DWord location of the data. Range =
  924. * GraphicsVirtualAddress[63:2] for a DWord register GraphicsAddress
  925. * [63:48] are ignored by the HW and assumed to be in correct
  926. * canonical form [63:48] == [47]."
  927. */
  928. const int shift = 63 - 47;
  929. reloc_size = sizeof(uint64_t);
  930. *(uint64_t *)p = (((int64_t)v) << shift) >> shift;
  931. } else {
  932. reloc_size = sizeof(uint32_t);
  933. *(uint32_t *)p = v;
  934. }
  935. if (flush && !device->info.has_llc)
  936. gen_flush_range(p, reloc_size);
  937. }
  938. static void
  939. adjust_relocations_from_state_pool(struct anv_state_pool *pool,
  940. struct anv_reloc_list *relocs,
  941. uint32_t last_pool_center_bo_offset)
  942. {
  943. assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
  944. uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;
  945. for (size_t i = 0; i < relocs->num_relocs; i++) {
  946. /* All of the relocations from this block pool to other BO's should
  947. * have been emitted relative to the surface block pool center. We
  948. * need to add the center offset to make them relative to the
  949. * beginning of the actual GEM bo.
  950. */
  951. relocs->relocs[i].offset += delta;
  952. }
  953. }
  954. static void
  955. adjust_relocations_to_state_pool(struct anv_state_pool *pool,
  956. struct anv_bo *from_bo,
  957. struct anv_reloc_list *relocs,
  958. uint32_t last_pool_center_bo_offset)
  959. {
  960. assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
  961. uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;
  962. /* When we initially emit relocations into a block pool, we don't
  963. * actually know what the final center_bo_offset will be so we just emit
  964. * it as if center_bo_offset == 0. Now that we know what the center
  965. * offset is, we need to walk the list of relocations and adjust any
  966. * relocations that point to the pool bo with the correct offset.
  967. */
  968. for (size_t i = 0; i < relocs->num_relocs; i++) {
  969. if (relocs->reloc_bos[i] == &pool->block_pool.bo) {
  970. /* Adjust the delta value in the relocation to correctly
  971. * correspond to the new delta. Initially, this value may have
  972. * been negative (if treated as unsigned), but we trust in
  973. * uint32_t roll-over to fix that for us at this point.
  974. */
  975. relocs->relocs[i].delta += delta;
  976. /* Since the delta has changed, we need to update the actual
  977. * relocated value with the new presumed value. This function
  978. * should only be called on batch buffers, so we know it isn't in
  979. * use by the GPU at the moment.
  980. */
  981. assert(relocs->relocs[i].offset < from_bo->size);
  982. write_reloc(pool->block_pool.device,
  983. from_bo->map + relocs->relocs[i].offset,
  984. relocs->relocs[i].presumed_offset +
  985. relocs->relocs[i].delta, false);
  986. }
  987. }
  988. }
  989. static void
  990. anv_reloc_list_apply(struct anv_device *device,
  991. struct anv_reloc_list *list,
  992. struct anv_bo *bo,
  993. bool always_relocate)
  994. {
  995. for (size_t i = 0; i < list->num_relocs; i++) {
  996. struct anv_bo *target_bo = list->reloc_bos[i];
  997. if (list->relocs[i].presumed_offset == target_bo->offset &&
  998. !always_relocate)
  999. continue;
  1000. void *p = bo->map + list->relocs[i].offset;
  1001. write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true);
  1002. list->relocs[i].presumed_offset = target_bo->offset;
  1003. }
  1004. }
  1005. /**
  1006. * This function applies the relocation for a command buffer and writes the
  1007. * actual addresses into the buffers as per what we were told by the kernel on
  1008. * the previous execbuf2 call. This should be safe to do because, for each
  1009. * relocated address, we have two cases:
  1010. *
  1011. * 1) The target BO is inactive (as seen by the kernel). In this case, it is
  1012. * not in use by the GPU so updating the address is 100% ok. It won't be
  1013. * in-use by the GPU (from our context) again until the next execbuf2
  1014. * happens. If the kernel decides to move it in the next execbuf2, it
  1015. * will have to do the relocations itself, but that's ok because it should
  1016. * have all of the information needed to do so.
  1017. *
  1018. * 2) The target BO is active (as seen by the kernel). In this case, it
  1019. * hasn't moved since the last execbuffer2 call because GTT shuffling
  1020. * *only* happens when the BO is idle. (From our perspective, it only
  1021. * happens inside the execbuffer2 ioctl, but the shuffling may be
  1022. * triggered by another ioctl, with full-ppgtt this is limited to only
  1023. * execbuffer2 ioctls on the same context, or memory pressure.) Since the
  1024. * target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
  1025. * address and the relocated value we are writing into the BO will be the
  1026. * same as the value that is already there.
  1027. *
  1028. * There is also a possibility that the target BO is active but the exact
  1029. * RENDER_SURFACE_STATE object we are writing the relocation into isn't in
  1030. * use. In this case, the address currently in the RENDER_SURFACE_STATE
  1031. * may be stale but it's still safe to write the relocation because that
  1032. * particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
  1033. * won't be until the next execbuf2 call.
  1034. *
  1035. * By doing relocations on the CPU, we can tell the kernel that it doesn't
  1036. * need to bother. We want to do this because the surface state buffer is
  1037. * used by every command buffer so, if the kernel does the relocations, it
  1038. * will always be busy and the kernel will always stall. This is also
  1039. * probably the fastest mechanism for doing relocations since the kernel would
  1040. * have to make a full copy of all the relocations lists.
  1041. */
  1042. static bool
  1043. relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
  1044. struct anv_execbuf *exec)
  1045. {
  1046. static int userspace_relocs = -1;
  1047. if (userspace_relocs < 0)
  1048. userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
  1049. if (!userspace_relocs)
  1050. return false;
  1051. /* First, we have to check to see whether or not we can even do the
  1052. * relocation. New buffers which have never been submitted to the kernel
  1053. * don't have a valid offset so we need to let the kernel do relocations so
  1054. * that we can get offsets for them. On future execbuf2 calls, those
  1055. * buffers will have offsets and we will be able to skip relocating.
  1056. * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1.
  1057. */
  1058. for (uint32_t i = 0; i < exec->bo_count; i++) {
  1059. if (exec->bos[i]->offset == (uint64_t)-1)
  1060. return false;
  1061. }
  1062. /* Since surface states are shared between command buffers and we don't
  1063. * know what order they will be submitted to the kernel, we don't know
  1064. * what address is actually written in the surface state object at any
  1065. * given time. The only option is to always relocate them.
  1066. */
  1067. anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
  1068. &cmd_buffer->device->surface_state_pool.block_pool.bo,
  1069. true /* always relocate surface states */);
  1070. /* Since we own all of the batch buffers, we know what values are stored
  1071. * in the relocated addresses and only have to update them if the offsets
  1072. * have changed.
  1073. */
  1074. struct anv_batch_bo **bbo;
  1075. u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
  1076. anv_reloc_list_apply(cmd_buffer->device,
  1077. &(*bbo)->relocs, &(*bbo)->bo, false);
  1078. }
  1079. for (uint32_t i = 0; i < exec->bo_count; i++)
  1080. exec->objects[i].offset = exec->bos[i]->offset;
  1081. return true;
  1082. }
  1083. static VkResult
  1084. setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
  1085. struct anv_cmd_buffer *cmd_buffer)
  1086. {
  1087. struct anv_batch *batch = &cmd_buffer->batch;
  1088. struct anv_state_pool *ss_pool =
  1089. &cmd_buffer->device->surface_state_pool;
  1090. adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
  1091. cmd_buffer->last_ss_pool_center);
  1092. VkResult result = anv_execbuf_add_bo(execbuf, &ss_pool->block_pool.bo,
  1093. &cmd_buffer->surface_relocs, 0,
  1094. &cmd_buffer->device->alloc);
  1095. if (result != VK_SUCCESS)
  1096. return result;
  1097. /* First, we walk over all of the bos we've seen and add them and their
  1098. * relocations to the validate list.
  1099. */
  1100. struct anv_batch_bo **bbo;
  1101. u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
  1102. adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
  1103. cmd_buffer->last_ss_pool_center);
  1104. result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0,
  1105. &cmd_buffer->device->alloc);
  1106. if (result != VK_SUCCESS)
  1107. return result;
  1108. }
  1109. /* Now that we've adjusted all of the surface state relocations, we need to
  1110. * record the surface state pool center so future executions of the command
  1111. * buffer can adjust correctly.
  1112. */
  1113. cmd_buffer->last_ss_pool_center = ss_pool->block_pool.center_bo_offset;
  1114. struct anv_batch_bo *first_batch_bo =
  1115. list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
  1116. /* The kernel requires that the last entry in the validation list be the
  1117. * batch buffer to execute. We can simply swap the element
  1118. * corresponding to the first batch_bo in the chain with the last
  1119. * element in the list.
  1120. */
  1121. if (first_batch_bo->bo.index != execbuf->bo_count - 1) {
  1122. uint32_t idx = first_batch_bo->bo.index;
  1123. uint32_t last_idx = execbuf->bo_count - 1;
  1124. struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
  1125. assert(execbuf->bos[idx] == &first_batch_bo->bo);
  1126. execbuf->objects[idx] = execbuf->objects[last_idx];
  1127. execbuf->bos[idx] = execbuf->bos[last_idx];
  1128. execbuf->bos[idx]->index = idx;
  1129. execbuf->objects[last_idx] = tmp_obj;
  1130. execbuf->bos[last_idx] = &first_batch_bo->bo;
  1131. first_batch_bo->bo.index = last_idx;
  1132. }
  1133. /* Now we go through and fixup all of the relocation lists to point to
  1134. * the correct indices in the object array. We have to do this after we
  1135. * reorder the list above as some of the indices may have changed.
  1136. */
  1137. u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
  1138. anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
  1139. anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
  1140. if (!cmd_buffer->device->info.has_llc) {
  1141. __builtin_ia32_mfence();
  1142. u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
  1143. for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
  1144. __builtin_ia32_clflush((*bbo)->bo.map + i);
  1145. }
  1146. }
  1147. execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
  1148. .buffers_ptr = (uintptr_t) execbuf->objects,
  1149. .buffer_count = execbuf->bo_count,
  1150. .batch_start_offset = 0,
  1151. .batch_len = batch->next - batch->start,
  1152. .cliprects_ptr = 0,
  1153. .num_cliprects = 0,
  1154. .DR1 = 0,
  1155. .DR4 = 0,
  1156. .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER |
  1157. I915_EXEC_CONSTANTS_REL_GENERAL,
  1158. .rsvd1 = cmd_buffer->device->context_id,
  1159. .rsvd2 = 0,
  1160. };
  1161. if (relocate_cmd_buffer(cmd_buffer, execbuf)) {
  1162. /* If we were able to successfully relocate everything, tell the kernel
  1163. * that it can skip doing relocations. The requirement for using
  1164. * NO_RELOC is:
  1165. *
  1166. * 1) The addresses written in the objects must match the corresponding
  1167. * reloc.presumed_offset which in turn must match the corresponding
  1168. * execobject.offset.
  1169. *
  1170. * 2) To avoid stalling, execobject.offset should match the current
  1171. * address of that object within the active context.
  1172. *
  1173. * In order to satisfy all of the invariants that make userspace
  1174. * relocations to be safe (see relocate_cmd_buffer()), we need to
  1175. * further ensure that the addresses we use match those used by the
  1176. * kernel for the most recent execbuf2.
  1177. *
  1178. * The kernel may still choose to do relocations anyway if something has
  1179. * moved in the GTT. In this case, the relocation list still needs to be
  1180. * valid. All relocations on the batch buffers are already valid and
  1181. * kept up-to-date. For surface state relocations, by applying the
  1182. * relocations in relocate_cmd_buffer, we ensured that the address in
  1183. * the RENDER_SURFACE_STATE matches presumed_offset, so it should be
  1184. * safe for the kernel to relocate them as needed.
  1185. */
  1186. execbuf->execbuf.flags |= I915_EXEC_NO_RELOC;
  1187. } else {
  1188. /* In the case where we fall back to doing kernel relocations, we need
  1189. * to ensure that the relocation list is valid. All relocations on the
  1190. * batch buffers are already valid and kept up-to-date. Since surface
  1191. * states are shared between command buffers and we don't know what
  1192. * order they will be submitted to the kernel, we don't know what
  1193. * address is actually written in the surface state object at any given
  1194. * time. The only option is to set a bogus presumed offset and let the
  1195. * kernel relocate them.
  1196. */
  1197. for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
  1198. cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
  1199. }
  1200. return VK_SUCCESS;
  1201. }
  1202. static VkResult
  1203. setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
  1204. {
  1205. VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo,
  1206. NULL, 0, &device->alloc);
  1207. if (result != VK_SUCCESS)
  1208. return result;
  1209. execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
  1210. .buffers_ptr = (uintptr_t) execbuf->objects,
  1211. .buffer_count = execbuf->bo_count,
  1212. .batch_start_offset = 0,
  1213. .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */
  1214. .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
  1215. .rsvd1 = device->context_id,
  1216. .rsvd2 = 0,
  1217. };
  1218. return VK_SUCCESS;
  1219. }
  1220. VkResult
  1221. anv_cmd_buffer_execbuf(struct anv_device *device,
  1222. struct anv_cmd_buffer *cmd_buffer,
  1223. const VkSemaphore *in_semaphores,
  1224. uint32_t num_in_semaphores,
  1225. const VkSemaphore *out_semaphores,
  1226. uint32_t num_out_semaphores,
  1227. VkFence _fence)
  1228. {
  1229. ANV_FROM_HANDLE(anv_fence, fence, _fence);
  1230. struct anv_execbuf execbuf;
  1231. anv_execbuf_init(&execbuf);
  1232. int in_fence = -1;
  1233. VkResult result = VK_SUCCESS;
  1234. for (uint32_t i = 0; i < num_in_semaphores; i++) {
  1235. ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
  1236. struct anv_semaphore_impl *impl =
  1237. semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
  1238. &semaphore->temporary : &semaphore->permanent;
  1239. switch (impl->type) {
  1240. case ANV_SEMAPHORE_TYPE_BO:
  1241. result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
  1242. 0, &device->alloc);
  1243. if (result != VK_SUCCESS)
  1244. return result;
  1245. break;
  1246. case ANV_SEMAPHORE_TYPE_SYNC_FILE:
  1247. if (in_fence == -1) {
  1248. in_fence = impl->fd;
  1249. } else {
  1250. int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd);
  1251. if (merge == -1)
  1252. return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
  1253. close(impl->fd);
  1254. close(in_fence);
  1255. in_fence = merge;
  1256. }
  1257. impl->fd = -1;
  1258. break;
  1259. case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
  1260. result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
  1261. I915_EXEC_FENCE_WAIT,
  1262. &device->alloc);
  1263. if (result != VK_SUCCESS)
  1264. return result;
  1265. break;
  1266. default:
  1267. break;
  1268. }
  1269. }
  1270. bool need_out_fence = false;
  1271. for (uint32_t i = 0; i < num_out_semaphores; i++) {
  1272. ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
  1273. /* Under most circumstances, out fences won't be temporary. However,
  1274. * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
  1275. *
  1276. * "If the import is temporary, the implementation must restore the
  1277. * semaphore to its prior permanent state after submitting the next
  1278. * semaphore wait operation."
  1279. *
  1280. * The spec says nothing whatsoever about signal operations on
  1281. * temporarily imported semaphores so it appears they are allowed.
  1282. * There are also CTS tests that require this to work.
  1283. */
  1284. struct anv_semaphore_impl *impl =
  1285. semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
  1286. &semaphore->temporary : &semaphore->permanent;
  1287. switch (impl->type) {
  1288. case ANV_SEMAPHORE_TYPE_BO:
  1289. result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
  1290. EXEC_OBJECT_WRITE, &device->alloc);
  1291. if (result != VK_SUCCESS)
  1292. return result;
  1293. break;
  1294. case ANV_SEMAPHORE_TYPE_SYNC_FILE:
  1295. need_out_fence = true;
  1296. break;
  1297. case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
  1298. result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
  1299. I915_EXEC_FENCE_SIGNAL,
  1300. &device->alloc);
  1301. if (result != VK_SUCCESS)
  1302. return result;
  1303. break;
  1304. default:
  1305. break;
  1306. }
  1307. }
  1308. if (fence) {
  1309. /* Under most circumstances, out fences won't be temporary. However,
  1310. * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
  1311. *
  1312. * "If the import is temporary, the implementation must restore the
  1313. * semaphore to its prior permanent state after submitting the next
  1314. * semaphore wait operation."
  1315. *
  1316. * The spec says nothing whatsoever about signal operations on
  1317. * temporarily imported semaphores so it appears they are allowed.
  1318. * There are also CTS tests that require this to work.
  1319. */
  1320. struct anv_fence_impl *impl =
  1321. fence->temporary.type != ANV_FENCE_TYPE_NONE ?
  1322. &fence->temporary : &fence->permanent;
  1323. switch (impl->type) {
  1324. case ANV_FENCE_TYPE_BO:
  1325. result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
  1326. EXEC_OBJECT_WRITE, &device->alloc);
  1327. if (result != VK_SUCCESS)
  1328. return result;
  1329. break;
  1330. case ANV_FENCE_TYPE_SYNCOBJ:
  1331. result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
  1332. I915_EXEC_FENCE_SIGNAL,
  1333. &device->alloc);
  1334. if (result != VK_SUCCESS)
  1335. return result;
  1336. break;
  1337. default:
  1338. unreachable("Invalid fence type");
  1339. }
  1340. }
  1341. if (cmd_buffer)
  1342. result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
  1343. else
  1344. result = setup_empty_execbuf(&execbuf, device);
  1345. if (result != VK_SUCCESS)
  1346. return result;
  1347. if (execbuf.fence_count > 0) {
  1348. assert(device->instance->physicalDevice.has_syncobj);
  1349. execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
  1350. execbuf.execbuf.num_cliprects = execbuf.fence_count;
  1351. execbuf.execbuf.cliprects_ptr = (uintptr_t) execbuf.fences;
  1352. }
  1353. if (in_fence != -1) {
  1354. execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
  1355. execbuf.execbuf.rsvd2 |= (uint32_t)in_fence;
  1356. }
  1357. if (need_out_fence)
  1358. execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
  1359. result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
  1360. /* Execbuf does not consume the in_fence. It's our job to close it. */
  1361. if (in_fence != -1)
  1362. close(in_fence);
  1363. for (uint32_t i = 0; i < num_in_semaphores; i++) {
  1364. ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
  1365. /* From the Vulkan 1.0.53 spec:
  1366. *
  1367. * "If the import is temporary, the implementation must restore the
  1368. * semaphore to its prior permanent state after submitting the next
  1369. * semaphore wait operation."
  1370. *
  1371. * This has to happen after the execbuf in case we close any syncobjs in
  1372. * the process.
  1373. */
  1374. anv_semaphore_reset_temporary(device, semaphore);
  1375. }
  1376. if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
  1377. /* BO fences can't be shared, so they can't be temporary. */
  1378. assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
  1379. /* Once the execbuf has returned, we need to set the fence state to
  1380. * SUBMITTED. We can't do this before calling execbuf because
  1381. * anv_GetFenceStatus does take the global device lock before checking
  1382. * fence->state.
  1383. *
  1384. * We set the fence state to SUBMITTED regardless of whether or not the
  1385. * execbuf succeeds because we need to ensure that vkWaitForFences() and
  1386. * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
  1387. * VK_SUCCESS) in a finite amount of time even if execbuf fails.
  1388. */
  1389. fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
  1390. }
  1391. if (result == VK_SUCCESS && need_out_fence) {
  1392. int out_fence = execbuf.execbuf.rsvd2 >> 32;
  1393. for (uint32_t i = 0; i < num_out_semaphores; i++) {
  1394. ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
  1395. /* Out fences can't have temporary state because that would imply
  1396. * that we imported a sync file and are trying to signal it.
  1397. */
  1398. assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
  1399. struct anv_semaphore_impl *impl = &semaphore->permanent;
  1400. if (impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE) {
  1401. assert(impl->fd == -1);
  1402. impl->fd = dup(out_fence);
  1403. }
  1404. }
  1405. close(out_fence);
  1406. }
  1407. anv_execbuf_finish(&execbuf, &device->alloc);
  1408. return result;
  1409. }