Clone of mesa.

anv_blorp.c 60KB

/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_shader_bin *bin =
      anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static void
upload_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
                                       key, key_size, kernel, kernel_size,
                                       prog_data, prog_data_size, &bind_map);

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
   blorp_init(&device->blorp, device, &device->isl_dev);
   device->blorp.compiler = device->instance->physicalDevice.compiler;
   device->blorp.mocs.tex = device->default_mocs;
   device->blorp.mocs.rb = device->default_mocs;
   device->blorp.mocs.vb = device->default_mocs;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell) {
         device->blorp.exec = gen75_blorp_exec;
      } else {
         device->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      device->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      device->blorp.exec = gen9_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
   anv_pipeline_cache_finish(&device->blorp_shader_cache);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(format);

   /* ASTC is the only format which doesn't support linear layouts.
    * Create an equivalently sized surface with ISL to get around this.
    */
   if (fmtl->txc == ISL_TXC_ASTC) {
      /* Use an equivalently sized format */
      format = ISL_FORMAT_R32G32B32A32_UINT;
      assert(fmtl->bpb == isl_format_get_layout(format)->bpb);

      /* Shrink the dimensions for the new format */
      width = DIV_ROUND_UP(width, fmtl->bw);
      height = DIV_ROUND_UP(height, fmtl->bh);
   }
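
   /* For example: every ASTC block (say, ASTC 8x8) is 128 bits, the same
    * bits-per-block as R32G32B32A32_UINT, so a 64x64 ASTC image becomes an
    * 8x8 RGBA32 surface covering exactly the same bytes.
    */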

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->bo,
         .offset = buffer->offset + offset,
      },
   };

   isl_surf_init(&device->isl_dev, isl_surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .min_pitch = row_pitch,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(isl_surf->row_pitch == row_pitch);
}

static void
get_blorp_surf_for_anv_image(const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
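   /* Stencil and HiZ aux usage are dropped here, presumably because blorp's
    * generic copy/blit paths can't consume them directly: stencil has no aux
    * surface and HiZ is only handled via the dedicated depth ops.
    */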
   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT ||
       aux_usage == ISL_AUX_USAGE_HIZ)
      aux_usage = ISL_AUX_USAGE_NONE;

   const struct anv_surface *surface =
      anv_image_get_surface_for_aspect_mask(image, aspect);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = image->bo,
         .offset = image->offset + surface->offset,
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      blorp_surf->aux_surf = &image->aux_surface.isl;
      blorp_surf->aux_addr = (struct blorp_address) {
         .buffer = image->bo,
         .offset = image->offset + image->aux_surface.offset,
      };
      blorp_surf->aux_usage = aux_usage;
   }
}

void anv_CmdCopyImage(
    VkCommandBuffer commandBuffer,
    VkImage srcImage,
    VkImageLayout srcImageLayout,
    VkImage dstImage,
    VkImageLayout dstImageLayout,
    uint32_t regionCount,
    const VkImageCopy* pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
      VkOffset3D dstOffset =
         anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
      VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, pRegions[r].extent);

      unsigned dst_base_layer, layer_count;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         dst_base_layer = pRegions[r].dstOffset.z;
         layer_count = pRegions[r].extent.depth;
      } else {
         dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
         layer_count = pRegions[r].dstSubresource.layerCount;
      }

      unsigned src_base_layer;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         src_base_layer = pRegions[r].srcOffset.z;
      } else {
         src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
         assert(pRegions[r].srcSubresource.layerCount == layer_count);
      }

      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);

      uint32_t a;
      for_each_bit(a, pRegions[r].dstSubresource.aspectMask) {
         VkImageAspectFlagBits aspect = (1 << a);

         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(src_image, aspect, src_image->aux_usage,
                                      &src_surf);
         get_blorp_surf_for_anv_image(dst_image, aspect, dst_image->aux_usage,
                                      &dst_surf);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel,
                       src_base_layer + i,
                       &dst_surf, pRegions[r].dstSubresource.mipLevel,
                       dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }

   blorp_batch_finish(&batch);
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(anv_image, aspect, anv_image->aux_usage,
                                   &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth = pRegions[r].imageSubresource.layerCount;
      }

      const enum isl_format buffer_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);

      const VkExtent3D bufferImageExtent = {
         .width  = pRegions[r].bufferRowLength ?
                   pRegions[r].bufferRowLength : extent.width,
         .height = pRegions[r].bufferImageHeight ?
                   pRegions[r].bufferImageHeight : extent.height,
      };

      const struct isl_format_layout *buffer_fmtl =
         isl_format_get_layout(buffer_format);

      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
         (buffer_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
         buffer_row_pitch;

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    extent.width, extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdCopyBufferToImage(
    VkCommandBuffer commandBuffer,
    VkBuffer srcBuffer,
    VkImage dstImage,
    VkImageLayout dstImageLayout,
    uint32_t regionCount,
    const VkBufferImageCopy* pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                        regionCount, pRegions, true);
}

void anv_CmdCopyImageToBuffer(
    VkCommandBuffer commandBuffer,
    VkImage srcImage,
    VkImageLayout srcImageLayout,
    VkBuffer dstBuffer,
    uint32_t regionCount,
    const VkBufferImageCopy* pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                        regionCount, pRegions, false);
}

static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}
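
/* For instance, a blit whose source coordinates run (8 -> 0) while the
 * destination runs (0 -> 8) gets its source pair swapped to (0, 8) and
 * flip_coords() returns true: the callers below always hand blorp ascending
 * rectangles plus an explicit mirror flag instead of negative extents.
 */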

void anv_CmdBlitImage(
    VkCommandBuffer commandBuffer,
    VkImage srcImage,
    VkImageLayout srcImageLayout,
    VkImage dstImage,
    VkImageLayout dstImageLayout,
    uint32_t regionCount,
    const VkImageBlit* pRegions,
    VkFilter filter)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   uint32_t gl_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      gl_filter = 0x2600; /* GL_NEAREST */
      break;
   case VK_FILTER_LINEAR:
      gl_filter = 0x2601; /* GL_LINEAR */
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      get_blorp_surf_for_anv_image(src_image, src_res->aspectMask,
                                   src_image->aux_usage, &src);
      get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask,
                                   dst_image->aux_usage, &dst);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, src_image->vk_format,
                        src_res->aspectMask, src_image->tiling);
      struct anv_format dst_format =
         anv_get_format(&cmd_buffer->device->info, dst_image->vk_format,
                        dst_res->aspectMask, dst_image->tiling);

      unsigned dst_start, dst_end;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = pRegions[r].dstOffsets[0].z;
         dst_end = pRegions[r].dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start + dst_res->layerCount;
      }

      unsigned src_start, src_end;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = pRegions[r].srcOffsets[0].z;
         src_end = pRegions[r].srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start + src_res->layerCount;
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      float src_z_step = (float)(src_end + 1 - src_start) /
                         (float)(dst_end + 1 - dst_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
      }

      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      const unsigned num_layers = dst_end - dst_start;
      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         unsigned src_z = src_start + i * src_z_step;

         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format,
                    anv_swizzle_for_render(dst_format.swizzle),
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    gl_filter, flip_x, flip_y);
      }
   }

   blorp_batch_finish(&batch);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R8G8_UINT;
   case 4:  return ISL_FORMAT_R8G8B8A8_UINT;
   case 8:  return ISL_FORMAT_R16G16B16A16_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Not a power-of-two format size");
   }
}
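
/* These are all plain UINT formats; as the comment in do_buffer_copy() below
 * notes, the exact choice is irrelevant here and only the bytes-per-pixel
 * size matters.
 */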

static void
do_buffer_copy(struct blorp_batch *batch,
               struct anv_bo *src, uint64_t src_offset,
               struct anv_bo *dst, uint64_t dst_offset,
               int width, int height, int block_size)
{
   struct anv_device *device = batch->blorp->driver_ctx;

   /* The actual format we pick doesn't matter as blorp will throw it away.
    * The only thing that actually matters is the size.
    */
   enum isl_format format = isl_format_for_size(block_size);

   struct isl_surf surf;
   isl_surf_init(&device->isl_dev, &surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(surf.row_pitch == width * block_size);

   struct blorp_surf src_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = src,
         .offset = src_offset,
      },
   };

   struct blorp_surf dst_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = dst,
         .offset = dst_offset,
      },
   };

   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
              0, 0, 0, 0, width, height);
}
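
/* A usage sketch (illustrative values only, not from the original source):
 * copying 64KB between two BOs at 16-byte-aligned offsets can be issued as a
 * single 4096x1 "image" of RGBA32 texels:
 *
 *    do_buffer_copy(batch, src_bo, src_off, dst_bo, dst_off, 4096, 1, 16);
 */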

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case, the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}
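
/* Worked example: gcd_pow2_u64(48, 32) computes a_log2 = 4 and b_log2 = 5,
 * returning 1 << 4 = 16, the largest power of two dividing both values.
 */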

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)
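
/* That's 16384 texels on a side, so a single max-sized rectangle at 16 bytes
 * per pixel moves 16384 * 16384 * 16 bytes = 4GiB at most.
 */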

void anv_CmdCopyBuffer(
    VkCommandBuffer commandBuffer,
    VkBuffer srcBuffer,
    VkBuffer dstBuffer,
    uint32_t regionCount,
    const VkBufferCopy* pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
      uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;
      uint64_t copy_size = pRegions[r].size;

      /* First, we compute the biggest format that can be used with the
       * given offsets and size.
       */
      int bs = 16;
      bs = gcd_pow2_u64(bs, src_offset);
      bs = gcd_pow2_u64(bs, dst_offset);
      bs = gcd_pow2_u64(bs, pRegions[r].size);
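
      /* With illustrative numbers: offsets 36 and 120 and a size of 1000
       * share only a factor of 4 with the starting bs of 16, so this copy
       * would proceed in R8G8B8A8_UINT (4-byte) texels.
       */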

      /* Next, we make a bunch of max-sized copies */
      uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
      while (copy_size >= max_copy_size) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
         copy_size -= max_copy_size;
         src_offset += max_copy_size;
         dst_offset += max_copy_size;
      }

      /* Now make a max-width copy */
      uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
      assert(height < MAX_SURFACE_DIM);
      if (height != 0) {
         uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, height, bs);
         copy_size -= rect_copy_size;
         src_offset += rect_copy_size;
         dst_offset += rect_copy_size;
      }

      /* Finally, make a small copy to finish it off */
      if (copy_size != 0) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        copy_size / bs, 1, bs);
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdUpdateBuffer(
    VkCommandBuffer commandBuffer,
    VkBuffer dstBuffer,
    VkDeviceSize dstOffset,
    VkDeviceSize dataSize,
    const void* pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);
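
   /* Each iteration below stages a chunk of the user data in freshly
    * allocated dynamic state and then blorp-copies it into the destination
    * buffer.
    */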
   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      int bs = 16;
      bs = gcd_pow2_u64(bs, dstOffset);
      bs = gcd_pow2_u64(bs, copy_size);

      do_buffer_copy(&batch,
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     tmp_data.offset,
                     dst_buffer->bo, dst_buffer->offset + dstOffset,
                     copy_size / bs, 1, bs);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   blorp_batch_finish(&batch);
}

void anv_CmdFillBuffer(
    VkCommandBuffer commandBuffer,
    VkBuffer dstBuffer,
    VkDeviceSize dstOffset,
    VkDeviceSize fillSize,
    uint32_t data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   if (fillSize == VK_WHOLE_SIZE) {
      fillSize = dst_buffer->size - dstOffset;
      /* Make sure fillSize is a multiple of 4 */
      fillSize &= ~3ull;
   }

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearColorImage(
    VkCommandBuffer commandBuffer,
    VkImage _image,
    VkImageLayout imageLayout,
    const VkClearColorValue* pColor,
    uint32_t rangeCount,
    const VkImageSubresourceRange* pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage, &surf);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, image->vk_format,
                        VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = pRanges[r].layerCount;

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer commandBuffer,
    VkImage image_h,
    VkImageLayout imageLayout,
    const VkClearDepthStencilValue* pDepthStencil,
    uint32_t rangeCount,
    const VkImageSubresourceRange* pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth, stencil;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   ISL_AUX_USAGE_NONE, &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = pRanges[r].layerCount;

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   blorp_batch_finish(&batch);
}

struct anv_state
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset)
{
   struct anv_state bt_state =
      anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                         state_offset);
   if (bt_state.map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      MAYBE_UNUSED VkResult result =
         anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                    state_offset);
      assert(bt_state.map != NULL);
   }

   return bt_state;
}

static uint32_t
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state)
{
   uint32_t state_offset;
   struct anv_state bt_state =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset);

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   return bt_state.offset;
}
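
/* Note: state_offset is whatever delta the binding-table allocator reported
 * for the current block (see anv_cmd_buffer_alloc_binding_table), which is
 * presumably why it's added to the surface state offset written into the
 * single-entry table above.
 */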

static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   const uint32_t att_idx = subpass->color_attachments[color_att];

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   uint32_t binding_table =
      binding_table_for_surface_state(cmd_buffer, att_state->color_rt_state);

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t att_idx = subpass->depth_stencil_attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state);

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer commandBuffer,
    uint32_t attachmentCount,
    const VkClearAttachment* pAttachments,
    uint32_t rectCount,
    const VkClearRect* pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};

static bool
attachment_needs_flush(struct anv_cmd_buffer *cmd_buffer,
                       struct anv_render_pass_attachment *att,
                       enum subpass_stage stage)
{
   struct anv_render_pass *pass = cmd_buffer->state.pass;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   unsigned subpass_idx = subpass - pass->subpasses;
   assert(subpass_idx < pass->subpass_count);

   /* We handle this subpass specially based on the current stage */
   enum anv_subpass_usage usage = att->subpass_usage[subpass_idx];
   switch (stage) {
   case SUBPASS_STAGE_LOAD:
      if (usage & (ANV_SUBPASS_USAGE_INPUT | ANV_SUBPASS_USAGE_RESOLVE_SRC))
         return true;
      break;

   case SUBPASS_STAGE_DRAW:
      if (usage & ANV_SUBPASS_USAGE_RESOLVE_SRC)
         return true;
      break;

   default:
      break;
   }

   for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
      usage = att->subpass_usage[s];

      /* If this attachment is going to be used as an input in this or any
       * future subpass, then we need to flush its cache and invalidate the
       * texture cache.
       */
      if (att->subpass_usage[s] & ANV_SUBPASS_USAGE_INPUT)
         return true;

      if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
         /* We found another subpass that draws to this attachment.  We'll
          * wait to resolve until then.
          */
         return false;
      }
   }

   return false;
}

static void
anv_cmd_buffer_flush_attachments(struct anv_cmd_buffer *cmd_buffer,
                                 enum subpass_stage stage)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_render_pass *pass = cmd_buffer->state.pass;

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      uint32_t att = subpass->color_attachments[i];
      assert(att < pass->attachment_count);
      if (attachment_needs_flush(cmd_buffer, &pass->attachments[att], stage)) {
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      }
   }

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      uint32_t att = subpass->depth_stencil_attachment;
      assert(att < pass->attachment_count);
      if (attachment_needs_flush(cmd_buffer, &pass->attachments[att], stage)) {
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
            ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
      }
   }
}

static bool
subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   uint32_t ds = cmd_state->subpass->depth_stencil_attachment;

   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      uint32_t a = cmd_state->subpass->color_attachments[i];
      if (cmd_state->attachments[a].pending_clear_aspects) {
         return true;
      }
   }

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {
      return true;
   }

   return false;
}

void
anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   const VkRect2D render_area = cmd_buffer->state.render_area;

   if (!subpass_needs_clear(cmd_buffer))
      return;

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   VkClearRect clear_rect = {
      .rect = cmd_buffer->state.render_area,
      .baseArrayLayer = 0,
      .layerCount = cmd_buffer->state.framebuffer->layers,
   };

   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      const uint32_t a = cmd_state->subpass->color_attachments[i];
      struct anv_attachment_state *att_state = &cmd_state->attachments[a];

      if (!att_state->pending_clear_aspects)
         continue;

      assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_image_view *iview = fb->attachments[a];
      const struct anv_image *image = iview->image;
      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                   att_state->aux_usage, &surf);
      surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

      if (att_state->fast_clear) {
         blorp_fast_clear(&batch, &surf, iview->isl.format,
                          iview->isl.base_level,
                          iview->isl.base_array_layer, fb->layers,
                          render_area.offset.x, render_area.offset.y,
                          render_area.offset.x + render_area.extent.width,
                          render_area.offset.y + render_area.extent.height);

         /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
          *
          *    "After Render target fast clear, pipe-control with color cache
          *    write-flush must be issued before sending any DRAW commands on
          *    that render target."
          */
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
      } else {
         blorp_clear(&batch, &surf, iview->isl.format,
                     anv_swizzle_for_render(iview->isl.swizzle),
                     iview->isl.base_level,
                     iview->isl.base_array_layer, fb->layers,
                     render_area.offset.x, render_area.offset.y,
                     render_area.offset.x + render_area.extent.width,
                     render_area.offset.y + render_area.extent.height,
                     surf.clear_color, NULL);
      }

      att_state->pending_clear_aspects = 0;
   }

   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {
      VkClearAttachment clear_att = {
         .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
         .clearValue = cmd_state->attachments[ds].clear_value,
      };

      const uint8_t gen = cmd_buffer->device->info.gen;
      bool clear_with_hiz = gen >= 8 &&
         cmd_state->attachments[ds].aux_usage == ISL_AUX_USAGE_HIZ;
      const struct anv_image_view *iview = fb->attachments[ds];

      if (clear_with_hiz) {
         const bool clear_depth = clear_att.aspectMask &
                                  VK_IMAGE_ASPECT_DEPTH_BIT;
         const bool clear_stencil = clear_att.aspectMask &
                                    VK_IMAGE_ASPECT_STENCIL_BIT;

         /* Check against restrictions for depth buffer clearing. A great GPU
          * performance benefit isn't expected when using the HZ sequence for
          * stencil-only clears. Therefore, we don't emit a HZ op sequence for
          * a stencil clear in addition to using the BLORP-fallback for depth.
          */
         if (clear_depth) {
            if (!blorp_can_hiz_clear_depth(gen, iview->isl.format,
                                           iview->image->samples,
                                           render_area.offset.x,
                                           render_area.offset.y,
                                           render_area.offset.x +
                                           render_area.extent.width,
                                           render_area.offset.y +
                                           render_area.extent.height)) {
               clear_with_hiz = false;
            } else if (clear_att.clearValue.depthStencil.depth !=
                       ANV_HZ_FC_VAL) {
               /* Don't enable fast depth clears for any color not equal to
                * ANV_HZ_FC_VAL.
                */
               clear_with_hiz = false;
            } else if (gen == 8 &&
                       anv_can_sample_with_hiz(cmd_buffer->device->info.gen,
                                               iview->image->samples)) {
               /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
                * fast-cleared portion of a HiZ buffer. Testing has revealed
                * that Gen8 only supports returning 0.0f. Gens prior to gen8 do
                * not support this feature at all.
                */
               clear_with_hiz = false;
            }
         }

         if (clear_with_hiz) {
            blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples,
                                             render_area.offset.x,
                                             render_area.offset.y,
                                             render_area.offset.x +
                                             render_area.extent.width,
                                             render_area.offset.y +
                                             render_area.extent.height,
                                             clear_depth, clear_stencil,
                                             clear_att.clearValue
                                                .depthStencil.stencil);
         }
      }

      if (!clear_with_hiz) {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &clear_att, 1, &clear_rect);
      }

      cmd_state->attachments[ds].pending_clear_aspects = 0;
   }

   blorp_batch_finish(&batch);

   anv_cmd_buffer_flush_attachments(cmd_buffer, SUBPASS_STAGE_LOAD);
}

static void
resolve_image(struct blorp_batch *batch,
              const struct anv_image *src_image,
              uint32_t src_level, uint32_t src_layer,
              const struct anv_image *dst_image,
              uint32_t dst_level, uint32_t dst_layer,
              VkImageAspectFlags aspect_mask,
              uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
              uint32_t width, uint32_t height)
{
   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);

   uint32_t a;
   for_each_bit(a, aspect_mask) {
      VkImageAspectFlagBits aspect = 1 << a;

      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(src_image, aspect,
                                   src_image->aux_usage, &src_surf);
      get_blorp_surf_for_anv_image(dst_image, aspect,
                                   dst_image->aux_usage, &dst_surf);

      blorp_blit(batch,
                 &src_surf, src_level, src_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 0x2600 /* GL_NEAREST */, false, false);
   }
}

void anv_CmdResolveImage(
    VkCommandBuffer commandBuffer,
    VkImage srcImage,
    VkImageLayout srcImageLayout,
    VkImage dstImage,
    VkImageLayout dstImageLayout,
    uint32_t regionCount,
    const VkImageResolve* pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t r = 0; r < regionCount; r++) {
      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);
      assert(pRegions[r].srcSubresource.layerCount ==
             pRegions[r].dstSubresource.layerCount);

      const uint32_t layer_count = pRegions[r].dstSubresource.layerCount;

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         resolve_image(&batch,
                       src_image, pRegions[r].srcSubresource.mipLevel,
                       pRegions[r].srcSubresource.baseArrayLayer + layer,
                       dst_image, pRegions[r].dstSubresource.mipLevel,
                       pRegions[r].dstSubresource.baseArrayLayer + layer,
                       pRegions[r].dstSubresource.aspectMask,
                       pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
                       pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
                       pRegions[r].extent.width, pRegions[r].extent.height);
      }
   }

   blorp_batch_finish(&batch);
}

static void
ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       uint32_t att)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att];

   if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
      return; /* Nothing to resolve */

   assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
          att_state->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct anv_render_pass *pass = cmd_buffer->state.pass;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   unsigned subpass_idx = subpass - pass->subpasses;
   assert(subpass_idx < pass->subpass_count);

   /* Scan forward to see what all ways this attachment will be used.
    * Ideally, we would like to resolve in the same subpass as the last write
    * of a particular attachment.  That way we only resolve once but it's
    * still hot in the cache.
    */
   bool found_draw = false;
   enum anv_subpass_usage usage = 0;
   for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
      usage |= pass->attachments[att].subpass_usage[s];

      if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
         /* We found another subpass that draws to this attachment.  We'll
          * wait to resolve until then.
          */
         found_draw = true;
         break;
      }
   }

   struct anv_image_view *iview = fb->attachments[att];
   const struct anv_image *image = iview->image;
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
   if (!found_draw) {
      /* This is the last subpass that writes to this attachment so we need to
       * resolve here.  Ideally, we would like to only resolve if the storeOp
       * is set to VK_ATTACHMENT_STORE_OP_STORE.  However, we need to ensure
       * that the CCS bits are set to "resolved" because there may be copy or
       * blit operations (which may ignore CCS) between now and the next time
       * we render and we need to ensure that anything they write will be
       * respected in the next render.  Unfortunately, the hardware does not
       * provide us with any sort of "invalidate" pass that sets the CCS to
       * "resolved" without writing to the render target.
       */
      if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
         /* The image destination surface doesn't support compression outside
          * the render pass.  We need a full resolve.
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      } else if (att_state->fast_clear) {
         /* We don't know what to do with clear colors outside the render
          * pass.  We need a partial resolve.  Only transparent black is
          * built into the surface state object and thus no resolve is
          * required for this case.
          */
         if (att_state->clear_value.color.uint32[0] ||
             att_state->clear_value.color.uint32[1] ||
             att_state->clear_value.color.uint32[2] ||
             att_state->clear_value.color.uint32[3])
            resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      } else {
         /* The image "natively" supports all the compression we care about
          * and we don't need to resolve at all.  If this is the case, we also
          * don't need to resolve for any of the input attachment cases below.
          */
      }
   } else if (usage & ANV_SUBPASS_USAGE_INPUT) {
      /* Input attachments are clear-color aware so, at least on Sky Lake, we
       * can frequently sample from them with no resolves at all.
       */
      if (att_state->aux_usage != att_state->input_aux_usage) {
         assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      } else if (!att_state->clear_color_is_zero_one) {
         /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
          *
          *    "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
          *    is fast cleared with non-0/1 clear value, this RT must be
          *    partially resolved (refer to Partial Resolve operation) before
          *    binding this surface to Sampler."
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      }
   }

   if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
      return;

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                att_state->aux_usage, &surf);
   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

   /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
    *
    *    "When performing a render target resolve, PIPE_CONTROL with end of
    *    pipe sync must be delivered."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  We need to use a PIPE_CONTROL
    * to ensure that the contents of the previous draw hit the render target
    * before we resolve and then use a second PIPE_CONTROL after the resolve
    * to ensure that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   for (uint32_t layer = 0; layer < fb->layers; layer++) {
      blorp_ccs_resolve(batch, &surf,
                        iview->isl.base_level,
                        iview->isl.base_array_layer + layer,
                        iview->isl.format, resolve_op);
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   /* Once we've done any sort of resolve, we're no longer fast-cleared */
   att_state->fast_clear = false;
}

void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      ccs_resolve_attachment(cmd_buffer, &batch,
                             subpass->color_attachments[i]);
   }

   anv_cmd_buffer_flush_attachments(cmd_buffer, SUBPASS_STAGE_DRAW);

   if (subpass->has_resolve) {
      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t src_att = subpass->color_attachments[i];
         uint32_t dst_att = subpass->resolve_attachments[i];

         if (dst_att == VK_ATTACHMENT_UNUSED)
            continue;

         if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
            /* From the Vulkan 1.0 spec:
             *
             *    If the first use of an attachment in a render pass is as a
             *    resolve attachment, then the loadOp is effectively ignored
             *    as the resolve is guaranteed to overwrite all pixels in the
             *    render area.
             */
            cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
         }

         struct anv_image_view *src_iview = fb->attachments[src_att];
         struct anv_image_view *dst_iview = fb->attachments[dst_att];

         const VkRect2D render_area = cmd_buffer->state.render_area;

         assert(src_iview->aspect_mask == dst_iview->aspect_mask);
         resolve_image(&batch, src_iview->image,
                       src_iview->isl.base_level,
                       src_iview->isl.base_array_layer,
                       dst_iview->image,
                       dst_iview->isl.base_level,
                       dst_iview->isl.base_array_layer,
                       src_iview->aspect_mask,
                       render_area.offset.x, render_area.offset.y,
                       render_area.offset.x, render_area.offset.y,
                       render_area.extent.width, render_area.extent.height);

         ccs_resolve_attachment(cmd_buffer, &batch, dst_att);
      }

      anv_cmd_buffer_flush_attachments(cmd_buffer, SUBPASS_STAGE_RESOLVE);
   }

   blorp_batch_finish(&batch);
}

void
anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
                        const struct anv_image *image,
                        enum blorp_hiz_op op)
{
   assert(image);

   /* Don't resolve depth buffers without an auxiliary HiZ buffer and
    * don't perform such a resolve on gens that don't support it.
    */
   if (cmd_buffer->device->info.gen < 8 ||
       image->aux_usage != ISL_AUX_USAGE_HIZ)
      return;

   assert(op == BLORP_HIZ_OP_HIZ_RESOLVE ||
          op == BLORP_HIZ_OP_DEPTH_RESOLVE);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                ISL_AUX_USAGE_NONE, &surf);

   /* Manually add the aux HiZ surf */
   surf.aux_surf = &image->aux_surface.isl;
   surf.aux_addr = (struct blorp_address) {
      .buffer = image->bo,
      .offset = image->offset + image->aux_surface.offset,
   };
   surf.aux_usage = ISL_AUX_USAGE_HIZ;

   surf.clear_color.u32[0] = (uint32_t) ANV_HZ_FC_VAL;

   blorp_gen6_hiz_op(&batch, &surf, 0, 0, op);
   blorp_batch_finish(&batch);
}