Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

radv_meta_fast_clear.c 29KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849
  1. /*
  2. * Copyright © 2016 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. */
  23. #include <assert.h>
  24. #include <stdbool.h>
  25. #include "radv_meta.h"
  26. #include "radv_private.h"
  27. #include "sid.h"
  28. static nir_shader *
  29. build_dcc_decompress_compute_shader(struct radv_device *dev)
  30. {
  31. nir_builder b;
  32. const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
  33. false,
  34. false,
  35. GLSL_TYPE_FLOAT);
  36. const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
  37. false,
  38. false,
  39. GLSL_TYPE_FLOAT);
  40. nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
  41. b.shader->info.name = ralloc_strdup(b.shader, "dcc_decompress_compute");
  42. /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
  43. b.shader->info.cs.local_size[0] = 16;
  44. b.shader->info.cs.local_size[1] = 16;
  45. b.shader->info.cs.local_size[2] = 1;
  46. nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
  47. buf_type, "s_tex");
  48. input_img->data.descriptor_set = 0;
  49. input_img->data.binding = 0;
  50. nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
  51. img_type, "out_img");
  52. output_img->data.descriptor_set = 0;
  53. output_img->data.binding = 1;
  54. nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
  55. nir_ssa_def *wg_id = nir_load_work_group_id(&b);
  56. nir_ssa_def *block_size = nir_imm_ivec4(&b,
  57. b.shader->info.cs.local_size[0],
  58. b.shader->info.cs.local_size[1],
  59. b.shader->info.cs.local_size[2], 0);
  60. nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
  61. nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
  62. nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
  63. tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
  64. tex->op = nir_texop_txf;
  65. tex->src[0].src_type = nir_tex_src_coord;
  66. tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
  67. tex->src[1].src_type = nir_tex_src_lod;
  68. tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
  69. tex->src[2].src_type = nir_tex_src_texture_deref;
  70. tex->src[2].src = nir_src_for_ssa(input_img_deref);
  71. tex->dest_type = nir_type_float;
  72. tex->is_array = false;
  73. tex->coord_components = 2;
  74. nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
  75. nir_builder_instr_insert(&b, &tex->instr);
  76. nir_intrinsic_instr *membar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier);
  77. nir_builder_instr_insert(&b, &membar->instr);
  78. nir_intrinsic_instr *bar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_barrier);
  79. nir_builder_instr_insert(&b, &bar->instr);
  80. nir_ssa_def *outval = &tex->dest.ssa;
  81. nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
  82. store->num_components = 4;
  83. store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
  84. store->src[1] = nir_src_for_ssa(global_id);
  85. store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
  86. store->src[3] = nir_src_for_ssa(outval);
  87. nir_builder_instr_insert(&b, &store->instr);
  88. return b.shader;
  89. }
  90. static VkResult
  91. create_dcc_compress_compute(struct radv_device *device)
  92. {
  93. VkResult result = VK_SUCCESS;
  94. struct radv_shader_module cs = { .nir = NULL };
  95. cs.nir = build_dcc_decompress_compute_shader(device);
  96. VkDescriptorSetLayoutCreateInfo ds_create_info = {
  97. .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
  98. .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
  99. .bindingCount = 2,
  100. .pBindings = (VkDescriptorSetLayoutBinding[]) {
  101. {
  102. .binding = 0,
  103. .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
  104. .descriptorCount = 1,
  105. .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  106. .pImmutableSamplers = NULL
  107. },
  108. {
  109. .binding = 1,
  110. .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
  111. .descriptorCount = 1,
  112. .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  113. .pImmutableSamplers = NULL
  114. },
  115. }
  116. };
  117. result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
  118. &ds_create_info,
  119. &device->meta_state.alloc,
  120. &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
  121. if (result != VK_SUCCESS)
  122. goto cleanup;
  123. VkPipelineLayoutCreateInfo pl_create_info = {
  124. .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  125. .setLayoutCount = 1,
  126. .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
  127. .pushConstantRangeCount = 1,
  128. .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 8},
  129. };
  130. result = radv_CreatePipelineLayout(radv_device_to_handle(device),
  131. &pl_create_info,
  132. &device->meta_state.alloc,
  133. &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
  134. if (result != VK_SUCCESS)
  135. goto cleanup;
  136. /* compute shader */
  137. VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
  138. .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  139. .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  140. .module = radv_shader_module_to_handle(&cs),
  141. .pName = "main",
  142. .pSpecializationInfo = NULL,
  143. };
  144. VkComputePipelineCreateInfo vk_pipeline_info = {
  145. .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  146. .stage = pipeline_shader_stage,
  147. .flags = 0,
  148. .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
  149. };
  150. result = radv_CreateComputePipelines(radv_device_to_handle(device),
  151. radv_pipeline_cache_to_handle(&device->meta_state.cache),
  152. 1, &vk_pipeline_info, NULL,
  153. &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
  154. if (result != VK_SUCCESS)
  155. goto cleanup;
  156. cleanup:
  157. ralloc_free(cs.nir);
  158. return result;
  159. }
  160. static VkResult
  161. create_pass(struct radv_device *device)
  162. {
  163. VkResult result;
  164. VkDevice device_h = radv_device_to_handle(device);
  165. const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
  166. VkAttachmentDescription attachment;
  167. attachment.format = VK_FORMAT_UNDEFINED;
  168. attachment.samples = 1;
  169. attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
  170. attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
  171. attachment.initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  172. attachment.finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  173. result = radv_CreateRenderPass(device_h,
  174. &(VkRenderPassCreateInfo) {
  175. .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
  176. .attachmentCount = 1,
  177. .pAttachments = &attachment,
  178. .subpassCount = 1,
  179. .pSubpasses = &(VkSubpassDescription) {
  180. .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
  181. .inputAttachmentCount = 0,
  182. .colorAttachmentCount = 1,
  183. .pColorAttachments = (VkAttachmentReference[]) {
  184. {
  185. .attachment = 0,
  186. .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  187. },
  188. },
  189. .pResolveAttachments = NULL,
  190. .pDepthStencilAttachment = &(VkAttachmentReference) {
  191. .attachment = VK_ATTACHMENT_UNUSED,
  192. },
  193. .preserveAttachmentCount = 0,
  194. .pPreserveAttachments = NULL,
  195. },
  196. .dependencyCount = 0,
  197. },
  198. alloc,
  199. &device->meta_state.fast_clear_flush.pass);
  200. return result;
  201. }
  202. static VkResult
  203. create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
  204. {
  205. VkPipelineLayoutCreateInfo pl_create_info = {
  206. .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  207. .setLayoutCount = 0,
  208. .pSetLayouts = NULL,
  209. .pushConstantRangeCount = 0,
  210. .pPushConstantRanges = NULL,
  211. };
  212. return radv_CreatePipelineLayout(radv_device_to_handle(device),
  213. &pl_create_info,
  214. &device->meta_state.alloc,
  215. layout);
  216. }
  217. static VkResult
  218. create_pipeline(struct radv_device *device,
  219. VkShaderModule vs_module_h,
  220. VkPipelineLayout layout)
  221. {
  222. VkResult result;
  223. VkDevice device_h = radv_device_to_handle(device);
  224. struct radv_shader_module fs_module = {
  225. .nir = radv_meta_build_nir_fs_noop(),
  226. };
  227. if (!fs_module.nir) {
  228. /* XXX: Need more accurate error */
  229. result = VK_ERROR_OUT_OF_HOST_MEMORY;
  230. goto cleanup;
  231. }
  232. const VkPipelineShaderStageCreateInfo stages[2] = {
  233. {
  234. .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  235. .stage = VK_SHADER_STAGE_VERTEX_BIT,
  236. .module = vs_module_h,
  237. .pName = "main",
  238. },
  239. {
  240. .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  241. .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
  242. .module = radv_shader_module_to_handle(&fs_module),
  243. .pName = "main",
  244. },
  245. };
  246. const VkPipelineVertexInputStateCreateInfo vi_state = {
  247. .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
  248. .vertexBindingDescriptionCount = 0,
  249. .vertexAttributeDescriptionCount = 0,
  250. };
  251. const VkPipelineInputAssemblyStateCreateInfo ia_state = {
  252. .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
  253. .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
  254. .primitiveRestartEnable = false,
  255. };
  256. const VkPipelineColorBlendStateCreateInfo blend_state = {
  257. .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
  258. .logicOpEnable = false,
  259. .attachmentCount = 1,
  260. .pAttachments = (VkPipelineColorBlendAttachmentState []) {
  261. {
  262. .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
  263. VK_COLOR_COMPONENT_G_BIT |
  264. VK_COLOR_COMPONENT_B_BIT |
  265. VK_COLOR_COMPONENT_A_BIT,
  266. },
  267. }
  268. };
  269. const VkPipelineRasterizationStateCreateInfo rs_state = {
  270. .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
  271. .depthClampEnable = false,
  272. .rasterizerDiscardEnable = false,
  273. .polygonMode = VK_POLYGON_MODE_FILL,
  274. .cullMode = VK_CULL_MODE_NONE,
  275. .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
  276. };
  277. result = radv_graphics_pipeline_create(device_h,
  278. radv_pipeline_cache_to_handle(&device->meta_state.cache),
  279. &(VkGraphicsPipelineCreateInfo) {
  280. .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
  281. .stageCount = 2,
  282. .pStages = stages,
  283. .pVertexInputState = &vi_state,
  284. .pInputAssemblyState = &ia_state,
  285. .pViewportState = &(VkPipelineViewportStateCreateInfo) {
  286. .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
  287. .viewportCount = 1,
  288. .scissorCount = 1,
  289. },
  290. .pRasterizationState = &rs_state,
  291. .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
  292. .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
  293. .rasterizationSamples = 1,
  294. .sampleShadingEnable = false,
  295. .pSampleMask = NULL,
  296. .alphaToCoverageEnable = false,
  297. .alphaToOneEnable = false,
  298. },
  299. .pColorBlendState = &blend_state,
  300. .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
  301. .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
  302. .dynamicStateCount = 2,
  303. .pDynamicStates = (VkDynamicState[]) {
  304. VK_DYNAMIC_STATE_VIEWPORT,
  305. VK_DYNAMIC_STATE_SCISSOR,
  306. },
  307. },
  308. .layout = layout,
  309. .renderPass = device->meta_state.fast_clear_flush.pass,
  310. .subpass = 0,
  311. },
  312. &(struct radv_graphics_pipeline_create_info) {
  313. .use_rectlist = true,
  314. .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
  315. },
  316. &device->meta_state.alloc,
  317. &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
  318. if (result != VK_SUCCESS)
  319. goto cleanup;
  320. result = radv_graphics_pipeline_create(device_h,
  321. radv_pipeline_cache_to_handle(&device->meta_state.cache),
  322. &(VkGraphicsPipelineCreateInfo) {
  323. .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
  324. .stageCount = 2,
  325. .pStages = stages,
  326. .pVertexInputState = &vi_state,
  327. .pInputAssemblyState = &ia_state,
  328. .pViewportState = &(VkPipelineViewportStateCreateInfo) {
  329. .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
  330. .viewportCount = 1,
  331. .scissorCount = 1,
  332. },
  333. .pRasterizationState = &rs_state,
  334. .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
  335. .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
  336. .rasterizationSamples = 1,
  337. .sampleShadingEnable = false,
  338. .pSampleMask = NULL,
  339. .alphaToCoverageEnable = false,
  340. .alphaToOneEnable = false,
  341. },
  342. .pColorBlendState = &blend_state,
  343. .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
  344. .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
  345. .dynamicStateCount = 2,
  346. .pDynamicStates = (VkDynamicState[]) {
  347. VK_DYNAMIC_STATE_VIEWPORT,
  348. VK_DYNAMIC_STATE_SCISSOR,
  349. },
  350. },
  351. .layout = layout,
  352. .renderPass = device->meta_state.fast_clear_flush.pass,
  353. .subpass = 0,
  354. },
  355. &(struct radv_graphics_pipeline_create_info) {
  356. .use_rectlist = true,
  357. .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
  358. },
  359. &device->meta_state.alloc,
  360. &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
  361. if (result != VK_SUCCESS)
  362. goto cleanup;
  363. result = radv_graphics_pipeline_create(device_h,
  364. radv_pipeline_cache_to_handle(&device->meta_state.cache),
  365. &(VkGraphicsPipelineCreateInfo) {
  366. .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
  367. .stageCount = 2,
  368. .pStages = stages,
  369. .pVertexInputState = &vi_state,
  370. .pInputAssemblyState = &ia_state,
  371. .pViewportState = &(VkPipelineViewportStateCreateInfo) {
  372. .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
  373. .viewportCount = 1,
  374. .scissorCount = 1,
  375. },
  376. .pRasterizationState = &rs_state,
  377. .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
  378. .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
  379. .rasterizationSamples = 1,
  380. .sampleShadingEnable = false,
  381. .pSampleMask = NULL,
  382. .alphaToCoverageEnable = false,
  383. .alphaToOneEnable = false,
  384. },
  385. .pColorBlendState = &blend_state,
  386. .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
  387. .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
  388. .dynamicStateCount = 2,
  389. .pDynamicStates = (VkDynamicState[]) {
  390. VK_DYNAMIC_STATE_VIEWPORT,
  391. VK_DYNAMIC_STATE_SCISSOR,
  392. },
  393. },
  394. .layout = layout,
  395. .renderPass = device->meta_state.fast_clear_flush.pass,
  396. .subpass = 0,
  397. },
  398. &(struct radv_graphics_pipeline_create_info) {
  399. .use_rectlist = true,
  400. .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
  401. },
  402. &device->meta_state.alloc,
  403. &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
  404. if (result != VK_SUCCESS)
  405. goto cleanup;
  406. goto cleanup;
  407. cleanup:
  408. ralloc_free(fs_module.nir);
  409. return result;
  410. }
  411. void
  412. radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
  413. {
  414. struct radv_meta_state *state = &device->meta_state;
  415. radv_DestroyPipeline(radv_device_to_handle(device),
  416. state->fast_clear_flush.dcc_decompress_pipeline,
  417. &state->alloc);
  418. radv_DestroyPipeline(radv_device_to_handle(device),
  419. state->fast_clear_flush.fmask_decompress_pipeline,
  420. &state->alloc);
  421. radv_DestroyPipeline(radv_device_to_handle(device),
  422. state->fast_clear_flush.cmask_eliminate_pipeline,
  423. &state->alloc);
  424. radv_DestroyRenderPass(radv_device_to_handle(device),
  425. state->fast_clear_flush.pass, &state->alloc);
  426. radv_DestroyPipelineLayout(radv_device_to_handle(device),
  427. state->fast_clear_flush.p_layout,
  428. &state->alloc);
  429. radv_DestroyPipeline(radv_device_to_handle(device),
  430. state->fast_clear_flush.dcc_decompress_compute_pipeline,
  431. &state->alloc);
  432. radv_DestroyPipelineLayout(radv_device_to_handle(device),
  433. state->fast_clear_flush.dcc_decompress_compute_p_layout,
  434. &state->alloc);
  435. radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
  436. state->fast_clear_flush.dcc_decompress_compute_ds_layout,
  437. &state->alloc);
  438. }
  439. static VkResult
  440. radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
  441. {
  442. VkResult res = VK_SUCCESS;
  443. mtx_lock(&device->meta_state.mtx);
  444. if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
  445. mtx_unlock(&device->meta_state.mtx);
  446. return VK_SUCCESS;
  447. }
  448. struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
  449. if (!vs_module.nir) {
  450. /* XXX: Need more accurate error */
  451. res = VK_ERROR_OUT_OF_HOST_MEMORY;
  452. goto fail;
  453. }
  454. res = create_pass(device);
  455. if (res != VK_SUCCESS)
  456. goto fail;
  457. res = create_pipeline_layout(device,
  458. &device->meta_state.fast_clear_flush.p_layout);
  459. if (res != VK_SUCCESS)
  460. goto fail;
  461. VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
  462. res = create_pipeline(device, vs_module_h,
  463. device->meta_state.fast_clear_flush.p_layout);
  464. if (res != VK_SUCCESS)
  465. goto fail;
  466. res = create_dcc_compress_compute(device);
  467. if (res != VK_SUCCESS)
  468. goto fail;
  469. goto cleanup;
  470. fail:
  471. radv_device_finish_meta_fast_clear_flush_state(device);
  472. cleanup:
  473. ralloc_free(vs_module.nir);
  474. mtx_unlock(&device->meta_state.mtx);
  475. return res;
  476. }
  477. VkResult
  478. radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
  479. {
  480. if (on_demand)
  481. return VK_SUCCESS;
  482. return radv_device_init_meta_fast_clear_flush_state_internal(device);
  483. }
  484. static void
  485. radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
  486. struct radv_image *image,
  487. uint64_t pred_offset, bool value)
  488. {
  489. uint64_t va = 0;
  490. if (value) {
  491. va = radv_buffer_get_va(image->bo) + image->offset;
  492. va += pred_offset;
  493. }
  494. si_emit_set_predication_state(cmd_buffer, true, va);
  495. }
  496. /**
  497. */
  498. static void
  499. radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
  500. struct radv_image *image,
  501. const VkImageSubresourceRange *subresourceRange,
  502. bool decompress_dcc)
  503. {
  504. struct radv_meta_saved_state saved_state;
  505. VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
  506. VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
  507. uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
  508. bool old_predicating = false;
  509. VkPipeline pipeline;
  510. assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
  511. if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
  512. VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
  513. if (ret != VK_SUCCESS) {
  514. cmd_buffer->record_result = ret;
  515. return;
  516. }
  517. }
  518. radv_meta_save(&saved_state, cmd_buffer,
  519. RADV_META_SAVE_GRAPHICS_PIPELINE |
  520. RADV_META_SAVE_PASS);
  521. if (decompress_dcc && radv_image_has_dcc(image)) {
  522. pipeline = cmd_buffer->device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
  523. } else if (radv_image_has_fmask(image)) {
  524. pipeline = cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
  525. } else {
  526. pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
  527. }
  528. if (radv_image_has_dcc(image)) {
  529. uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
  530. image->fce_pred_offset;
  531. old_predicating = cmd_buffer->state.predicating;
  532. radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
  533. cmd_buffer->state.predicating = true;
  534. }
  535. radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
  536. pipeline);
  537. radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) {
  538. .x = 0,
  539. .y = 0,
  540. .width = image->info.width,
  541. .height = image->info.height,
  542. .minDepth = 0.0f,
  543. .maxDepth = 1.0f
  544. });
  545. radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) {
  546. .offset = (VkOffset2D) { 0, 0 },
  547. .extent = (VkExtent2D) { image->info.width, image->info.height },
  548. });
  549. for (uint32_t layer = 0; layer < layer_count; ++layer) {
  550. struct radv_image_view iview;
  551. radv_image_view_init(&iview, cmd_buffer->device,
  552. &(VkImageViewCreateInfo) {
  553. .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
  554. .image = radv_image_to_handle(image),
  555. .viewType = radv_meta_get_view_type(image),
  556. .format = image->vk_format,
  557. .subresourceRange = {
  558. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  559. .baseMipLevel = 0,
  560. .levelCount = 1,
  561. .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
  562. .layerCount = 1,
  563. },
  564. });
  565. VkFramebuffer fb_h;
  566. radv_CreateFramebuffer(device_h,
  567. &(VkFramebufferCreateInfo) {
  568. .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
  569. .attachmentCount = 1,
  570. .pAttachments = (VkImageView[]) {
  571. radv_image_view_to_handle(&iview)
  572. },
  573. .width = image->info.width,
  574. .height = image->info.height,
  575. .layers = 1
  576. },
  577. &cmd_buffer->pool->alloc,
  578. &fb_h);
  579. radv_CmdBeginRenderPass(cmd_buffer_h,
  580. &(VkRenderPassBeginInfo) {
  581. .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
  582. .renderPass = cmd_buffer->device->meta_state.fast_clear_flush.pass,
  583. .framebuffer = fb_h,
  584. .renderArea = {
  585. .offset = {
  586. 0,
  587. 0,
  588. },
  589. .extent = {
  590. image->info.width,
  591. image->info.height,
  592. }
  593. },
  594. .clearValueCount = 0,
  595. .pClearValues = NULL,
  596. },
  597. VK_SUBPASS_CONTENTS_INLINE);
  598. radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
  599. cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
  600. RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
  601. radv_CmdEndRenderPass(cmd_buffer_h);
  602. radv_DestroyFramebuffer(device_h, fb_h,
  603. &cmd_buffer->pool->alloc);
  604. }
  605. if (radv_image_has_dcc(image)) {
  606. uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
  607. image->fce_pred_offset;
  608. cmd_buffer->state.predicating = old_predicating;
  609. radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
  610. if (cmd_buffer->state.predication_type != -1) {
  611. /* Restore previous conditional rendering user state. */
  612. si_emit_set_predication_state(cmd_buffer,
  613. cmd_buffer->state.predication_type,
  614. cmd_buffer->state.predication_va);
  615. }
  616. }
  617. if (radv_image_has_dcc(image)) {
  618. /* Clear the image's fast-clear eliminate predicate because
  619. * FMASK and DCC also imply a fast-clear eliminate.
  620. */
  621. radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
  622. /* Mark the image as being decompressed. */
  623. if (decompress_dcc)
  624. radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
  625. }
  626. radv_meta_restore(&saved_state, cmd_buffer);
  627. }
  628. void
  629. radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
  630. struct radv_image *image,
  631. const VkImageSubresourceRange *subresourceRange)
  632. {
  633. radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
  634. }
  635. static void
  636. radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
  637. struct radv_image *image,
  638. const VkImageSubresourceRange *subresourceRange)
  639. {
  640. radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
  641. }
  642. static void
  643. radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
  644. struct radv_image *image,
  645. const VkImageSubresourceRange *subresourceRange)
  646. {
  647. struct radv_meta_saved_state saved_state;
  648. struct radv_image_view iview = {0};
  649. struct radv_device *device = cmd_buffer->device;
  650. /* This assumes the image is 2d with 1 layer and 1 mipmap level */
  651. struct radv_cmd_state *state = &cmd_buffer->state;
  652. state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
  653. RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
  654. if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
  655. VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
  656. if (ret != VK_SUCCESS) {
  657. cmd_buffer->record_result = ret;
  658. return;
  659. }
  660. }
  661. radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
  662. RADV_META_SAVE_COMPUTE_PIPELINE);
  663. radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
  664. VK_PIPELINE_BIND_POINT_COMPUTE,
  665. device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
  666. radv_image_view_init(&iview, cmd_buffer->device,
  667. &(VkImageViewCreateInfo) {
  668. .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
  669. .image = radv_image_to_handle(image),
  670. .viewType = VK_IMAGE_VIEW_TYPE_2D,
  671. .format = image->vk_format,
  672. .subresourceRange = {
  673. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  674. .baseMipLevel = 0,
  675. .levelCount = 1,
  676. .baseArrayLayer = 0,
  677. .layerCount = 1
  678. },
  679. });
  680. radv_meta_push_descriptor_set(cmd_buffer,
  681. VK_PIPELINE_BIND_POINT_COMPUTE,
  682. device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
  683. 0, /* set */
  684. 2, /* descriptorWriteCount */
  685. (VkWriteDescriptorSet[]) {
  686. {
  687. .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
  688. .dstBinding = 0,
  689. .dstArrayElement = 0,
  690. .descriptorCount = 1,
  691. .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
  692. .pImageInfo = (VkDescriptorImageInfo[]) {
  693. {
  694. .sampler = VK_NULL_HANDLE,
  695. .imageView = radv_image_view_to_handle(&iview),
  696. .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
  697. },
  698. }
  699. },
  700. {
  701. .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
  702. .dstBinding = 1,
  703. .dstArrayElement = 0,
  704. .descriptorCount = 1,
  705. .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
  706. .pImageInfo = (VkDescriptorImageInfo[]) {
  707. {
  708. .sampler = VK_NULL_HANDLE,
  709. .imageView = radv_image_view_to_handle(&iview),
  710. .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
  711. },
  712. }
  713. }
  714. });
  715. radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1);
  716. /* Mark this image as actually being decompressed. */
  717. radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
  718. /* The fill buffer below does its own saving */
  719. radv_meta_restore(&saved_state, cmd_buffer);
  720. state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
  721. RADV_CMD_FLAG_INV_VMEM_L1;
  722. state->flush_bits |= radv_clear_dcc(cmd_buffer, image, subresourceRange,
  723. 0xffffffff);
  724. state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
  725. RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
  726. }
  727. void
  728. radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
  729. struct radv_image *image,
  730. const VkImageSubresourceRange *subresourceRange)
  731. {
  732. if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
  733. radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
  734. else
  735. radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
  736. }