Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

gen8_pipeline.c 27KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /*
  2. * Copyright © 2015 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. */
  23. #include <assert.h>
  24. #include <stdbool.h>
  25. #include <string.h>
  26. #include <unistd.h>
  27. #include <fcntl.h>
  28. #include "anv_private.h"
  29. static void
  30. emit_vertex_input(struct anv_pipeline *pipeline,
  31. const VkPipelineVertexInputStateCreateInfo *info)
  32. {
  33. const uint32_t num_dwords = 1 + info->attributeCount * 2;
  34. uint32_t *p;
  35. if (info->attributeCount > 0) {
  36. p = anv_batch_emitn(&pipeline->batch, num_dwords,
  37. GEN8_3DSTATE_VERTEX_ELEMENTS);
  38. }
  39. for (uint32_t i = 0; i < info->attributeCount; i++) {
  40. const VkVertexInputAttributeDescription *desc =
  41. &info->pVertexAttributeDescriptions[i];
  42. const struct anv_format *format = anv_format_for_vk_format(desc->format);
  43. struct GEN8_VERTEX_ELEMENT_STATE element = {
  44. .VertexBufferIndex = desc->binding,
  45. .Valid = true,
  46. .SourceElementFormat = format->surface_format,
  47. .EdgeFlagEnable = false,
  48. .SourceElementOffset = desc->offsetInBytes,
  49. .Component0Control = VFCOMP_STORE_SRC,
  50. .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
  51. .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
  52. .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP
  53. };
  54. GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element);
  55. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING,
  56. .InstancingEnable = pipeline->instancing_enable[desc->binding],
  57. .VertexElementIndex = i,
  58. /* Vulkan so far doesn't have an instance divisor, so
  59. * this is always 1 (ignored if not instancing). */
  60. .InstanceDataStepRate = 1);
  61. }
  62. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS,
  63. .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid,
  64. .VertexIDComponentNumber = 2,
  65. .VertexIDElementOffset = info->bindingCount,
  66. .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid,
  67. .InstanceIDComponentNumber = 3,
  68. .InstanceIDElementOffset = info->bindingCount);
  69. }
  70. static void
  71. emit_ia_state(struct anv_pipeline *pipeline,
  72. const VkPipelineInputAssemblyStateCreateInfo *info,
  73. const struct anv_graphics_pipeline_create_info *extra)
  74. {
  75. struct GEN8_3DSTATE_VF vf = {
  76. GEN8_3DSTATE_VF_header,
  77. .IndexedDrawCutIndexEnable = pipeline->primitive_restart
  78. };
  79. GEN8_3DSTATE_VF_pack(NULL, pipeline->gen8.vf, &vf);
  80. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY,
  81. .PrimitiveTopologyType = pipeline->topology);
  82. }
  83. static void
  84. emit_rs_state(struct anv_pipeline *pipeline,
  85. const VkPipelineRasterStateCreateInfo *info,
  86. const struct anv_graphics_pipeline_create_info *extra)
  87. {
  88. static const uint32_t vk_to_gen_cullmode[] = {
  89. [VK_CULL_MODE_NONE] = CULLMODE_NONE,
  90. [VK_CULL_MODE_FRONT] = CULLMODE_FRONT,
  91. [VK_CULL_MODE_BACK] = CULLMODE_BACK,
  92. [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
  93. };
  94. static const uint32_t vk_to_gen_fillmode[] = {
  95. [VK_FILL_MODE_POINTS] = RASTER_POINT,
  96. [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME,
  97. [VK_FILL_MODE_SOLID] = RASTER_SOLID
  98. };
  99. static const uint32_t vk_to_gen_front_face[] = {
  100. [VK_FRONT_FACE_CCW] = CounterClockwise,
  101. [VK_FRONT_FACE_CW] = Clockwise
  102. };
  103. struct GEN8_3DSTATE_SF sf = {
  104. GEN8_3DSTATE_SF_header,
  105. .ViewportTransformEnable = !(extra && extra->disable_viewport),
  106. .TriangleStripListProvokingVertexSelect = 0,
  107. .LineStripListProvokingVertexSelect = 0,
  108. .TriangleFanProvokingVertexSelect = 0,
  109. .PointWidthSource = pipeline->writes_point_size ? Vertex : State,
  110. .PointWidth = 1.0,
  111. };
  112. /* FINISHME: VkBool32 rasterizerDiscardEnable; */
  113. GEN8_3DSTATE_SF_pack(NULL, pipeline->gen8.sf, &sf);
  114. struct GEN8_3DSTATE_RASTER raster = {
  115. GEN8_3DSTATE_RASTER_header,
  116. .FrontWinding = vk_to_gen_front_face[info->frontFace],
  117. .CullMode = vk_to_gen_cullmode[info->cullMode],
  118. .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode],
  119. .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode],
  120. .ScissorRectangleEnable = !(extra && extra->disable_scissor),
  121. .ViewportZClipTestEnable = info->depthClipEnable
  122. };
  123. GEN8_3DSTATE_RASTER_pack(NULL, pipeline->gen8.raster, &raster);
  124. }
  125. static void
  126. emit_cb_state(struct anv_pipeline *pipeline,
  127. const VkPipelineColorBlendStateCreateInfo *info)
  128. {
  129. struct anv_device *device = pipeline->device;
  130. static const uint32_t vk_to_gen_logic_op[] = {
  131. [VK_LOGIC_OP_COPY] = LOGICOP_COPY,
  132. [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
  133. [VK_LOGIC_OP_AND] = LOGICOP_AND,
  134. [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
  135. [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
  136. [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP,
  137. [VK_LOGIC_OP_XOR] = LOGICOP_XOR,
  138. [VK_LOGIC_OP_OR] = LOGICOP_OR,
  139. [VK_LOGIC_OP_NOR] = LOGICOP_NOR,
  140. [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV,
  141. [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
  142. [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
  143. [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
  144. [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
  145. [VK_LOGIC_OP_NAND] = LOGICOP_NAND,
  146. [VK_LOGIC_OP_SET] = LOGICOP_SET,
  147. };
  148. static const uint32_t vk_to_gen_blend[] = {
  149. [VK_BLEND_ZERO] = BLENDFACTOR_ZERO,
  150. [VK_BLEND_ONE] = BLENDFACTOR_ONE,
  151. [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
  152. [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
  153. [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR,
  154. [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
  155. [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
  156. [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
  157. [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA,
  158. [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
  159. [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
  160. [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
  161. [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
  162. [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
  163. [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
  164. [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
  165. [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
  166. [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
  167. [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
  168. };
  169. static const uint32_t vk_to_gen_blend_op[] = {
  170. [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
  171. [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
  172. [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
  173. [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
  174. [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
  175. };
  176. uint32_t num_dwords = GEN8_BLEND_STATE_length;
  177. pipeline->blend_state =
  178. anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
  179. struct GEN8_BLEND_STATE blend_state = {
  180. .AlphaToCoverageEnable = info->alphaToCoverageEnable,
  181. };
  182. for (uint32_t i = 0; i < info->attachmentCount; i++) {
  183. const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i];
  184. blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) {
  185. .LogicOpEnable = info->logicOpEnable,
  186. .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
  187. .ColorBufferBlendEnable = a->blendEnable,
  188. .PreBlendSourceOnlyClampEnable = false,
  189. .PreBlendColorClampEnable = false,
  190. .PostBlendColorClampEnable = false,
  191. .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor],
  192. .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor],
  193. .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor],
  194. .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha],
  195. .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha],
  196. .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha],
  197. .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT),
  198. .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT),
  199. .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT),
  200. .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT),
  201. };
  202. }
  203. GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state);
  204. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS,
  205. .BlendStatePointer = pipeline->blend_state.offset,
  206. .BlendStatePointerValid = true);
  207. }
  208. static const uint32_t vk_to_gen_compare_op[] = {
  209. [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER,
  210. [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS,
  211. [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL,
  212. [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL,
  213. [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER,
  214. [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL,
  215. [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL,
  216. [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS,
  217. };
  218. static const uint32_t vk_to_gen_stencil_op[] = {
  219. [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
  220. [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
  221. [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
  222. [VK_STENCIL_OP_INC_CLAMP] = STENCILOP_INCRSAT,
  223. [VK_STENCIL_OP_DEC_CLAMP] = STENCILOP_DECRSAT,
  224. [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
  225. [VK_STENCIL_OP_INC_WRAP] = STENCILOP_INCR,
  226. [VK_STENCIL_OP_DEC_WRAP] = STENCILOP_DECR,
  227. };
  228. static void
  229. emit_ds_state(struct anv_pipeline *pipeline,
  230. const VkPipelineDepthStencilStateCreateInfo *info)
  231. {
  232. if (info == NULL) {
  233. /* We're going to OR this together with the dynamic state. We need
  234. * to make sure it's initialized to something useful.
  235. */
  236. memset(pipeline->gen8.wm_depth_stencil, 0,
  237. sizeof(pipeline->gen8.wm_depth_stencil));
  238. return;
  239. }
  240. /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
  241. struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
  242. .DepthTestEnable = info->depthTestEnable,
  243. .DepthBufferWriteEnable = info->depthWriteEnable,
  244. .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
  245. .DoubleSidedStencilEnable = true,
  246. .StencilTestEnable = info->stencilTestEnable,
  247. .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp],
  248. .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp],
  249. .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp],
  250. .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp],
  251. .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp],
  252. .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp],
  253. .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp],
  254. .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp],
  255. };
  256. GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->gen8.wm_depth_stencil, &wm_depth_stencil);
  257. }
  258. VkResult
  259. gen8_graphics_pipeline_create(
  260. VkDevice _device,
  261. const VkGraphicsPipelineCreateInfo* pCreateInfo,
  262. const struct anv_graphics_pipeline_create_info *extra,
  263. VkPipeline* pPipeline)
  264. {
  265. ANV_FROM_HANDLE(anv_device, device, _device);
  266. struct anv_pipeline *pipeline;
  267. VkResult result;
  268. uint32_t offset, length;
  269. assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
  270. pipeline = anv_device_alloc(device, sizeof(*pipeline), 8,
  271. VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
  272. if (pipeline == NULL)
  273. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  274. result = anv_pipeline_init(pipeline, device, pCreateInfo, extra);
  275. if (result != VK_SUCCESS)
  276. return result;
  277. /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we
  278. * hard code this to num_attributes - 2. This is because the attributes
  279. * include VUE header and position, which aren't counted as varying
  280. * inputs. */
  281. if (pipeline->vs_simd8 == NO_KERNEL) {
  282. pipeline->wm_prog_data.num_varying_inputs =
  283. pCreateInfo->pVertexInputState->attributeCount - 2;
  284. }
  285. assert(pCreateInfo->pVertexInputState);
  286. emit_vertex_input(pipeline, pCreateInfo->pVertexInputState);
  287. assert(pCreateInfo->pInputAssemblyState);
  288. emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
  289. assert(pCreateInfo->pRasterState);
  290. emit_rs_state(pipeline, pCreateInfo->pRasterState, extra);
  291. emit_ds_state(pipeline, pCreateInfo->pDepthStencilState);
  292. emit_cb_state(pipeline, pCreateInfo->pColorBlendState);
  293. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS,
  294. .StatisticsEnable = true);
  295. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false);
  296. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false);
  297. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false);
  298. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false);
  299. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS,
  300. .ConstantBufferOffset = 0,
  301. .ConstantBufferSize = 4);
  302. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS,
  303. .ConstantBufferOffset = 4,
  304. .ConstantBufferSize = 4);
  305. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS,
  306. .ConstantBufferOffset = 8,
  307. .ConstantBufferSize = 4);
  308. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY,
  309. .ChromaKeyKillEnable = false);
  310. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ);
  311. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS);
  312. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP,
  313. .ClipEnable = true,
  314. .ViewportXYClipTestEnable = !(extra && extra->disable_viewport),
  315. .MinimumPointWidth = 0.125,
  316. .MaximumPointWidth = 255.875);
  317. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM,
  318. .StatisticsEnable = true,
  319. .LineEndCapAntialiasingRegionWidth = _05pixels,
  320. .LineAntialiasingRegionWidth = _10pixels,
  321. .EarlyDepthStencilControl = NORMAL,
  322. .ForceThreadDispatchEnable = NORMAL,
  323. .PointRasterizationRule = RASTRULE_UPPER_RIGHT,
  324. .BarycentricInterpolationMode =
  325. pipeline->wm_prog_data.barycentric_interp_modes);
  326. uint32_t samples = 1;
  327. uint32_t log2_samples = __builtin_ffs(samples) - 1;
  328. bool enable_sampling = samples > 1 ? true : false;
  329. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE,
  330. .PixelPositionOffsetEnable = enable_sampling,
  331. .PixelLocation = CENTER,
  332. .NumberofMultisamples = log2_samples);
  333. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK,
  334. .SampleMask = 0xffff);
  335. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS,
  336. .VSURBStartingAddress = pipeline->urb.vs_start,
  337. .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1,
  338. .VSNumberofURBEntries = pipeline->urb.nr_vs_entries);
  339. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS,
  340. .GSURBStartingAddress = pipeline->urb.gs_start,
  341. .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1,
  342. .GSNumberofURBEntries = pipeline->urb.nr_gs_entries);
  343. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS,
  344. .HSURBStartingAddress = pipeline->urb.vs_start,
  345. .HSURBEntryAllocationSize = 0,
  346. .HSNumberofURBEntries = 0);
  347. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS,
  348. .DSURBStartingAddress = pipeline->urb.vs_start,
  349. .DSURBEntryAllocationSize = 0,
  350. .DSNumberofURBEntries = 0);
  351. const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data;
  352. offset = 1;
  353. length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
  354. if (pipeline->gs_vec4 == NO_KERNEL)
  355. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false);
  356. else
  357. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS,
  358. .SingleProgramFlow = false,
  359. .KernelStartPointer = pipeline->gs_vec4,
  360. .VectorMaskEnable = Vmask,
  361. .SamplerCount = 0,
  362. .BindingTableEntryCount = 0,
  363. .ExpectedVertexCount = pipeline->gs_vertex_count,
  364. .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY],
  365. .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048),
  366. .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
  367. .OutputTopology = gs_prog_data->output_topology,
  368. .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length,
  369. .DispatchGRFStartRegisterForURBData =
  370. gs_prog_data->base.base.dispatch_grf_start_reg,
  371. .MaximumNumberofThreads = device->info.max_gs_threads,
  372. .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords,
  373. //pipeline->gs_prog_data.dispatch_mode |
  374. .StatisticsEnable = true,
  375. .IncludePrimitiveID = gs_prog_data->include_primitive_id,
  376. .ReorderMode = TRAILING,
  377. .Enable = true,
  378. .ControlDataFormat = gs_prog_data->control_data_format,
  379. /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
  380. * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
  381. * UserClipDistanceCullTestEnableBitmask(v)
  382. */
  383. .VertexURBEntryOutputReadOffset = offset,
  384. .VertexURBEntryOutputLength = length);
  385. const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base;
  386. /* Skip the VUE header and position slots */
  387. offset = 1;
  388. length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset;
  389. if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs))
  390. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS,
  391. .FunctionEnable = false,
  392. /* Even if VS is disabled, SBE still gets the amount of
  393. * vertex data to read from this field. */
  394. .VertexURBEntryOutputReadOffset = offset,
  395. .VertexURBEntryOutputLength = length);
  396. else
  397. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS,
  398. .KernelStartPointer = pipeline->vs_simd8,
  399. .SingleVertexDispatch = Multiple,
  400. .VectorMaskEnable = Dmask,
  401. .SamplerCount = 0,
  402. .BindingTableEntryCount =
  403. vue_prog_data->base.binding_table.size_bytes / 4,
  404. .ThreadDispatchPriority = Normal,
  405. .FloatingPointMode = IEEE754,
  406. .IllegalOpcodeExceptionEnable = false,
  407. .AccessesUAV = false,
  408. .SoftwareExceptionEnable = false,
  409. .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX],
  410. .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048),
  411. .DispatchGRFStartRegisterForURBData =
  412. vue_prog_data->base.dispatch_grf_start_reg,
  413. .VertexURBEntryReadLength = vue_prog_data->urb_read_length,
  414. .VertexURBEntryReadOffset = 0,
  415. .MaximumNumberofThreads = device->info.max_vs_threads - 1,
  416. .StatisticsEnable = false,
  417. .SIMD8DispatchEnable = true,
  418. .VertexCacheDisable = false,
  419. .FunctionEnable = true,
  420. .VertexURBEntryOutputReadOffset = offset,
  421. .VertexURBEntryOutputLength = length,
  422. .UserClipDistanceClipTestEnableBitmask = 0,
  423. .UserClipDistanceCullTestEnableBitmask = 0);
  424. const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
  425. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE,
  426. .ForceVertexURBEntryReadLength = false,
  427. .ForceVertexURBEntryReadOffset = false,
  428. .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
  429. .NumberofSFOutputAttributes =
  430. wm_prog_data->num_varying_inputs);
  431. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS,
  432. .KernelStartPointer0 = pipeline->ps_ksp0,
  433. .SingleProgramFlow = false,
  434. .VectorMaskEnable = true,
  435. .SamplerCount = 1,
  436. .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
  437. .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
  438. .MaximumNumberofThreadsPerPSD = 64 - 2,
  439. .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
  440. POSOFFSET_SAMPLE: POSOFFSET_NONE,
  441. .PushConstantEnable = wm_prog_data->base.nr_params > 0,
  442. ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
  443. ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
  444. ._32PixelDispatchEnable = false,
  445. .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
  446. .DispatchGRFStartRegisterForConstantSetupData1 = 0,
  447. .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,
  448. .KernelStartPointer1 = 0,
  449. .KernelStartPointer2 = pipeline->ps_ksp2);
  450. bool per_sample_ps = false;
  451. anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA,
  452. .PixelShaderValid = true,
  453. .PixelShaderKillsPixel = wm_prog_data->uses_kill,
  454. .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
  455. .AttributeEnable = wm_prog_data->num_varying_inputs > 0,
  456. .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
  457. .PixelShaderIsPerSample = per_sample_ps);
  458. *pPipeline = anv_pipeline_to_handle(pipeline);
  459. return VK_SUCCESS;
  460. }
  461. VkResult gen8_compute_pipeline_create(
  462. VkDevice _device,
  463. const VkComputePipelineCreateInfo* pCreateInfo,
  464. VkPipeline* pPipeline)
  465. {
  466. ANV_FROM_HANDLE(anv_device, device, _device);
  467. struct anv_pipeline *pipeline;
  468. VkResult result;
  469. assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
  470. pipeline = anv_device_alloc(device, sizeof(*pipeline), 8,
  471. VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
  472. if (pipeline == NULL)
  473. return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  474. pipeline->device = device;
  475. pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
  476. result = anv_reloc_list_init(&pipeline->batch_relocs, device);
  477. if (result != VK_SUCCESS) {
  478. anv_device_free(device, pipeline);
  479. return result;
  480. }
  481. pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
  482. pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
  483. pipeline->batch.relocs = &pipeline->batch_relocs;
  484. anv_state_stream_init(&pipeline->program_stream,
  485. &device->instruction_block_pool);
  486. memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
  487. pipeline->shaders[VK_SHADER_STAGE_COMPUTE] =
  488. anv_shader_from_handle(pCreateInfo->stage.shader);
  489. pipeline->use_repclear = false;
  490. anv_compiler_run(device->compiler, pipeline);
  491. const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
  492. anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
  493. .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
  494. .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
  495. .ScratchSpaceBasePointerHigh = 0,
  496. .StackSize = 0,
  497. .MaximumNumberofThreads = device->info.max_cs_threads - 1,
  498. .NumberofURBEntries = 2,
  499. .ResetGatewayTimer = true,
  500. .BypassGatewayControl = true,
  501. .URBEntryAllocationSize = 2,
  502. .CURBEAllocationSize = 0);
  503. struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
  504. uint32_t group_size = prog_data->local_size[0] *
  505. prog_data->local_size[1] * prog_data->local_size[2];
  506. pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size);
  507. uint32_t remainder = group_size & (prog_data->simd_size - 1);
  508. if (remainder > 0)
  509. pipeline->cs_right_mask = ~0u >> (32 - remainder);
  510. else
  511. pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size);
  512. *pPipeline = anv_pipeline_to_handle(pipeline);
  513. return VK_SUCCESS;
  514. }