Clone of mesa.

iris_program.c 62KB

/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"

#define KEY_INIT_NO_ID(gen)                       \
   .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
   .tex.compressed_multisample_layout_mask = ~0,  \
   .tex.msaa_16 = (gen >= 9 ? ~0 : 0)
#define KEY_INIT(gen) .program_string_id = ish->program_id, KEY_INIT_NO_ID(gen)
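
/* A note on KEY_INIT_NO_ID (explanation, not original source): 0x688 is
 * the identity texture swizzle, SWIZZLE_XYZW, packed as four 3-bit
 * selectors: (3 << 9) | (2 << 6) | (1 << 3) | (0 << 0) == 0x688.  Every
 * sampler in a fresh key therefore defaults to "no swizzle".
 */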
static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See iris_compiled_shader, which represents a compiled shader variant.
 */
struct iris_uncompiled_shader {
   nir_shader *nir;

   struct pipe_stream_output_info stream_output;

   unsigned program_id;

   /** Bitfield of (1 << IRIS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;
};

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
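
/* As a worked example (not from the original source): for a declaration
 * like "image2D img[3][2]" and a deref chain img[i][j] with elem_size 1,
 * the loop above computes offset = j * 1 + i * 2, and the final clamp
 * limits it to array_size - elem_size = 3 * 2 - 1 = 5.
 */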
static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

// XXX: need unify_interfaces() at link time...

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}
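
/* As a worked example (not from the original source): if outputs_written
 * is (1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_LAYER), Gallium hands us
 * register_index 0 and 1.  The reverse map turns those back into
 * VARYING_SLOT_POS and VARYING_SLOT_LAYER, and the switch then redirects
 * the layer output to the .y component of VARYING_SLOT_PSIZ.
 */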
/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused but also make sure that addition of small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
static uint32_t
assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                    const struct nir_shader *nir,
                                    struct brw_stage_prog_data *prog_data,
                                    uint32_t next_binding_table_offset,
                                    unsigned num_system_values,
                                    unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   unsigned num_textures = util_last_bit(info->textures_used);

   if (num_textures) {
      prog_data->binding_table.texture_start = next_binding_table_offset;
      prog_data->binding_table.gather_texture_start = next_binding_table_offset;
      next_binding_table_offset += num_textures;
   } else {
      prog_data->binding_table.texture_start = 0xd0d0d0d0;
      prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (info->num_images) {
      prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += info->num_images;
   } else {
      prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   if (num_cbufs) {
      //assert(info->num_ubos <= BRW_MAX_UBO);
      prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += num_cbufs;
   } else {
      prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (info->num_ssbos || info->num_abos) {
      prog_data->binding_table.ssbo_start = next_binding_table_offset;
      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
      next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
   } else {
      prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   prog_data->binding_table.shader_time_start = 0xd0d0d0d0;

   /* Plane 0 is just the regular texture section */
   prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;

   prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* Set the binding table size */
   prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   return next_binding_table_offset;
}
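
/* As a worked example (not from the original source): a fragment shader
 * with one render target (so next_binding_table_offset starts at 1), two
 * textures, one constant buffer, and no images or SSBOs would be laid out
 * as: textures at [1,2], UBO at [3], plane 1 at [4,5], plane 2 at [6,7],
 * giving a table of 8 entries (size_bytes = 32).
 */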
static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}
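
/* In other words (explanation, not original source): each call fills one
 * vec4 worth of system values.  The first n components reference dwords of
 * the brw_image_param at the given byte offset; the remaining components
 * are zero-padded so every field occupies a full vec4 slot.
 */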
/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct gen_device_info *devinfo = compiler->devinfo;

   /* The intel compiler assumes that num_uniforms is in bytes.  For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* XXX: var->data.binding is not set properly.  We need to run
             * some form of gl_nir_lower_samplers_as_deref() to get it.
             * This breaks tests which use more than one image.
             */
            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         default:
            continue;
         }

         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);
      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   if (nir->info.stage != MESA_SHADER_COMPUTE)
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* System values and uniforms are stored in constant buffer 0, the
    * user-facing UBOs are indexed by one.  So if any constant buffer is
    * needed, the constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || num_system_values || nir->num_uniforms)
      num_cbufs++;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}
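
/* The resulting constant buffer layout (spelled out here for clarity, not
 * in the original source) is:
 *
 *    cbuf0: [ system values | user uniforms ]
 *    cbuf1: the application's UBO binding 0
 *    cbufN: the application's UBO binding N-1
 *
 * which is why the remapping pass above shifts the offsets of existing
 * cbuf0 loads up by 4 * num_system_values bytes.
 */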
static void
iris_debug_recompile(struct iris_context *ice,
                     struct shader_info *info,
                     unsigned program_string_id,
                     const void *key)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   const struct brw_compiler *c = screen->compiler;

   if (!info)
      return;

   c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   const void *old_key =
      iris_find_previous_compile(ice, info->stage, program_string_id);

   brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      prog_data->use_alt_mode = true;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   /* Don't tell the backend about our clip plane constants, we've already
    * lowered them in NIR and we don't want it doing it again.
    */
   struct brw_vs_prog_key key_no_ucp = *key;
   key_no_ucp.nr_userclip_plane_consts = 0;

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VS |
                          IRIS_DIRTY_BINDINGS_VS |
                          IRIS_DIRTY_CONSTANTS_VS |
                          IRIS_DIRTY_VF_SGVS;

      const struct brw_vs_prog_data *vs_prog_data =
         (void *) shader->prog_data;
      const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
                                    vs_prog_data->uses_baseinstance;
      const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
                                            vs_prog_data->uses_is_indexed_draw;
      const bool needs_sgvs_element = uses_draw_params ||
                                      vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid;
      bool needs_edge_flag = false;
      nir_foreach_variable(var, &ish->nir->inputs) {
         if (var->data.location == VERT_ATTRIB_EDGEFLAG)
            needs_edge_flag = true;
      }

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }

      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = needs_edge_flag;
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                          &num_system_values, &num_cbufs);
      assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                          num_system_values, num_cbufs);
   } else {
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);

      /* Reserve space for passing the default tess levels as constants. */
      num_system_values = 8;
      system_values =
         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
      prog_data->nr_params = num_system_values;

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }

      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
                      -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile control shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish) {
      if (ish->compiled_once) {
         iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
      } else {
         ish->compiled_once = true;
      }
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}
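
/* A note on the passthrough-TCS path above (explanation, not original
 * source): the eight reserved params occupy a single 32-byte push register,
 * with the outer tess levels in dwords 7..4 and the inner levels in dwords
 * 3..2, and ubo_ranges[0].length = 1 asks the backend to push that one
 * 32-byte range from constant buffer 0.
 */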
/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      KEY_INIT_NO_ID(devinfo->gen),
      .program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   ice->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_tcs(ice, tcs, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TCS] = shader;
      ice->state.dirty |= IRIS_DIRTY_TCS |
                          IRIS_DIRTY_BINDINGS_TCS |
                          IRIS_DIRTY_CONSTANTS_TCS;
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tes(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tes_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
                      tes_prog_data, nir, NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct brw_tes_prog_key key = { KEY_INIT(devinfo->gen) };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   ice->vtbl.populate_tes_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_tes(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TES] = shader;
      ice->state.dirty |= IRIS_DIRTY_TES |
                          IRIS_DIRTY_BINDINGS_TES |
                          IRIS_DIRTY_CONSTANTS_TES;
   }

   /* TODO: Could compare and avoid flagging this. */
   const struct shader_info *tes_info = &ish->nir->info;
   if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) {
      ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES;
      ice->state.shaders[MESA_SHADER_TESS_EVAL].cbuf0_needs_upload = true;
   }
}
/**
 * Compile a geometry shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_gs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_gs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
                     NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;

   if (ish) {
      struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };
      ice->vtbl.populate_gs_key(ice, &key);

      shader =
         iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);

      if (!shader)
         shader = iris_compile_gs(ice, ish, &key);
   }

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_GS] = shader;
      ice->state.dirty |= IRIS_DIRTY_GS |
                          IRIS_DIRTY_BINDINGS_GS |
                          IRIS_DIRTY_CONSTANTS_GS;
   }
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_wm_prog_key *key,
                struct brw_vue_map *vue_map)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      prog_data->use_alt_mode = true;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data,
                                       MAX2(key->nr_color_regions, 1),
                                       num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
                     nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_wm_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_fs_key(ice, &key);

   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      ice->shaders.prog[IRIS_CACHE_FS] = shader;
      ice->state.dirty |= IRIS_DIRTY_FS |
                          IRIS_DIRTY_BINDINGS_FS |
                          IRIS_DIRTY_CONSTANTS_FS |
                          IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
   }
}
/**
 * Get the last enabled geometry stage (VS, TES, or GS).
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct brw_stage_prog_data *prog_data)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = ice->shaders.last_vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      // XXX: could use ctx->Const.MaxViewports for old API efficiency
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
      // XXX: CC_VIEWPORT?
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   ice->shaders.last_vue_map = &vue_prog_data->vue_map;
}

/**
 * Get the prog_data for a given stage, or NULL if the stage is disabled.
 */
static struct brw_vue_prog_data *
get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
{
   if (!ice->shaders.prog[stage])
      return NULL;

   return (void *) ice->shaders.prog[stage]->prog_data;
}

// XXX: iris_compiled_shaders are space-leaking :(
// XXX: do remember to unbind them if deleting them.

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound.  It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t dirty = ice->state.dirty;

   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
         ice->shaders.prog[IRIS_CACHE_TES] = NULL;
         ice->state.dirty |=
            IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
            IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
            IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         struct iris_stream_output_target *so =
            (void *) ice->state.so_target[i];
         if (so)
            so->stride = ish->stream_output.stride[i];
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);
   // ...

   /* Changing shader interfaces may require a new URB configuration. */
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= IRIS_DIRTY_URB;
            break;
         }
      }
   }
}
static struct iris_compiled_shader *
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   cs_prog_data->binding_table.work_groups_start = 0;

   prog_data->total_shared = nir->info.cs.shared_size;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
                                       num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_CS] = shader;
      ice->state.dirty |= IRIS_DIRTY_CS |
                          IRIS_DIRTY_BINDINGS_CS |
                          IRIS_DIRTY_CONSTANTS_CS;
   }
}

void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}
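
/* A note on the loop above (explanation, not original source): per-thread
 * push constants are uploaded in whole 32-byte registers (8 dwords), one
 * block per thread.  With exactly one per-thread dword - the subgroup ID -
 * thread t's value lands at dword 8 * t and the other 7 dwords are padding.
 */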

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
    *     must allocate scratch space enough so that each slice has 4
    *     slices allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well.  This is not currently documented at all.
    *
    * This hack is no longer necessary on Gen11+.
    */
   unsigned subslice_total = screen->subslice_total;
   if (devinfo->gen < 11)
      subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return *bop;
}
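
/* Illustrative sketch of the encoding above, assuming per_thread_scratch
 * is a power of two of at least 1KB (2^10), as the ffs()-based formula
 * implies:
 *
 *    per_thread_scratch   ffs()   encoded_size
 *    1024  (1KB)          11      0
 *    2048  (2KB)          12      1
 *    4096  (4KB)          13      2
 *
 * so each power-of-two scratch size gets its own slot in the
 * scratch_bos[][] cache, and repeated requests reuse the same BO.
 */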

/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   return ish;
}

static struct iris_uncompiled_shader *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   struct nir_shader *nir;

   if (state->type == PIPE_SHADER_IR_TGSI)
      nir = tgsi_to_nir(state->tokens, ctx->screen);
   else
      nir = state->ir.nir;

   return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
}

static void *
iris_create_vs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip planes */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };

      iris_compile_vs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tcs_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode =
            info->tess.primitive_mode ? info->tess.primitive_mode
                                      : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      iris_compile_tcs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tes_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      iris_compile_tes(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };

      iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_wm_prog_key key = {
         KEY_INIT(devinfo->gen),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };

      iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state,
                         gl_shader_stage stage)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
   }

   ralloc_free(ish->nir);
   free(ish);
}

static void
iris_delete_vs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_delete_tes_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_delete_gs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_delete_fs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_delete_cs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS (non-orthogonal
 * state) dependencies.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

   if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
       (new_info ? util_last_bit(new_info->textures_used) : 0)) {
      ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}
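
/* Illustrative sketch of the consuming side (an assumption about code
 * outside this file, not a quote of it): when a CSO of a given kind is
 * later bound, the state module can OR in the stage dirty bits recorded
 * above, e.g.:
 *
 *    ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_RASTERIZER];
 *
 * so shader variants are re-selected only for stages whose current
 * shader actually declared that dependency via ish->nos.
 */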

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state  = iris_create_gs_state;
   ctx->create_fs_state  = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_vs_state;
   ctx->delete_tcs_state = iris_delete_tcs_state;
   ctx->delete_tes_state = iris_delete_tes_state;
   ctx->delete_gs_state  = iris_delete_gs_state;
   ctx->delete_fs_state  = iris_delete_fs_state;
   ctx->delete_compute_state = iris_delete_cs_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}
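
/* Illustrative usage sketch (not driver code): a Gallium state tracker
 * drives the hooks installed above roughly as follows.  The exact
 * pipe_shader_state setup is an assumption here, shown for illustration:
 *
 *    struct pipe_shader_state ss = {
 *       .type = PIPE_SHADER_IR_NIR,
 *       .ir.nir = nir,            // ownership passes to the driver
 *    };
 *    void *fs = ctx->create_fs_state(ctx, &ss);
 *    ctx->bind_fs_state(ctx, fs);
 *    // ... draw calls; compilation to assembly happens at first use ...
 *    ctx->bind_fs_state(ctx, NULL);
 *    ctx->delete_fs_state(ctx, fs);
 */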