I had a bigger rework in progress, but this is simple and gets tests passing.

Fixes 36 failures in
dEQP-VK.binding_model.shader_access.primary_cmd_buf.sampler_mutable.fragment.*
(now all passing)

Reviewed-by: Jonathan Marek <jonathan@marek.ca>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3124>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3124>
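For reference, the descriptor maps below now carry a per-entry array_size, and
a descriptor is addressed by a flat index: the sum of the array sizes of all
earlier (set, binding) entries plus the index within the array. A rough sketch
of that indexing scheme, using made-up names (example_map, example_flat_index)
rather than the driver's actual helpers:

struct example_map {
   unsigned num;        /* number of (set, binding) entries */
   unsigned num_desc;   /* total descriptors: sum of array_size[] */
   int array_size[64];
};

/* Flat descriptor index for element array_index of map entry 'entry';
 * mirrors how map_add() accumulates array_size[] to pick each entry's base.
 */
static unsigned
example_flat_index(const struct example_map *map, unsigned entry,
                   unsigned array_index)
{
   unsigned base = 0;
   for (unsigned i = 0; i < entry; i++)
      base += map->array_size[i];
   return base + array_index;   /* always < map->num_desc */
}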
@@ -2490,7 +2490,8 @@ struct tu_draw_state_group
 static struct tu_sampler*
 sampler_ptr(struct tu_descriptor_state *descriptors_state,
-            const struct tu_descriptor_map *map, unsigned i)
+            const struct tu_descriptor_map *map, unsigned i,
+            unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
@@ -2504,7 +2505,10 @@ sampler_ptr(struct tu_descriptor_state *descriptors_state,
    case VK_DESCRIPTOR_TYPE_SAMPLER:
       return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS];
+      return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS +
+                                                   array_index *
+                                                   (A6XX_TEX_CONST_DWORDS +
+                                                    sizeof(struct tu_sampler) / 4)];
    default:
       unreachable("unimplemented descriptor type");
       break;
@@ -2516,7 +2520,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
                 uint32_t *dst,
                 struct tu_descriptor_state *descriptors_state,
                 const struct tu_descriptor_map *map,
-                unsigned i)
+                unsigned i, unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
@@ -2528,11 +2532,19 @@ write_tex_const(struct tu_cmd_buffer *cmd,
    switch (layout->type) {
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-      memcpy(dst, &set->mapped_ptr[layout->offset / 4], A6XX_TEX_CONST_DWORDS*4);
+      memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
+                                   array_index * A6XX_TEX_CONST_DWORDS],
+             A6XX_TEX_CONST_DWORDS * 4);
       break;
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
+                                   array_index *
+                                   (A6XX_TEX_CONST_DWORDS +
+                                    sizeof(struct tu_sampler) / 4)],
+             A6XX_TEX_CONST_DWORDS * 4);
+      break;
    default:
       unreachable("unimplemented descriptor type");
@@ -2541,7 +2553,8 @@ write_tex_const(struct tu_cmd_buffer *cmd,
    if (layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
       const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-      uint32_t a = cmd->state.subpass->input_attachments[map->value[i]].attachment;
+      uint32_t a = cmd->state.subpass->input_attachments[map->value[i] +
+                                                         array_index].attachment;
       assert(cmd->state.pass->attachments[a].needs_gmem);
       dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
@@ -2561,7 +2574,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 static uint64_t
 buffer_ptr(struct tu_descriptor_state *descriptors_state,
            const struct tu_descriptor_map *map,
-           unsigned i)
+           unsigned i, unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
@@ -2574,11 +2587,12 @@ buffer_ptr(struct tu_descriptor_state *descriptors_state,
    switch (layout->type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset];
+      return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset +
+                                                array_index];
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 |
-             set->mapped_ptr[layout->offset / 4];
+      return (uint64_t) set->mapped_ptr[layout->offset / 4 + array_index * 2 + 1] << 32 |
+             set->mapped_ptr[layout->offset / 4 + array_index * 2];
    default:
       unreachable("unimplemented descriptor type");
       break;
@@ -2663,7 +2677,22 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
          continue;
       }
-      uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
+      /* Look through the UBO map to find our UBO index, and get the VA for
+       * that UBO.
+       */
+      uint64_t va = 0;
+      uint32_t ubo_idx = i - 1;
+      uint32_t ubo_map_base = 0;
+      for (int j = 0; j < link->ubo_map.num; j++) {
+         if (ubo_idx >= ubo_map_base &&
+             ubo_idx < ubo_map_base + link->ubo_map.array_size[j]) {
+            va = buffer_ptr(descriptors_state, &link->ubo_map, j,
+                            ubo_idx - ubo_map_base);
+            break;
+         }
+         ubo_map_base += link->ubo_map.array_size[j];
+      }
+      assert(va);
       tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
@@ -2684,9 +2713,8 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
    const struct tu_program_descriptor_linkage *link =
       &pipeline->program.link[type];
-   uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos);
+   uint32_t num = MIN2(link->ubo_map.num_desc, link->const_state.num_ubos);
    uint32_t anum = align(num, 2);
-   uint32_t i;
    if (!num)
       return;
@@ -2700,10 +2728,15 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
    tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
    tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-   for (i = 0; i < num; i++)
-      tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i));
+   unsigned emitted = 0;
+   for (unsigned i = 0; emitted < num && i < link->ubo_map.num; i++) {
+      for (unsigned j = 0; emitted < num && j < link->ubo_map.array_size[i]; j++) {
+         tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i, j));
+         emitted++;
+      }
+   }
-   for (; i < anum; i++) {
+   for (; emitted < anum; emitted++) {
       tu_cs_emit(cs, 0xffffffff);
       tu_cs_emit(cs, 0xffffffff);
    }
@@ -2738,33 +2771,45 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
       &pipeline->program.link[type];
    VkResult result;
-   if (link->texture_map.num == 0 && link->sampler_map.num == 0) {
+   if (link->texture_map.num_desc == 0 && link->sampler_map.num_desc == 0) {
       *entry = (struct tu_cs_entry) {};
       return VK_SUCCESS;
    }
    /* allocate and fill texture state */
    struct ts_cs_memory tex_const;
-   result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const);
+   result = tu_cs_alloc(device, draw_state, link->texture_map.num_desc,
+                        A6XX_TEX_CONST_DWORDS, &tex_const);
    if (result != VK_SUCCESS)
       return result;
+   int tex_index = 0;
    for (unsigned i = 0; i < link->texture_map.num; i++) {
-      write_tex_const(cmd,
-                      &tex_const.map[A6XX_TEX_CONST_DWORDS*i],
-                      descriptors_state, &link->texture_map, i);
+      for (int j = 0; j < link->texture_map.array_size[i]; j++) {
+         write_tex_const(cmd,
+                         &tex_const.map[A6XX_TEX_CONST_DWORDS * tex_index++],
+                         descriptors_state, &link->texture_map, i, j);
+      }
    }
    /* allocate and fill sampler state */
-   struct ts_cs_memory tex_samp;
-   result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp);
-   if (result != VK_SUCCESS)
-      return result;
+   struct ts_cs_memory tex_samp = { 0 };
+   if (link->sampler_map.num_desc) {
+      result = tu_cs_alloc(device, draw_state, link->sampler_map.num_desc,
+                           A6XX_TEX_SAMP_DWORDS, &tex_samp);
+      if (result != VK_SUCCESS)
+         return result;
-   for (unsigned i = 0; i < link->sampler_map.num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i);
-      memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state));
-      *needs_border |= sampler->needs_border;
+      int sampler_index = 0;
+      for (unsigned i = 0; i < link->sampler_map.num; i++) {
+         for (int j = 0; j < link->sampler_map.array_size[i]; j++) {
+            struct tu_sampler *sampler = sampler_ptr(descriptors_state,
+                                                     &link->sampler_map, i, j);
+            memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS * sampler_index++],
+                   sampler->state, sizeof(sampler->state));
+            *needs_border |= sampler->needs_border;
+         }
+      }
    }
    unsigned tex_samp_reg, tex_const_reg, tex_count_reg;
@@ -2798,17 +2843,19 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
    if (result != VK_SUCCESS)
       return result;
-   /* output sampler state: */
-   tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
-   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
-              CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
-              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
-              CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-              CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num));
-   tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+   if (link->sampler_map.num_desc) {
+      /* output sampler state: */
+      tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
+      tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num_desc));
+      tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
-   tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
-   tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+      tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
+      tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+   }
    /* emit texture state: */
    tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
@@ -2816,14 +2863,14 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
              CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
              CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-             CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num));
+             CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num_desc));
    tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
    tu_cs_emit_pkt4(&cs, tex_const_reg, 2);
    tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
    tu_cs_emit_pkt4(&cs, tex_count_reg, 1);
-   tu_cs_emit(&cs, link->texture_map.num);
+   tu_cs_emit(&cs, link->texture_map.num_desc);
    *entry = tu_cs_end_sub_stream(draw_state, &cs);
    return VK_SUCCESS;
@@ -2860,7 +2907,8 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
       if (idx & IBO_SSBO) {
          idx &= ~IBO_SSBO;
-         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
+         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx,
+                                  0 /* XXX */);
          /* We don't expose robustBufferAccess, so leave the size unlimited. */
          uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
@@ -2992,21 +3040,26 @@ tu6_emit_border_color(struct tu_cmd_buffer *cmd,
    struct ts_cs_memory ptr;
    VkResult result = tu_cs_alloc(cmd->device, &cmd->sub_cs,
-                                 vs_sampler->num + fs_sampler->num, 128 / 4,
+                                 vs_sampler->num_desc + fs_sampler->num_desc,
+                                 128 / 4,
                                  &ptr);
    if (result != VK_SUCCESS)
       return result;
    for (unsigned i = 0; i < vs_sampler->num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i);
-      memcpy(ptr.map, &border_color[sampler->border], 128);
-      ptr.map += 128 / 4;
+      for (unsigned j = 0; j < vs_sampler->array_size[i]; j++) {
+         struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i, j);
+         memcpy(ptr.map, &border_color[sampler->border], 128);
+         ptr.map += 128 / 4;
+      }
    }
    for (unsigned i = 0; i < fs_sampler->num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i);
-      memcpy(ptr.map, &border_color[sampler->border], 128);
-      ptr.map += 128 / 4;
+      for (unsigned j = 0; j < fs_sampler->array_size[i]; j++) {
+         struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i, j);
+         memcpy(ptr.map, &border_color[sampler->border], 128);
+         ptr.map += 128 / 4;
+      }
    }
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
@@ -21,6 +21,21 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+/**
+ * @file
+ *
+ * The texture and sampler descriptors are laid out in a single global space
+ * across all shader stages, for both simplicity of implementation and because
+ * that seems to be how things have to be structured for border color
+ * handling.
+ *
+ * Each shader stage will declare its texture/sampler count based on the last
+ * descriptor set it uses. At draw emit time (though it really should be
+ * CmdBind time), we upload the descriptor sets used by each shader stage to
+ * their stage.
+ */
 #include "tu_private.h"
 #include <assert.h>
@@ -43,6 +43,7 @@ struct tu_pipeline_builder
 {
    struct tu_device *device;
    struct tu_pipeline_cache *cache;
+   struct tu_pipeline_layout *layout;
    const VkAllocationCallbacks *alloc;
    const VkGraphicsPipelineCreateInfo *create_info;
@@ -358,7 +359,8 @@ tu6_blend_op(VkBlendOp op)
 }
 static void
-tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
+tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *vs)
 {
    uint32_t sp_vs_ctrl =
       A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
@@ -368,8 +370,8 @@ tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
    if (vs->need_pixlod)
       sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE;
-   uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) |
-                           A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp);
+   uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                           A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc);
    if (vs->instrlen)
       sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED;
@@ -386,7 +388,8 @@ tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
 }
 static void
-tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs)
+tu6_emit_hs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *hs)
 {
    uint32_t sp_hs_config = 0;
    if (hs->instrlen)
@@ -404,7 +407,8 @@ tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs)
 }
 static void
-tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds)
+tu6_emit_ds_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *ds)
 {
    uint32_t sp_ds_config = 0;
    if (ds->instrlen)
@@ -419,7 +423,8 @@ tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds)
 }
 static void
-tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs)
+tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *gs)
 {
    uint32_t sp_gs_config = 0;
    if (gs->instrlen)
@@ -437,7 +442,8 @@ tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs)
 }
 static void
-tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
+tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *fs)
 {
    uint32_t sp_fs_ctrl =
       A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 |
@@ -449,8 +455,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
    if (fs->need_pixlod)
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
-   uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) |
-                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp) |
+   uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                           A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) |
                            A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo);
    if (fs->instrlen)
       sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED;
@@ -477,7 +483,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
 }
 static void
-tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v)
+tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
+                   const struct ir3_shader_variant *v)
 {
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
    tu_cs_emit(cs, 0xff);
@@ -490,8 +497,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v)
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2);
    tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED |
               A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) |
-              A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
-              A6XX_SP_CS_CONFIG_NSAMP(v->num_samp));
+              A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) |
+              A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc));
    tu_cs_emit(cs, v->instrlen);
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG0, 1);
@@ -1036,11 +1043,11 @@ tu6_emit_program(struct tu_cs *cs,
       fs = &dummy_variant;
    }
-   tu6_emit_vs_config(cs, vs);
-   tu6_emit_hs_config(cs, hs);
-   tu6_emit_ds_config(cs, ds);
-   tu6_emit_gs_config(cs, gs);
-   tu6_emit_fs_config(cs, fs);
+   tu6_emit_vs_config(cs, builder->shaders[MESA_SHADER_VERTEX], vs);
+   tu6_emit_hs_config(cs, builder->shaders[MESA_SHADER_TESS_CTRL], hs);
+   tu6_emit_ds_config(cs, builder->shaders[MESA_SHADER_TESS_EVAL], ds);
+   tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs);
+   tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs);
    tu6_emit_vs_system_values(cs, vs);
    tu6_emit_vpc(cs, vs, fs, binning_pass);
@@ -1535,7 +1542,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
          continue;
       struct tu_shader *shader =
-         tu_shader_create(builder->device, stage, stage_info, builder->alloc);
+         tu_shader_create(builder->device, stage, stage_info, builder->layout,
+                          builder->alloc);
       if (!shader)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -1910,11 +1918,14 @@ tu_pipeline_builder_init_graphics(
    const VkGraphicsPipelineCreateInfo *create_info,
    const VkAllocationCallbacks *alloc)
 {
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
    *builder = (struct tu_pipeline_builder) {
       .device = dev,
       .cache = cache,
       .create_info = create_info,
      .alloc = alloc,
+      .layout = layout,
    };
    builder->rasterizer_discard =
@@ -2003,7 +2014,7 @@ tu6_emit_compute_program(struct tu_cs *cs,
 {
    const struct ir3_shader_variant *v = &shader->variants[0];
-   tu6_emit_cs_config(cs, v);
+   tu6_emit_cs_config(cs, shader, v);
    /* The compute program is the only one in the pipeline, so 0 offset. */
    tu6_emit_shader_object(cs, MESA_SHADER_COMPUTE, v, binary_bo, 0);
@@ -2044,6 +2055,7 @@ tu_compute_pipeline_create(VkDevice device,
    VkPipeline *pPipeline)
 {
    TU_FROM_HANDLE(tu_device, dev, device);
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
    const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
    VkResult result;
@@ -2053,11 +2065,13 @@ tu_compute_pipeline_create(VkDevice device,
    if (result != VK_SUCCESS)
       return result;
+   pipeline->layout = layout;
    struct tu_shader_compile_options options;
    tu_shader_compile_options_init(&options, NULL);
    struct tu_shader *shader =
-      tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, pAllocator);
+      tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, layout, pAllocator);
    if (!shader) {
       result = VK_ERROR_OUT_OF_HOST_MEMORY;
       goto fail;
@@ -1050,10 +1050,12 @@ struct tu_shader_compile_options
 struct tu_descriptor_map
 {
    /* TODO: avoid fixed size array/justify the size */
-   unsigned num;
+   unsigned num; /* number of array entries */
+   unsigned num_desc; /* Number of descriptors (sum of array_size[]) */
    int set[64];
    int binding[64];
    int value[64];
+   int array_size[64];
 };
 struct tu_shader
@@ -1080,6 +1082,7 @@ struct tu_shader *
 tu_shader_create(struct tu_device *dev,
                  gl_shader_stage stage,
                  const VkPipelineShaderStageCreateInfo *stage_info,
+                 struct tu_pipeline_layout *layout,
                  const VkAllocationCallbacks *alloc);
 void
@@ -109,26 +109,35 @@ tu_sort_variables_by_location(struct exec_list *variables)
 }
 static unsigned
-map_add(struct tu_descriptor_map *map, int set, int binding, int value)
+map_add(struct tu_descriptor_map *map, int set, int binding, int value,
+        int array_size)
 {
-   unsigned index;
-   for (index = 0; index < map->num; index++) {
-      if (set == map->set[index] && binding == map->binding[index])
-         break;
+   unsigned index = 0;
+   for (unsigned i = 0; i < map->num; i++) {
+      if (set == map->set[i] && binding == map->binding[i]) {
+         assert(value == map->value[i]);
+         assert(array_size == map->array_size[i]);
+         return index;
+      }
+      index += map->array_size[i];
    }
-   assert(index < ARRAY_SIZE(map->set));
+   assert(index == map->num_desc);
+   map->set[map->num] = set;
+   map->binding[map->num] = binding;
+   map->value[map->num] = value;
+   map->array_size[map->num] = array_size;
+   map->num++;
+   map->num_desc += array_size;
-   map->set[index] = set;
-   map->binding[index] = binding;
-   map->value[index] = value;
-   map->num = MAX2(map->num, index + 1);
    return index;
 }
 static void
 lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
-                        struct tu_shader *shader)
+                        struct tu_shader *shader,
+                        const struct tu_pipeline_layout *layout)
 {
    nir_ssa_def *index = NULL;
    unsigned base_index = 0;
@@ -184,39 +193,39 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
       nir_tex_instr_remove_src(instr, src_idx);
    }
-   if (array_elements > 1)
-      tu_finishme("texture/sampler array");
-   if (is_sampler) {
-      instr->sampler_index = map_add(&shader->sampler_map,
-                                     deref->var->data.descriptor_set,
-                                     deref->var->data.binding,
-                                     0);
-      instr->sampler_index += base_index;
-   } else {
-      instr->texture_index = map_add(&shader->texture_map,
-                                     deref->var->data.descriptor_set,
-                                     deref->var->data.binding,
-                                     deref->var->data.index);
-      instr->texture_index += base_index;
-      instr->texture_array_size = array_elements;
-   }
+   uint32_t set = deref->var->data.descriptor_set;
+   uint32_t binding = deref->var->data.binding;
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_set_binding_layout *binding_layout =
+      &set_layout->binding[binding];
+   int desc_index = map_add(is_sampler ?
+                            &shader->sampler_map : &shader->texture_map,
+                            deref->var->data.descriptor_set,
+                            deref->var->data.binding,
+                            deref->var->data.index,
+                            binding_layout->array_size) + base_index;
+   if (is_sampler)
+      instr->sampler_index = desc_index;
+   else
+      instr->texture_index = desc_index;
 }
 static bool
-lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader)
+lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader,
+              const struct tu_pipeline_layout *layout)
 {
    int texture_idx =
       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
    if (texture_idx >= 0)
-      lower_tex_src_to_offset(b, instr, texture_idx, shader);
+      lower_tex_src_to_offset(b, instr, texture_idx, shader, layout);
    int sampler_idx =
       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
    if (sampler_idx >= 0)
-      lower_tex_src_to_offset(b, instr, sampler_idx, shader);
+      lower_tex_src_to_offset(b, instr, sampler_idx, shader, layout);
    if (texture_idx < 0 && sampler_idx < 0)
       return false;
@@ -226,7 +235,8 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader)
 static bool
 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
-                struct tu_shader *shader)
+                struct tu_shader *shader,
+                const struct tu_pipeline_layout *layout)
 {
    /* TODO: remove this when layered rendering is implemented */
    if (instr->intrinsic == nir_intrinsic_load_layer_id) {
@@ -260,23 +270,30 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
       return false;
    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
-   if (!const_val || const_val->u32 != 0)
-      tu_finishme("non-zero vulkan_resource_index array index");
    unsigned set = nir_intrinsic_desc_set(instr);
    unsigned binding = nir_intrinsic_binding(instr);
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_set_binding_layout *binding_layout =
+      &set_layout->binding[binding];
    unsigned index = 0;
    switch (nir_intrinsic_desc_type(instr)) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      if (!const_val || const_val->u32 != 0)
+         tu_finishme("non-zero vulkan_resource_index array index");
      /* skip index 0 which is used for push constants */
-      index = map_add(&shader->ubo_map, set, binding, 0) + 1;
+      index = map_add(&shader->ubo_map, set, binding, 0,
+                      binding_layout->array_size) + 1;
      break;
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      index = map_add(&shader->ssbo_map, set, binding, 0);
+      if (!const_val)
+         tu_finishme("non-constant vulkan_resource_index array index");
+      index = map_add(&shader->ssbo_map, set, binding, 0,
+                      binding_layout->array_size);
      break;
    default:
      tu_finishme("unsupported desc_type for vulkan_resource_index");
@@ -291,7 +308,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
 }
 static bool
-lower_impl(nir_function_impl *impl, struct tu_shader *shader)
+lower_impl(nir_function_impl *impl, struct tu_shader *shader,
+           const struct tu_pipeline_layout *layout)
 {
    nir_builder b;
    nir_builder_init(&b, impl);
@@ -302,10 +320,10 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader)
       b.cursor = nir_before_instr(instr);
       switch (instr->type) {
       case nir_instr_type_tex:
-         progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader);
+         progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader, layout);
         break;
       case nir_instr_type_intrinsic:
-         progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader);
+         progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
         break;
       default:
         break;
@@ -317,13 +335,14 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader)
 }
 static bool
-tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader)
+tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
+            const struct tu_pipeline_layout *layout)
 {
    bool progress = false;
    nir_foreach_function(function, shader) {
      if (function->impl)
-         progress |= lower_impl(function->impl, tu_shader);
+         progress |= lower_impl(function->impl, tu_shader, layout);
    }
    return progress;
@@ -333,6 +352,7 @@ struct tu_shader *
 tu_shader_create(struct tu_device *dev,
                  gl_shader_stage stage,
                  const VkPipelineShaderStageCreateInfo *stage_info,
+                 struct tu_pipeline_layout *layout,
                  const VkAllocationCallbacks *alloc)
 {
    const struct tu_shader_module *module =
@@ -426,7 +446,7 @@ tu_shader_create(struct tu_device *dev,
    if (stage == MESA_SHADER_FRAGMENT)
       NIR_PASS_V(nir, nir_lower_input_attachments, true);
-   NIR_PASS_V(nir, tu_lower_io, shader);
+   NIR_PASS_V(nir, tu_lower_io, shader, layout);
    NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);