
turnip: Add support for descriptor arrays.

I had a bigger rework in progress, but this is simple and gets tests
passing.

Fixes 36 failures in
dEQP-VK.binding_model.shader_access.primary_cmd_buf.sampler_mutable.fragment.*
(now all passing)

Reviewed-by: Jonathan Marek <jonathan@marek.ca>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3124>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3124>
branch: master
Eric Anholt, 5 years ago
commit 2d3182b429
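
The heart of the change: each tu_descriptor_map entry now describes a whole
binding (array_size descriptors) rather than a single descriptor, and callers
address one descriptor as (map entry, index within the array). A minimal
standalone sketch of the flat-slot lookup this implies; the field names mirror
tu_descriptor_map, everything else here is illustrative:

#include <assert.h>

struct descriptor_map {      /* simplified stand-in for tu_descriptor_map */
   unsigned num;             /* number of bindings (array entries) */
   unsigned num_desc;        /* total descriptors: sum of array_size[] */
   int array_size[64];
};

/* Map a flat descriptor slot back to (binding entry, index within its
 * array); this is the same walk tu6_emit_user_consts() below does over
 * link->ubo_map. */
static void
unflatten(const struct descriptor_map *map, unsigned flat,
          unsigned *entry, unsigned *array_index)
{
   unsigned base = 0;
   for (unsigned i = 0; i < map->num; i++) {
      if (flat < base + map->array_size[i]) {
         *entry = i;
         *array_index = flat - base;
         return;
      }
      base += map->array_size[i];
   }
   assert(!"flat index out of range");
}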

src/freedreno/vulkan/tu_cmd_buffer.c (+102, -49)

@@ -2490,7 +2490,8 @@ struct tu_draw_state_group
 
 static struct tu_sampler*
 sampler_ptr(struct tu_descriptor_state *descriptors_state,
-            const struct tu_descriptor_map *map, unsigned i)
+            const struct tu_descriptor_map *map, unsigned i,
+            unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
 
@@ -2504,7 +2505,10 @@ sampler_ptr(struct tu_descriptor_state *descriptors_state,
    case VK_DESCRIPTOR_TYPE_SAMPLER:
       return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS];
+      return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS +
+                                                   array_index *
+                                                   (A6XX_TEX_CONST_DWORDS +
+                                                    sizeof(struct tu_sampler) / 4)];
    default:
       unreachable("unimplemented descriptor type");
       break;
@@ -2516,7 +2520,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
                 uint32_t *dst,
                 struct tu_descriptor_state *descriptors_state,
                 const struct tu_descriptor_map *map,
-                unsigned i)
+                unsigned i, unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
 
@@ -2528,11 +2532,19 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 
    switch (layout->type) {
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-      memcpy(dst, &set->mapped_ptr[layout->offset / 4], A6XX_TEX_CONST_DWORDS*4);
+      memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
+                                   array_index * A6XX_TEX_CONST_DWORDS],
+             A6XX_TEX_CONST_DWORDS * 4);
+      break;
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
+                                   array_index *
+                                   (A6XX_TEX_CONST_DWORDS +
+                                    sizeof(struct tu_sampler) / 4)],
+             A6XX_TEX_CONST_DWORDS * 4);
       break;
    default:
       unreachable("unimplemented descriptor type");
@@ -2541,7 +2553,8 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 
    if (layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
       const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-      uint32_t a = cmd->state.subpass->input_attachments[map->value[i]].attachment;
+      uint32_t a = cmd->state.subpass->input_attachments[map->value[i] +
+                                                         array_index].attachment;
 
       assert(cmd->state.pass->attachments[a].needs_gmem);
       dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
@@ -2561,7 +2574,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 static uint64_t
 buffer_ptr(struct tu_descriptor_state *descriptors_state,
            const struct tu_descriptor_map *map,
-           unsigned i)
+           unsigned i, unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
 
@@ -2574,11 +2587,12 @@ buffer_ptr(struct tu_descriptor_state *descriptors_state,
    switch (layout->type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset];
+      return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset +
+                                                array_index];
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 |
-             set->mapped_ptr[layout->offset / 4];
+      return (uint64_t) set->mapped_ptr[layout->offset / 4 + array_index * 2 + 1] << 32 |
+             set->mapped_ptr[layout->offset / 4 + array_index * 2];
    default:
       unreachable("unimplemented descriptor type");
       break;
@@ -2663,7 +2677,22 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
          continue;
       }
 
-      uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
+      /* Look through the UBO map to find our UBO index, and get the VA for
+       * that UBO.
+       */
+      uint64_t va = 0;
+      uint32_t ubo_idx = i - 1;
+      uint32_t ubo_map_base = 0;
+      for (int j = 0; j < link->ubo_map.num; j++) {
+         if (ubo_idx >= ubo_map_base &&
+             ubo_idx < ubo_map_base + link->ubo_map.array_size[j]) {
+            va = buffer_ptr(descriptors_state, &link->ubo_map, j,
+                            ubo_idx - ubo_map_base);
+            break;
+         }
+         ubo_map_base += link->ubo_map.array_size[j];
+      }
+      assert(va);
 
       tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
@@ -2684,9 +2713,8 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
    const struct tu_program_descriptor_linkage *link =
       &pipeline->program.link[type];
 
-   uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos);
+   uint32_t num = MIN2(link->ubo_map.num_desc, link->const_state.num_ubos);
    uint32_t anum = align(num, 2);
-   uint32_t i;
 
    if (!num)
       return;
@@ -2700,10 +2728,15 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
    tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
    tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
 
-   for (i = 0; i < num; i++)
-      tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i));
+   unsigned emitted = 0;
+   for (unsigned i = 0; emitted < num && i < link->ubo_map.num; i++) {
+      for (unsigned j = 0; emitted < num && j < link->ubo_map.array_size[i]; j++) {
+         tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i, j));
+         emitted++;
+      }
+   }
 
-   for (; i < anum; i++) {
+   for (; emitted < anum; emitted++) {
       tu_cs_emit(cs, 0xffffffff);
       tu_cs_emit(cs, 0xffffffff);
    }
@@ -2738,33 +2771,45 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
       &pipeline->program.link[type];
    VkResult result;
 
-   if (link->texture_map.num == 0 && link->sampler_map.num == 0) {
+   if (link->texture_map.num_desc == 0 && link->sampler_map.num_desc == 0) {
       *entry = (struct tu_cs_entry) {};
       return VK_SUCCESS;
    }
 
    /* allocate and fill texture state */
    struct ts_cs_memory tex_const;
-   result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const);
+   result = tu_cs_alloc(device, draw_state, link->texture_map.num_desc,
+                        A6XX_TEX_CONST_DWORDS, &tex_const);
    if (result != VK_SUCCESS)
       return result;
 
+   int tex_index = 0;
    for (unsigned i = 0; i < link->texture_map.num; i++) {
-      write_tex_const(cmd,
-                      &tex_const.map[A6XX_TEX_CONST_DWORDS*i],
-                      descriptors_state, &link->texture_map, i);
+      for (int j = 0; j < link->texture_map.array_size[i]; j++) {
+         write_tex_const(cmd,
+                         &tex_const.map[A6XX_TEX_CONST_DWORDS * tex_index++],
+                         descriptors_state, &link->texture_map, i, j);
+      }
    }
 
    /* allocate and fill sampler state */
-   struct ts_cs_memory tex_samp;
-   result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp);
-   if (result != VK_SUCCESS)
-      return result;
+   struct ts_cs_memory tex_samp = { 0 };
+   if (link->sampler_map.num_desc) {
+      result = tu_cs_alloc(device, draw_state, link->sampler_map.num_desc,
+                           A6XX_TEX_SAMP_DWORDS, &tex_samp);
+      if (result != VK_SUCCESS)
+         return result;
 
-   for (unsigned i = 0; i < link->sampler_map.num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i);
-      memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state));
-      *needs_border |= sampler->needs_border;
+      int sampler_index = 0;
+      for (unsigned i = 0; i < link->sampler_map.num; i++) {
+         for (int j = 0; j < link->sampler_map.array_size[i]; j++) {
+            struct tu_sampler *sampler = sampler_ptr(descriptors_state,
+                                                     &link->sampler_map, i, j);
+            memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS * sampler_index++],
+                   sampler->state, sizeof(sampler->state));
+            *needs_border |= sampler->needs_border;
+         }
+      }
    }
 
    unsigned tex_samp_reg, tex_const_reg, tex_count_reg;
@@ -2798,17 +2843,19 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
    if (result != VK_SUCCESS)
       return result;
 
-   /* output sampler state: */
-   tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
-   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
-              CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
-              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
-              CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-              CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num));
-   tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+   if (link->sampler_map.num_desc) {
+      /* output sampler state: */
+      tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
+      tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num_desc));
+      tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
 
-   tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
-   tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+      tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
+      tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+   }
 
    /* emit texture state: */
    tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
@@ -2816,14 +2863,14 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
               CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
               CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-              CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num));
+              CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num_desc));
    tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
 
    tu_cs_emit_pkt4(&cs, tex_const_reg, 2);
    tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
 
    tu_cs_emit_pkt4(&cs, tex_count_reg, 1);
-   tu_cs_emit(&cs, link->texture_map.num);
+   tu_cs_emit(&cs, link->texture_map.num_desc);
 
    *entry = tu_cs_end_sub_stream(draw_state, &cs);
    return VK_SUCCESS;
@@ -2860,7 +2907,8 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
       if (idx & IBO_SSBO) {
          idx &= ~IBO_SSBO;
 
-         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
+         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx,
+                                  0 /* XXX */);
          /* We don't expose robustBufferAccess, so leave the size unlimited. */
          uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
 
@@ -2992,21 +3040,26 @@ tu6_emit_border_color(struct tu_cmd_buffer *cmd,
    struct ts_cs_memory ptr;
 
    VkResult result = tu_cs_alloc(cmd->device, &cmd->sub_cs,
-                                 vs_sampler->num + fs_sampler->num, 128 / 4,
+                                 vs_sampler->num_desc + fs_sampler->num_desc,
+                                 128 / 4,
                                  &ptr);
    if (result != VK_SUCCESS)
       return result;
 
    for (unsigned i = 0; i < vs_sampler->num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i);
-      memcpy(ptr.map, &border_color[sampler->border], 128);
-      ptr.map += 128 / 4;
+      for (unsigned j = 0; j < vs_sampler->array_size[i]; j++) {
+         struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i, j);
+         memcpy(ptr.map, &border_color[sampler->border], 128);
+         ptr.map += 128 / 4;
+      }
    }
 
    for (unsigned i = 0; i < fs_sampler->num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i);
-      memcpy(ptr.map, &border_color[sampler->border], 128);
-      ptr.map += 128 / 4;
+      for (unsigned j = 0; j < fs_sampler->array_size[i]; j++) {
+         struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i, j);
+         memcpy(ptr.map, &border_color[sampler->border], 128);
+         ptr.map += 128 / 4;
+      }
    }
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
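
A detail worth calling out in write_tex_const() and sampler_ptr() above:
plain image/buffer descriptors sit A6XX_TEX_CONST_DWORDS apart, but combined
image+sampler descriptors interleave the sampler words after each element's
texture constants, so the per-element stride is larger. A sketch of that
offset arithmetic, with the dword sizes assumed here purely for illustration:

#define TEX_CONST_DWORDS 16   /* stands in for A6XX_TEX_CONST_DWORDS */
#define SAMPLER_DWORDS    4   /* stands in for sizeof(struct tu_sampler) / 4 */

/* Dword offset of element array_index of a binding that starts at base_dw. */
static unsigned
tex_const_offset_dw(unsigned base_dw, unsigned array_index, int combined)
{
   unsigned stride = combined ? TEX_CONST_DWORDS + SAMPLER_DWORDS
                              : TEX_CONST_DWORDS;
   return base_dw + array_index * stride;
}

/* For combined image+sampler, the sampler words directly follow the
 * element's texture constants, matching sampler_ptr() above. */
static unsigned
sampler_offset_dw(unsigned base_dw, unsigned array_index)
{
   return tex_const_offset_dw(base_dw, array_index, 1) + TEX_CONST_DWORDS;
}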

src/freedreno/vulkan/tu_descriptor_set.c (+15, -0)

@@ -21,6 +21,21 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+/**
+ * @file
+ *
+ * The texture and sampler descriptors are laid out in a single global space
+ * across all shader stages, for both simplicity of implementation and because
+ * that seems to be how things have to be structured for border color
+ * handling.
+ *
+ * Each shader stage will declare its texture/sampler count based on the last
+ * descriptor set it uses.  At draw emit time (though it really should be
+ * CmdBind time), we upload the descriptor sets used by each shader stage to
+ * their stage.
+ */
+
 #include "tu_private.h"
 
 #include <assert.h>
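
To make the comment above concrete: at draw time each stage walks its
flattened maps and emits one hardware descriptor per array element. A
simplified sketch of that iteration (the types and writer callback are
hypothetical; the real loop is in tu6_emit_textures() in tu_cmd_buffer.c):

#include <stdint.h>

#define TEX_CONST_DWORDS 16   /* assumed value of A6XX_TEX_CONST_DWORDS */

struct descriptor_map {
   unsigned num, num_desc;
   int array_size[64];
};

typedef void (*write_slot_fn)(uint32_t *dst, unsigned entry, unsigned elem);

/* Emit num_desc consecutive texture-constant slots for one shader stage. */
static void
upload_stage_descriptors(uint32_t *out, const struct descriptor_map *map,
                         write_slot_fn write_slot)
{
   unsigned slot = 0;
   for (unsigned i = 0; i < map->num; i++)
      for (int j = 0; j < map->array_size[i]; j++)
         write_slot(&out[TEX_CONST_DWORDS * slot++], i, j);
   /* when the loop finishes, slot == map->num_desc */
}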

src/freedreno/vulkan/tu_pipeline.c (+34, -20)

@@ -43,6 +43,7 @@ struct tu_pipeline_builder
 {
    struct tu_device *device;
    struct tu_pipeline_cache *cache;
+   struct tu_pipeline_layout *layout;
    const VkAllocationCallbacks *alloc;
    const VkGraphicsPipelineCreateInfo *create_info;
 
@@ -358,7 +359,8 @@ tu6_blend_op(VkBlendOp op)
 }
 
 static void
-tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
+tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *vs)
 {
    uint32_t sp_vs_ctrl =
       A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
@@ -368,8 +370,8 @@ tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
    if (vs->need_pixlod)
       sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE;
 
-   uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) |
-                           A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp);
+   uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                           A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc);
    if (vs->instrlen)
       sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED;
 
@@ -386,7 +388,8 @@ tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
 }
 
 static void
-tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs)
+tu6_emit_hs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *hs)
 {
    uint32_t sp_hs_config = 0;
    if (hs->instrlen)
@@ -404,7 +407,8 @@ tu6_emit_hs_config(struct tu_cs *cs, struct tu_shader *shader,
 }
 
 static void
-tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds)
+tu6_emit_ds_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *ds)
 {
    uint32_t sp_ds_config = 0;
    if (ds->instrlen)
@@ -419,7 +423,8 @@ tu6_emit_ds_config(struct tu_cs *cs, struct tu_shader *shader,
 }
 
 static void
-tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs)
+tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *gs)
 {
    uint32_t sp_gs_config = 0;
    if (gs->instrlen)
@@ -437,7 +442,8 @@ tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader,
 }
 
 static void
-tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
+tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *fs)
 {
    uint32_t sp_fs_ctrl =
       A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 |
@@ -449,8 +455,8 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
    if (fs->need_pixlod)
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
 
-   uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) |
-                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp) |
+   uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                           A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) |
                            A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo);
    if (fs->instrlen)
       sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED;
@@ -477,7 +483,8 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
 }
 
 static void
-tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v)
+tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
+                   const struct ir3_shader_variant *v)
 {
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
    tu_cs_emit(cs, 0xff);
@@ -490,8 +497,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2);
    tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED |
                   A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) |
-                  A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
-                  A6XX_SP_CS_CONFIG_NSAMP(v->num_samp));
+                  A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                  A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc));
    tu_cs_emit(cs, v->instrlen);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG0, 1);
@@ -1036,11 +1043,11 @@ tu6_emit_program(struct tu_cs *cs,
       fs = &dummy_variant;
    }
 
-   tu6_emit_vs_config(cs, vs);
-   tu6_emit_hs_config(cs, hs);
-   tu6_emit_ds_config(cs, ds);
-   tu6_emit_gs_config(cs, gs);
-   tu6_emit_fs_config(cs, fs);
+   tu6_emit_vs_config(cs, builder->shaders[MESA_SHADER_VERTEX], vs);
+   tu6_emit_hs_config(cs, builder->shaders[MESA_SHADER_TESS_CTRL], hs);
+   tu6_emit_ds_config(cs, builder->shaders[MESA_SHADER_TESS_EVAL], ds);
+   tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs);
+   tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs);
 
    tu6_emit_vs_system_values(cs, vs);
    tu6_emit_vpc(cs, vs, fs, binning_pass);
@@ -1535,7 +1542,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
         continue;
 
      struct tu_shader *shader =
-        tu_shader_create(builder->device, stage, stage_info, builder->alloc);
+        tu_shader_create(builder->device, stage, stage_info, builder->layout,
+                         builder->alloc);
      if (!shader)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
 
@@ -1910,11 +1918,14 @@ tu_pipeline_builder_init_graphics(
    const VkGraphicsPipelineCreateInfo *create_info,
    const VkAllocationCallbacks *alloc)
 {
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
+
    *builder = (struct tu_pipeline_builder) {
       .device = dev,
       .cache = cache,
       .create_info = create_info,
       .alloc = alloc,
+      .layout = layout,
    };
 
    builder->rasterizer_discard =
@@ -2003,7 +2014,7 @@ tu6_emit_compute_program(struct tu_cs *cs,
 {
    const struct ir3_shader_variant *v = &shader->variants[0];
 
-   tu6_emit_cs_config(cs, v);
+   tu6_emit_cs_config(cs, shader, v);
 
    /* The compute program is the only one in the pipeline, so 0 offset. */
    tu6_emit_shader_object(cs, MESA_SHADER_COMPUTE, v, binary_bo, 0);
@@ -2044,6 +2055,7 @@ tu_compute_pipeline_create(VkDevice device,
                            VkPipeline *pPipeline)
 {
    TU_FROM_HANDLE(tu_device, dev, device);
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
    const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
    VkResult result;
 
@@ -2053,11 +2065,13 @@ tu_compute_pipeline_create(VkDevice device,
    if (result != VK_SUCCESS)
       return result;
 
+   pipeline->layout = layout;
+
    struct tu_shader_compile_options options;
    tu_shader_compile_options_init(&options, NULL);
 
    struct tu_shader *shader =
-      tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, pAllocator);
+      tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, layout, pAllocator);
    if (!shader) {
       result = VK_ERROR_OUT_OF_HOST_MEMORY;
       goto fail;

src/freedreno/vulkan/tu_private.h (+4, -1)

@@ -1050,10 +1050,12 @@ struct tu_shader_compile_options
 struct tu_descriptor_map
 {
    /* TODO: avoid fixed size array/justify the size */
-   unsigned num;
+   unsigned num; /* number of array entries */
+   unsigned num_desc; /* Number of descriptors (sum of array_size[]) */
    int set[64];
    int binding[64];
    int value[64];
+   int array_size[64];
 };
 
 struct tu_shader
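
For a concrete reading of the two counters: a stage whose layout has one
plain sampler binding and one four-element sampler array would populate the
map as below (illustrative values only):

struct tu_descriptor_map map = {
   .num = 2,               /* two bindings */
   .num_desc = 5,          /* 1 + 4 flattened descriptor slots */
   .array_size = { 1, 4 }, /* binding 0: sampler2D tex; binding 1: sampler2D texes[4] */
};
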
@@ -1080,6 +1082,7 @@ struct tu_shader *
 tu_shader_create(struct tu_device *dev,
                  gl_shader_stage stage,
                  const VkPipelineShaderStageCreateInfo *stage_info,
+                 struct tu_pipeline_layout *layout,
                  const VkAllocationCallbacks *alloc);
 
 void

src/freedreno/vulkan/tu_shader.c (+62, -42)

@@ -109,26 +109,35 @@ tu_sort_variables_by_location(struct exec_list *variables)
 }
 
 static unsigned
-map_add(struct tu_descriptor_map *map, int set, int binding, int value)
+map_add(struct tu_descriptor_map *map, int set, int binding, int value,
+        int array_size)
 {
-   unsigned index;
-   for (index = 0; index < map->num; index++) {
-      if (set == map->set[index] && binding == map->binding[index])
-         break;
+   unsigned index = 0;
+   for (unsigned i = 0; i < map->num; i++) {
+      if (set == map->set[i] && binding == map->binding[i]) {
+         assert(value == map->value[i]);
+         assert(array_size == map->array_size[i]);
+         return index;
+      }
+      index += map->array_size[i];
    }
 
-   assert(index < ARRAY_SIZE(map->set));
+   assert(index == map->num_desc);
 
-   map->set[index] = set;
-   map->binding[index] = binding;
-   map->value[index] = value;
-   map->num = MAX2(map->num, index + 1);
+   map->set[map->num] = set;
+   map->binding[map->num] = binding;
+   map->value[map->num] = value;
+   map->array_size[map->num] = array_size;
+   map->num++;
+   map->num_desc += array_size;
+
    return index;
 }
 
 static void
 lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
-                        struct tu_shader *shader)
+                        struct tu_shader *shader,
+                        const struct tu_pipeline_layout *layout)
 {
    nir_ssa_def *index = NULL;
    unsigned base_index = 0;
@@ -184,39 +193,39 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
       nir_tex_instr_remove_src(instr, src_idx);
    }
 
-   if (array_elements > 1)
-      tu_finishme("texture/sampler array");
-
-   if (is_sampler) {
-      instr->sampler_index = map_add(&shader->sampler_map,
-                                     deref->var->data.descriptor_set,
-                                     deref->var->data.binding,
-                                     0);
-      instr->sampler_index += base_index;
-   } else {
-      instr->texture_index = map_add(&shader->texture_map,
-                                     deref->var->data.descriptor_set,
-                                     deref->var->data.binding,
-                                     deref->var->data.index);
-      instr->texture_index += base_index;
-      instr->texture_array_size = array_elements;
-   }
+   uint32_t set = deref->var->data.descriptor_set;
+   uint32_t binding = deref->var->data.binding;
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_set_binding_layout *binding_layout =
+      &set_layout->binding[binding];
+
+   int desc_index = map_add(is_sampler ?
+                            &shader->sampler_map : &shader->texture_map,
+                            deref->var->data.descriptor_set,
+                            deref->var->data.binding,
+                            deref->var->data.index,
+                            binding_layout->array_size) + base_index;
+   if (is_sampler)
+      instr->sampler_index = desc_index;
+   else
+      instr->texture_index = desc_index;
 }
 
 static bool
-lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader)
+lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader,
+              const struct tu_pipeline_layout *layout)
 {
    int texture_idx =
       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
 
    if (texture_idx >= 0)
-      lower_tex_src_to_offset(b, instr, texture_idx, shader);
+      lower_tex_src_to_offset(b, instr, texture_idx, shader, layout);
 
    int sampler_idx =
       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
 
    if (sampler_idx >= 0)
-      lower_tex_src_to_offset(b, instr, sampler_idx, shader);
+      lower_tex_src_to_offset(b, instr, sampler_idx, shader, layout);
 
    if (texture_idx < 0 && sampler_idx < 0)
       return false;
@@ -226,7 +235,8 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader)
 
 static bool
 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
-                struct tu_shader *shader)
+                struct tu_shader *shader,
+                const struct tu_pipeline_layout *layout)
 {
    /* TODO: remove this when layered rendering is implemented */
    if (instr->intrinsic == nir_intrinsic_load_layer_id) {
@@ -260,23 +270,30 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
       return false;
 
    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
-   if (!const_val || const_val->u32 != 0)
-      tu_finishme("non-zero vulkan_resource_index array index");
 
 
    unsigned set = nir_intrinsic_desc_set(instr);
    unsigned binding = nir_intrinsic_binding(instr);
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_set_binding_layout *binding_layout =
+      &set_layout->binding[binding];
    unsigned index = 0;
 
    switch (nir_intrinsic_desc_type(instr)) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      if (!const_val || const_val->u32 != 0)
+         tu_finishme("non-zero vulkan_resource_index array index");
       /* skip index 0 which is used for push constants */
-      index = map_add(&shader->ubo_map, set, binding, 0) + 1;
+      index = map_add(&shader->ubo_map, set, binding, 0,
+                      binding_layout->array_size) + 1;
       break;
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      index = map_add(&shader->ssbo_map, set, binding, 0);
+      if (!const_val)
+         tu_finishme("non-constant vulkan_resource_index array index");
+      index = map_add(&shader->ssbo_map, set, binding, 0,
+                      binding_layout->array_size);
       break;
    default:
       tu_finishme("unsupported desc_type for vulkan_resource_index");
@@ -291,7 +308,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
 }
 
 static bool
-lower_impl(nir_function_impl *impl, struct tu_shader *shader)
+lower_impl(nir_function_impl *impl, struct tu_shader *shader,
+           const struct tu_pipeline_layout *layout)
 {
    nir_builder b;
    nir_builder_init(&b, impl);
@@ -302,10 +320,10 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader)
       b.cursor = nir_before_instr(instr);
       switch (instr->type) {
       case nir_instr_type_tex:
-         progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader);
+         progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader, layout);
          break;
       case nir_instr_type_intrinsic:
-         progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader);
+         progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
          break;
       default:
          break;
@@ -317,13 +335,14 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader)
 }
 
 static bool
-tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader)
+tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
+            const struct tu_pipeline_layout *layout)
 {
    bool progress = false;
 
    nir_foreach_function(function, shader) {
      if (function->impl)
-         progress |= lower_impl(function->impl, tu_shader);
+         progress |= lower_impl(function->impl, tu_shader, layout);
    }
 
    return progress;
@@ -333,6 +352,7 @@ struct tu_shader *
 tu_shader_create(struct tu_device *dev,
                  gl_shader_stage stage,
                  const VkPipelineShaderStageCreateInfo *stage_info,
+                 struct tu_pipeline_layout *layout,
                  const VkAllocationCallbacks *alloc)
 {
    const struct tu_shader_module *module =
@@ -426,7 +446,7 @@ tu_shader_create(struct tu_device *dev,
    if (stage == MESA_SHADER_FRAGMENT)
       NIR_PASS_V(nir, nir_lower_input_attachments, true);
 
-   NIR_PASS_V(nir, tu_lower_io, shader);
+   NIR_PASS_V(nir, tu_lower_io, shader, layout);
 
    NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);
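
Taken together, the lowering gives map_add() a simple contract: it returns
the binding's first flat descriptor slot, and any constant array index folded
out of the NIR deref is added on top (texes[2] lands at base + 2). A toy,
self-contained version of that contract, not the driver code itself:

#include <assert.h>

struct map {
   unsigned num, num_desc;
   int set[64], binding[64], array_size[64];
};

static unsigned
toy_map_add(struct map *m, int set, int binding, int array_size)
{
   unsigned index = 0;
   for (unsigned i = 0; i < m->num; i++) {
      if (m->set[i] == set && m->binding[i] == binding)
         return index;           /* binding already mapped: same base slot */
      index += m->array_size[i];
   }
   m->set[m->num] = set;
   m->binding[m->num] = binding;
   m->array_size[m->num] = array_size;
   m->num++;
   m->num_desc += array_size;
   return index;
}

int
main(void)
{
   struct map m = { 0 };
   assert(toy_map_add(&m, 0, 0, 1) == 0); /* sampler2D tex;      -> slot 0    */
   assert(toy_map_add(&m, 0, 1, 4) == 1); /* sampler2D texes[4]; -> slots 1-4 */
   assert(toy_map_add(&m, 0, 1, 4) == 1); /* re-add returns the same base     */
   assert(m.num_desc == 5);
   return 0;
}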

