|
|
@@ -196,6 +196,23 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
cmd_buffer->state.dirty |= dest_mask; |
|
|
|
} |
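/* Cache the streamout parameters from the pipeline's streamout shader
* (the last vertex-processing stage): per-buffer vertex strides in
* dwords and the mask of buffer slots written by the enabled streams.
*/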
|
|
|
|
|
|
|
static void |
|
|
|
radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
struct radv_pipeline *pipeline) |
|
|
|
{ |
|
|
|
struct radv_streamout_state *so = &cmd_buffer->state.streamout; |
|
|
|
struct radv_shader_info *info; |
|
|
|
|
|
|
|
if (!pipeline->streamout_shader) |
|
|
|
return; |
|
|
|
|
|
|
|
info = &pipeline->streamout_shader->info.info; |
|
|
|
for (int i = 0; i < MAX_SO_BUFFERS; i++) |
|
|
|
so->stride_in_dw[i] = info->so.strides[i]; |
|
|
|
|
|
|
|
so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask; |
|
|
|
} |
|
|
|
|
|
|
|
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) |
|
|
|
{ |
|
|
|
return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && |
|
|
@@ -1875,10 +1892,94 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER; |
|
|
|
} |
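/* Write the VA of the streamout descriptor table into the
* AC_UD_STREAMOUT_BUFFERS user SGPR of every active shader stage that
* declares one. The GS copy shader is handled separately because it is
* not part of the regular stage array but still runs as a HW VS.
*/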
|
|
|
|
|
|
|
static void |
|
|
|
radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) |
|
|
|
{ |
|
|
|
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; |
|
|
|
struct radv_userdata_info *loc; |
|
|
|
uint32_t base_reg; |
|
|
|
|
|
|
|
for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { |
|
|
|
if (!radv_get_shader(pipeline, stage)) |
|
|
|
continue; |
|
|
|
|
|
|
|
loc = radv_lookup_user_sgpr(pipeline, stage, |
|
|
|
AC_UD_STREAMOUT_BUFFERS); |
|
|
|
if (loc->sgpr_idx == -1) |
|
|
|
continue; |
|
|
|
|
|
|
|
base_reg = pipeline->user_data_0[stage]; |
|
|
|
|
|
|
|
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, |
|
|
|
base_reg + loc->sgpr_idx * 4, va, false); |
|
|
|
} |
|
|
|
|
|
|
|
if (pipeline->gs_copy_shader) { |
|
|
|
loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS]; |
|
|
|
if (loc->sgpr_idx != -1) { |
|
|
|
base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; |
|
|
|
|
|
|
|
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, |
|
|
|
base_reg + loc->sgpr_idx * 4, va, false); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
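/* Upload one raw buffer descriptor (4 dwords, 16 bytes) per streamout
* buffer slot to the upload BO and point the shaders at the resulting
* table. This is only re-done when the streamout bindings are dirty.
*/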
|
|
|
|
|
|
|
static void |
|
|
|
radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) |
|
|
|
{ |
|
|
|
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) { |
|
|
|
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; |
|
|
|
struct radv_streamout_state *so = &cmd_buffer->state.streamout; |
|
|
|
unsigned so_offset; |
|
|
|
void *so_ptr; |
|
|
|
uint64_t va; |
|
|
|
|
|
|
|
/* Allocate some descriptor state for streamout buffers. */ |
|
|
|
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, |
|
|
|
MAX_SO_BUFFERS * 16, 256, |
|
|
|
&so_offset, &so_ptr)) |
|
|
|
return; |
|
|
|
|
|
|
|
for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) { |
|
|
|
struct radv_buffer *buffer = sb[i].buffer; |
|
|
|
uint32_t *desc = &((uint32_t *)so_ptr)[i * 4]; |
|
|
|
|
|
|
|
if (!(so->enabled_mask & (1 << i))) |
|
|
|
continue; |
|
|
|
|
|
|
|
va = radv_buffer_get_va(buffer->bo) + buffer->offset; |
|
|
|
|
|
|
|
/* Set the descriptor. |
|
|
|
* |
|
|
|
* On VI, the format must be non-INVALID, otherwise |
|
|
|
* the buffer will be considered not bound and store |
|
|
|
* instructions will be no-ops. |
|
|
|
*/ |
|
|
|
desc[0] = va; |
|
|
|
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); |
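/* NUM_RECORDS is set to the maximum so that the descriptor itself
* never clips stores; the real bound comes from the VGT streamout
* buffer size programmed in vkCmdBeginTransformFeedbackEXT.
*/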
|
|
|
desc[2] = 0xffffffff; |
|
|
|
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | |
|
|
|
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | |
|
|
|
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | |
|
|
|
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | |
|
|
|
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); |
|
|
|
} |
|
|
|
|
|
|
|
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); |
|
|
|
va += so_offset; |
|
|
|
|
|
|
|
radv_emit_streamout_buffers(cmd_buffer, va); |
|
|
|
} |
|
|
|
|
|
|
|
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER; |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) |
|
|
|
{ |
|
|
|
radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty); |
|
|
|
radv_flush_streamout_descriptors(cmd_buffer); |
|
|
|
radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); |
|
|
|
radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); |
|
|
|
} |
|
|
@@ -1969,7 +2070,8 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | |
|
|
|
VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | |
|
|
|
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | |
|
|
|
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | |
|
|
|
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) { |
|
|
|
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; |
|
|
|
} |
|
|
|
} |
|
|
@@ -1993,6 +2095,8 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
for_each_bit(b, src_flags) { |
|
|
|
switch ((VkAccessFlagBits)(1 << b)) { |
|
|
|
case VK_ACCESS_SHADER_WRITE_BIT: |
|
|
|
case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: |
|
|
|
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: |
|
|
|
flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; |
|
|
|
break; |
|
|
|
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: |
|
|
@@ -2062,6 +2166,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
switch ((VkAccessFlagBits)(1 << b)) { |
|
|
|
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: |
|
|
|
case VK_ACCESS_INDEX_READ_BIT: |
|
|
|
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: |
|
|
|
break; |
|
|
|
case VK_ACCESS_UNIFORM_READ_BIT: |
|
|
|
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1; |
|
|
@@ -2716,6 +2821,7 @@ void radv_CmdBindPipeline( |
|
|
|
cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; |
|
|
|
|
|
|
|
radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); |
|
|
|
radv_bind_streamout_state(cmd_buffer, pipeline); |
|
|
|
|
|
|
|
if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) |
|
|
|
cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size; |
|
|
@@ -3138,12 +3244,13 @@ static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned in |
|
|
|
|
|
|
|
static void |
|
|
|
radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
uint32_t vertex_count, |
|
|
|
bool use_opaque) |
|
|
|
{ |
|
|
|
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); |
|
|
|
radeon_emit(cmd_buffer->cs, vertex_count); |
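/* When use_opaque is set, the VGT ignores the vertex count above and
* derives it from VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE divided
* by VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE.
*/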
|
|
|
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | |
|
|
|
S_0287F0_USE_OPAQUE(use_opaque)); |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
@@ -3247,6 +3354,12 @@ struct radv_draw_info { |
|
|
|
*/ |
|
|
|
struct radv_buffer *count_buffer; |
|
|
|
uint64_t count_buffer_offset; |
|
|
|
|
|
|
|
/** |
|
|
|
* Streamout counter buffer used by vkCmdDrawIndirectByteCountEXT.
|
|
|
*/ |
|
|
|
struct radv_buffer *strmout_buffer; |
|
|
|
uint64_t strmout_buffer_offset; |
|
|
|
}; |
|
|
|
|
|
|
|
static void |
|
|
@@ -3257,6 +3370,27 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
struct radeon_winsys *ws = cmd_buffer->device->ws; |
|
|
|
struct radeon_cmdbuf *cs = cmd_buffer->cs; |
|
|
|
|
|
|
|
if (info->strmout_buffer) { |
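/* DrawIndirectByteCount path: have the CP copy the BufferFilledSize
* counter into VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE so the VGT
* computes the vertex count on the GPU (see use_opaque above).
*/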
|
|
|
uint64_t va = radv_buffer_get_va(info->strmout_buffer->bo); |
|
|
|
|
|
|
|
va += info->strmout_buffer->offset + |
|
|
|
info->strmout_buffer_offset; |
|
|
|
|
|
|
|
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, |
|
|
|
info->stride); |
|
|
|
|
|
|
|
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); |
|
|
|
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | |
|
|
|
COPY_DATA_DST_SEL(COPY_DATA_REG) | |
|
|
|
COPY_DATA_WR_CONFIRM); |
|
|
|
radeon_emit(cs, va); |
|
|
|
radeon_emit(cs, va >> 32); |
|
|
|
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
|
|
|
|
radv_cs_add_buffer(ws, cs, info->strmout_buffer->bo); |
|
|
|
} |
|
|
|
|
|
|
|
if (info->indirect) { |
|
|
|
uint64_t va = radv_buffer_get_va(info->indirect->bo); |
|
|
|
uint64_t count_va = 0; |
|
|
@@ -3341,14 +3475,17 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
} |
|
|
|
} else { |
|
|
|
if (!state->subpass->view_mask) { |
|
|
|
radv_cs_emit_draw_packet(cmd_buffer, |
|
|
|
info->count, |
|
|
|
!!info->strmout_buffer); |
|
|
|
} else { |
|
|
|
unsigned i; |
|
|
|
for_each_bit(i, state->subpass->view_mask) { |
|
|
|
radv_emit_view_index(cmd_buffer, i); |
|
|
|
|
|
|
|
radv_cs_emit_draw_packet(cmd_buffer, |
|
|
|
info->count, |
|
|
|
!!info->strmout_buffer); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@@ -3442,6 +3579,8 @@ static void |
|
|
|
radv_draw(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
const struct radv_draw_info *info) |
|
|
|
{ |
|
|
|
struct radeon_info *rad_info = |
|
|
|
&cmd_buffer->device->physical_device->rad_info; |
|
|
|
bool has_prefetch = |
|
|
|
cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; |
|
|
|
bool pipeline_is_dirty = |
|
|
@@ -3511,6 +3650,16 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Workaround for a VGT hang when streamout is enabled. |
|
|
|
* It must be done after drawing. |
|
|
|
*/ |
|
|
|
if (cmd_buffer->state.streamout.streamout_enabled && |
|
|
|
(rad_info->family == CHIP_HAWAII || |
|
|
|
rad_info->family == CHIP_TONGA || |
|
|
|
rad_info->family == CHIP_FIJI)) { |
|
|
|
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC; |
|
|
|
} |
|
|
|
|
|
|
|
assert(cmd_buffer->cs->cdw <= cdw_max); |
|
|
|
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH); |
|
|
|
} |
|
|
@@ -4486,3 +4635,229 @@ void radv_CmdEndConditionalRenderingEXT( |
|
|
|
cmd_buffer->state.predication_type = -1; |
|
|
|
cmd_buffer->state.predication_va = 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* VK_EXT_transform_feedback */ |
|
|
|
void radv_CmdBindTransformFeedbackBuffersEXT( |
|
|
|
VkCommandBuffer commandBuffer, |
|
|
|
uint32_t firstBinding, |
|
|
|
uint32_t bindingCount, |
|
|
|
const VkBuffer* pBuffers, |
|
|
|
const VkDeviceSize* pOffsets, |
|
|
|
const VkDeviceSize* pSizes) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
|
|
|
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; |
|
|
|
uint8_t enabled_mask = 0; |
|
|
|
|
|
|
|
assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); |
|
|
|
for (uint32_t i = 0; i < bindingCount; i++) { |
|
|
|
uint32_t idx = firstBinding + i; |
|
|
|
|
|
|
|
sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); |
|
|
|
sb[idx].offset = pOffsets[i]; |
|
|
|
sb[idx].size = (!pSizes || pSizes[i] == VK_WHOLE_SIZE) ?
sb[idx].buffer->size - pOffsets[i] : pSizes[i];
|
|
|
|
|
|
|
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, |
|
|
|
sb[idx].buffer->bo); |
|
|
|
|
|
|
|
enabled_mask |= 1 << idx; |
|
|
|
} |
|
|
|
|
|
|
|
cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
|
|
|
|
|
|
|
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; |
|
|
|
} |
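/* Program the global streamout enable state: VGT_STRMOUT_CONFIG
* toggles the four streams, while VGT_STRMOUT_BUFFER_CONFIG selects
* the buffer slots to write, masked by the buffers the shader uses.
*/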
|
|
|
|
|
|
|
static void |
|
|
|
radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer) |
|
|
|
{ |
|
|
|
struct radv_streamout_state *so = &cmd_buffer->state.streamout; |
|
|
|
struct radeon_cmdbuf *cs = cmd_buffer->cs; |
|
|
|
|
|
|
|
radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); |
|
|
|
radeon_emit(cs, |
|
|
|
S_028B94_STREAMOUT_0_EN(so->streamout_enabled) | |
|
|
|
S_028B94_RAST_STREAM(0) | |
|
|
|
S_028B94_STREAMOUT_1_EN(so->streamout_enabled) | |
|
|
|
S_028B94_STREAMOUT_2_EN(so->streamout_enabled) | |
|
|
|
S_028B94_STREAMOUT_3_EN(so->streamout_enabled)); |
|
|
|
radeon_emit(cs, so->hw_enabled_mask & |
|
|
|
so->enabled_stream_buffers_mask); |
|
|
|
} |
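/* hw_enabled_mask replicates the 4-bit buffer mask into one nibble
* per stream (4 buffers x 4 streams) to match the
* VGT_STRMOUT_BUFFER_CONFIG layout. The registers are only re-emitted
* when something changed.
*/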
|
|
|
|
|
|
|
static void |
|
|
|
radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) |
|
|
|
{ |
|
|
|
struct radv_streamout_state *so = &cmd_buffer->state.streamout; |
|
|
|
bool old_streamout_enabled = so->streamout_enabled; |
|
|
|
uint32_t old_hw_enabled_mask = so->hw_enabled_mask; |
|
|
|
|
|
|
|
so->streamout_enabled = enable; |
|
|
|
|
|
|
|
so->hw_enabled_mask = so->enabled_mask | |
|
|
|
(so->enabled_mask << 4) | |
|
|
|
(so->enabled_mask << 8) | |
|
|
|
(so->enabled_mask << 12); |
|
|
|
|
|
|
|
if ((old_streamout_enabled != so->streamout_enabled) || |
|
|
|
(old_hw_enabled_mask != so->hw_enabled_mask)) |
|
|
|
radv_emit_streamout_enable(cmd_buffer); |
|
|
|
} |
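/* Make the CP wait until pending streamout offset updates have landed:
* clear CP_STRMOUT_CNTL, emit an SO_VGTSTREAMOUT_FLUSH event, then
* poll the register until OFFSET_UPDATE_DONE is set.
*/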
|
|
|
|
|
|
|
static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) |
|
|
|
{ |
|
|
|
struct radeon_cmdbuf *cs = cmd_buffer->cs; |
|
|
|
unsigned reg_strmout_cntl; |
|
|
|
|
|
|
|
/* The register is at different places on different ASICs. */ |
|
|
|
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { |
|
|
|
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; |
|
|
|
radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); |
|
|
|
} else { |
|
|
|
reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; |
|
|
|
radeon_set_config_reg(cs, reg_strmout_cntl, 0); |
|
|
|
} |
|
|
|
|
|
|
|
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
|
|
|
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); |
|
|
|
|
|
|
|
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); |
|
|
|
radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ |
|
|
|
radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ |
|
|
|
radeon_emit(cs, 0); |
|
|
|
radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ |
|
|
|
radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ |
|
|
|
radeon_emit(cs, 4); /* poll interval */ |
|
|
|
} |
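/* For each enabled binding, program the buffer size and vertex stride,
* then load the initial write offset either from the counter buffer
* (resuming a previous section) or from the bound buffer offset.
*/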
|
|
|
|
|
|
|
void radv_CmdBeginTransformFeedbackEXT( |
|
|
|
VkCommandBuffer commandBuffer, |
|
|
|
uint32_t firstBuffer, |
|
|
|
uint32_t bufferCount, |
|
|
|
const VkBuffer* pCounterBuffers, |
|
|
|
const VkDeviceSize* pCounterBufferOffsets) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
|
|
|
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; |
|
|
|
struct radv_streamout_state *so = &cmd_buffer->state.streamout; |
|
|
|
struct radeon_cmdbuf *cs = cmd_buffer->cs; |
|
|
|
|
|
|
|
radv_flush_vgt_streamout(cmd_buffer); |
|
|
|
|
|
|
|
assert(firstBuffer + bufferCount <= MAX_SO_BUFFERS); |
|
|
|
for (uint32_t i = firstBuffer; i < firstBuffer + bufferCount; i++) {
|
|
|
if (!(so->enabled_mask & (1 << i))) |
|
|
|
continue; |
|
|
|
|
|
|
|
/* SI binds streamout buffers as shader resources. |
|
|
|
* VGT only counts primitives and tells the shader through |
|
|
|
* SGPRs what to do. |
|
|
|
*/ |
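/* Each buffer slot occupies four consecutive context registers, hence
* the 16-byte stride between slots.
*/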
|
|
|
radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); |
|
|
|
radeon_emit(cs, (sb[i].offset + sb[i].size) >> 2); /* BUFFER_SIZE (in DW) */ |
|
|
|
radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ |
|
|
|
|
|
|
|
if (pCounterBuffers && pCounterBuffers[i - firstBuffer]) {
|
|
|
/* The array of counter buffers is optional. */ |
|
|
|
RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i - firstBuffer]);
|
|
|
uint64_t va = radv_buffer_get_va(buffer->bo); |
|
|
|
|
|
|
|
va += buffer->offset + (pCounterBufferOffsets ? pCounterBufferOffsets[i - firstBuffer] : 0);
|
|
|
|
|
|
|
/* Append */ |
|
|
|
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); |
|
|
|
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | |
|
|
|
STRMOUT_DATA_TYPE(1) | /* offset in bytes */ |
|
|
|
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
radeon_emit(cs, va); /* src address lo */ |
|
|
|
radeon_emit(cs, va >> 32); /* src address hi */ |
|
|
|
|
|
|
|
radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); |
|
|
|
} else { |
|
|
|
/* Start from the beginning. */ |
|
|
|
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); |
|
|
|
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | |
|
|
|
STRMOUT_DATA_TYPE(1) | /* offset in bytes */ |
|
|
|
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
radeon_emit(cs, sb[i].offset >> 2); /* buffer offset in DW */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
radv_set_streamout_enable(cmd_buffer, true); |
|
|
|
} |
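/* Stop streamout: optionally save the final BufferFilledSize to each
* counter buffer so a later vkCmdBeginTransformFeedbackEXT can resume,
* then zero the buffer sizes so the VGT stops writing primitives.
*/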
|
|
|
|
|
|
|
void radv_CmdEndTransformFeedbackEXT( |
|
|
|
VkCommandBuffer commandBuffer, |
|
|
|
uint32_t firstBuffer, |
|
|
|
uint32_t bufferCount, |
|
|
|
const VkBuffer* pCounterBuffers, |
|
|
|
const VkDeviceSize* pCounterBufferOffsets) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
|
|
|
struct radv_streamout_state *so = &cmd_buffer->state.streamout; |
|
|
|
struct radeon_cmdbuf *cs = cmd_buffer->cs; |
|
|
|
|
|
|
|
radv_flush_vgt_streamout(cmd_buffer); |
|
|
|
|
|
|
|
assert(firstBuffer + bufferCount <= MAX_SO_BUFFERS); |
|
|
|
for (uint32_t i = firstBuffer; i < firstBuffer + bufferCount; i++) {
|
|
|
if (!(so->enabled_mask & (1 << i))) |
|
|
|
continue; |
|
|
|
|
|
|
|
if (pCounterBuffers && pCounterBuffers[i - firstBuffer]) {
|
|
|
/* The array of counter buffers is optional. */
|
|
|
RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i - firstBuffer]);
|
|
|
uint64_t va = radv_buffer_get_va(buffer->bo); |
|
|
|
|
|
|
|
va += buffer->offset + (pCounterBufferOffsets ? pCounterBufferOffsets[i - firstBuffer] : 0);
|
|
|
|
|
|
|
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); |
|
|
|
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | |
|
|
|
STRMOUT_DATA_TYPE(1) | /* offset in bytes */ |
|
|
|
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | |
|
|
|
STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ |
|
|
|
radeon_emit(cs, va); /* dst address lo */ |
|
|
|
radeon_emit(cs, va >> 32); /* dst address hi */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
radeon_emit(cs, 0); /* unused */ |
|
|
|
|
|
|
|
radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); |
|
|
|
} |
|
|
|
|
|
|
|
/* Deactivate transform feedback by zeroing the buffer size. |
|
|
|
* The counters (primitives generated, primitives emitted) may |
|
|
|
* be enabled even if no buffer is bound. This ensures
|
|
|
* that the primitives-emitted query won't increment. |
|
|
|
*/ |
|
|
|
radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); |
|
|
|
} |
|
|
|
|
|
|
|
radv_set_streamout_enable(cmd_buffer, false); |
|
|
|
} |
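/* Implements vkCmdDrawIndirectByteCountEXT: the vertex count comes
* from the BufferFilledSize counter written by a previous transform
* feedback section, routed through radv_draw() into the USE_OPAQUE
* draw path. Note that counterOffset is not applied by this path.
*/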
|
|
|
|
|
|
|
void radv_CmdDrawIndirectByteCountEXT( |
|
|
|
VkCommandBuffer commandBuffer, |
|
|
|
uint32_t instanceCount, |
|
|
|
uint32_t firstInstance, |
|
|
|
VkBuffer _counterBuffer, |
|
|
|
VkDeviceSize counterBufferOffset, |
|
|
|
uint32_t counterOffset, |
|
|
|
uint32_t vertexStride) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
|
|
|
RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); |
|
|
|
struct radv_draw_info info = {}; |
|
|
|
|
|
|
|
info.instance_count = instanceCount; |
|
|
|
info.first_instance = firstInstance; |
|
|
|
info.strmout_buffer = counterBuffer; |
|
|
|
info.strmout_buffer_offset = counterBufferOffset; |
|
|
|
info.stride = vertexStride; |
|
|
|
|
|
|
|
radv_draw(cmd_buffer, &info); |
|
|
|
} |