|
|
@@ -3516,172 +3516,211 @@ radv_alloc_sem_info(struct radv_instance *instance, |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
/* Signals fence as soon as all the work currently put on queue is done. */ |
|
|
|
static VkResult radv_signal_fence(struct radv_queue *queue, |
|
|
|
struct radv_fence *fence) |
|
|
|
{ |
|
|
|
int ret; |
|
|
|
VkResult result; |
|
|
|
struct radv_winsys_sem_info sem_info; |
|
|
|
|
|
|
|
result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL, |
|
|
|
radv_fence_to_handle(fence)); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
|
|
|
|
ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, |
|
|
|
&queue->device->empty_cs[queue->queue_family_index], |
|
|
|
1, NULL, NULL, &sem_info, NULL, |
|
|
|
false, fence->fence); |
|
|
|
radv_free_sem_info(&sem_info); |
|
|
|
|
|
|
|
if (ret) |
|
|
|
return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST); |
|
|
|
|
|
|
|
return VK_SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
VkResult radv_QueueSubmit( |
|
|
|
VkQueue _queue, |
|
|
|
uint32_t submitCount, |
|
|
|
const VkSubmitInfo* pSubmits, |
|
|
|
VkFence _fence) |
|
|
|
static VkResult |
|
|
|
radv_get_preambles(struct radv_queue *queue, |
|
|
|
const VkCommandBuffer *cmd_buffers, |
|
|
|
uint32_t cmd_buffer_count, |
|
|
|
struct radeon_cmdbuf **initial_full_flush_preamble_cs, |
|
|
|
struct radeon_cmdbuf **initial_preamble_cs, |
|
|
|
struct radeon_cmdbuf **continue_preamble_cs) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_queue, queue, _queue); |
|
|
|
RADV_FROM_HANDLE(radv_fence, fence, _fence); |
|
|
|
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; |
|
|
|
struct radeon_winsys_ctx *ctx = queue->hw_ctx; |
|
|
|
int ret; |
|
|
|
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; |
|
|
|
uint32_t scratch_size = 0; |
|
|
|
uint32_t compute_scratch_size = 0; |
|
|
|
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; |
|
|
|
struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL; |
|
|
|
VkResult result; |
|
|
|
bool fence_emitted = false; |
|
|
|
bool tess_rings_needed = false; |
|
|
|
bool gds_needed = false; |
|
|
|
bool sample_positions_needed = false; |
|
|
|
|
|
|
|
/* Do this first so failing to allocate scratch buffers can't result in |
|
|
|
* partially executed submissions. */ |
|
|
|
for (uint32_t i = 0; i < submitCount; i++) { |
|
|
|
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, |
|
|
|
pSubmits[i].pCommandBuffers[j]); |
|
|
|
for (uint32_t j = 0; j < cmd_buffer_count; j++) { |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, |
|
|
|
cmd_buffers[j]); |
|
|
|
|
|
|
|
scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed); |
|
|
|
compute_scratch_size = MAX2(compute_scratch_size, |
|
|
|
cmd_buffer->compute_scratch_size_needed); |
|
|
|
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed); |
|
|
|
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); |
|
|
|
tess_rings_needed |= cmd_buffer->tess_rings_needed; |
|
|
|
gds_needed |= cmd_buffer->gds_needed; |
|
|
|
sample_positions_needed |= cmd_buffer->sample_positions_needed; |
|
|
|
} |
|
|
|
scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed); |
|
|
|
compute_scratch_size = MAX2(compute_scratch_size, |
|
|
|
cmd_buffer->compute_scratch_size_needed); |
|
|
|
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed); |
|
|
|
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); |
|
|
|
tess_rings_needed |= cmd_buffer->tess_rings_needed; |
|
|
|
gds_needed |= cmd_buffer->gds_needed; |
|
|
|
sample_positions_needed |= cmd_buffer->sample_positions_needed; |
|
|
|
} |
|
|
|
|
|
|
|
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, |
|
|
|
return radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, |
|
|
|
esgs_ring_size, gsvs_ring_size, tess_rings_needed, |
|
|
|
gds_needed, sample_positions_needed, |
|
|
|
&initial_flush_preamble_cs, |
|
|
|
&initial_preamble_cs, &continue_preamble_cs); |
|
|
|
gds_needed, sample_positions_needed, |
|
|
|
initial_full_flush_preamble_cs, |
|
|
|
initial_preamble_cs, continue_preamble_cs); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
struct radv_queue_submission { |
|
|
|
const VkCommandBuffer *cmd_buffers; |
|
|
|
uint32_t cmd_buffer_count; |
|
|
|
bool flush_caches; |
|
|
|
VkPipelineStageFlags wait_dst_stage_mask; |
|
|
|
const VkSemaphore *wait_semaphores; |
|
|
|
uint32_t wait_semaphore_count; |
|
|
|
const VkSemaphore *signal_semaphores; |
|
|
|
uint32_t signal_semaphore_count; |
|
|
|
VkFence fence; |
|
|
|
}; |
|
|
|
|
|
|
|
static VkResult |
|
|
|
radv_queue_submit(struct radv_queue *queue, |
|
|
|
const struct radv_queue_submission *submission) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_fence, fence, submission->fence); |
|
|
|
struct radeon_cmdbuf **cs_array; |
|
|
|
struct radeon_winsys_ctx *ctx = queue->hw_ctx; |
|
|
|
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; |
|
|
|
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; |
|
|
|
bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask; |
|
|
|
bool can_patch = true; |
|
|
|
uint32_t advance; |
|
|
|
struct radv_winsys_sem_info sem_info; |
|
|
|
VkResult result; |
|
|
|
int ret; |
|
|
|
struct radeon_cmdbuf *initial_preamble_cs = NULL; |
|
|
|
struct radeon_cmdbuf *initial_flush_preamble_cs = NULL; |
|
|
|
struct radeon_cmdbuf *continue_preamble_cs = NULL; |
|
|
|
|
|
|
|
result = radv_get_preambles(queue, submission->cmd_buffers, |
|
|
|
submission->cmd_buffer_count, |
|
|
|
&initial_preamble_cs, |
|
|
|
&initial_flush_preamble_cs, |
|
|
|
&continue_preamble_cs); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
|
|
|
|
for (uint32_t i = 0; i < submitCount; i++) { |
|
|
|
struct radeon_cmdbuf **cs_array; |
|
|
|
bool do_flush = !i || pSubmits[i].pWaitDstStageMask; |
|
|
|
bool can_patch = true; |
|
|
|
uint32_t advance; |
|
|
|
struct radv_winsys_sem_info sem_info; |
|
|
|
|
|
|
|
result = radv_alloc_sem_info(queue->device->instance, |
|
|
|
&sem_info, |
|
|
|
pSubmits[i].waitSemaphoreCount, |
|
|
|
pSubmits[i].pWaitSemaphores, |
|
|
|
pSubmits[i].signalSemaphoreCount, |
|
|
|
pSubmits[i].pSignalSemaphores, |
|
|
|
_fence); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
result = radv_alloc_sem_info(queue->device->instance, |
|
|
|
&sem_info, |
|
|
|
submission->wait_semaphore_count, |
|
|
|
submission->wait_semaphores, |
|
|
|
submission->signal_semaphore_count, |
|
|
|
submission->signal_semaphores, |
|
|
|
submission->fence); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
|
|
|
|
if (!pSubmits[i].commandBufferCount) { |
|
|
|
if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { |
|
|
|
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, |
|
|
|
&queue->device->empty_cs[queue->queue_family_index], |
|
|
|
1, NULL, NULL, |
|
|
|
&sem_info, NULL, |
|
|
|
false, base_fence); |
|
|
|
if (ret) { |
|
|
|
radv_loge("failed to submit CS %d\n", i); |
|
|
|
abort(); |
|
|
|
} |
|
|
|
fence_emitted = true; |
|
|
|
} |
|
|
|
radv_free_sem_info(&sem_info); |
|
|
|
continue; |
|
|
|
if (!submission->cmd_buffer_count) { |
|
|
|
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, |
|
|
|
&queue->device->empty_cs[queue->queue_family_index], |
|
|
|
1, NULL, NULL, |
|
|
|
&sem_info, NULL, |
|
|
|
false, base_fence); |
|
|
|
if (ret) { |
|
|
|
radv_loge("failed to submit CS\n"); |
|
|
|
abort(); |
|
|
|
} |
|
|
|
radv_free_sem_info(&sem_info); |
|
|
|
return VK_SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
cs_array = malloc(sizeof(struct radeon_cmdbuf *) * |
|
|
|
(pSubmits[i].commandBufferCount)); |
|
|
|
cs_array = malloc(sizeof(struct radeon_cmdbuf *) * |
|
|
|
(submission->cmd_buffer_count)); |
|
|
|
|
|
|
|
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, |
|
|
|
pSubmits[i].pCommandBuffers[j]); |
|
|
|
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); |
|
|
|
for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) { |
|
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]); |
|
|
|
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); |
|
|
|
|
|
|
|
cs_array[j] = cmd_buffer->cs; |
|
|
|
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) |
|
|
|
can_patch = false; |
|
|
|
cs_array[j] = cmd_buffer->cs; |
|
|
|
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) |
|
|
|
can_patch = false; |
|
|
|
|
|
|
|
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING; |
|
|
|
} |
|
|
|
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING; |
|
|
|
} |
|
|
|
|
|
|
|
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { |
|
|
|
struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs; |
|
|
|
const struct radv_winsys_bo_list *bo_list = NULL; |
|
|
|
for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) { |
|
|
|
struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs; |
|
|
|
const struct radv_winsys_bo_list *bo_list = NULL; |
|
|
|
|
|
|
|
advance = MIN2(max_cs_submission, |
|
|
|
pSubmits[i].commandBufferCount - j); |
|
|
|
advance = MIN2(max_cs_submission, |
|
|
|
submission->cmd_buffer_count - j); |
|
|
|
|
|
|
|
if (queue->device->trace_bo) |
|
|
|
*queue->device->trace_id_ptr = 0; |
|
|
|
if (queue->device->trace_bo) |
|
|
|
*queue->device->trace_id_ptr = 0; |
|
|
|
|
|
|
|
sem_info.cs_emit_wait = j == 0; |
|
|
|
sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount; |
|
|
|
sem_info.cs_emit_wait = j == 0; |
|
|
|
sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count; |
|
|
|
|
|
|
|
if (unlikely(queue->device->use_global_bo_list)) { |
|
|
|
pthread_mutex_lock(&queue->device->bo_list.mutex); |
|
|
|
bo_list = &queue->device->bo_list.list; |
|
|
|
} |
|
|
|
if (unlikely(queue->device->use_global_bo_list)) { |
|
|
|
pthread_mutex_lock(&queue->device->bo_list.mutex); |
|
|
|
bo_list = &queue->device->bo_list.list; |
|
|
|
} |
|
|
|
|
|
|
|
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, |
|
|
|
advance, initial_preamble, continue_preamble_cs, |
|
|
|
&sem_info, bo_list, |
|
|
|
can_patch, base_fence); |
|
|
|
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, |
|
|
|
advance, initial_preamble, continue_preamble_cs, |
|
|
|
&sem_info, bo_list, |
|
|
|
can_patch, base_fence); |
|
|
|
|
|
|
|
if (unlikely(queue->device->use_global_bo_list)) |
|
|
|
pthread_mutex_unlock(&queue->device->bo_list.mutex); |
|
|
|
if (unlikely(queue->device->use_global_bo_list)) |
|
|
|
pthread_mutex_unlock(&queue->device->bo_list.mutex); |
|
|
|
|
|
|
|
if (ret) { |
|
|
|
radv_loge("failed to submit CS %d\n", i); |
|
|
|
abort(); |
|
|
|
} |
|
|
|
fence_emitted = true; |
|
|
|
if (queue->device->trace_bo) { |
|
|
|
radv_check_gpu_hangs(queue, cs_array[j]); |
|
|
|
} |
|
|
|
if (ret) { |
|
|
|
radv_loge("failed to submit CS\n"); |
|
|
|
abort(); |
|
|
|
} |
|
|
|
if (queue->device->trace_bo) { |
|
|
|
radv_check_gpu_hangs(queue, cs_array[j]); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
radv_free_temp_syncobjs(queue->device, |
|
|
|
pSubmits[i].waitSemaphoreCount, |
|
|
|
pSubmits[i].pWaitSemaphores); |
|
|
|
radv_free_sem_info(&sem_info); |
|
|
|
free(cs_array); |
|
|
|
radv_free_temp_syncobjs(queue->device, |
|
|
|
submission->wait_semaphore_count, |
|
|
|
submission->wait_semaphores); |
|
|
|
radv_free_sem_info(&sem_info); |
|
|
|
free(cs_array); |
|
|
|
return VK_SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/* Signals fence as soon as all the work currently put on queue is done. */ |
|
|
|
static VkResult radv_signal_fence(struct radv_queue *queue, |
|
|
|
VkFence fence) |
|
|
|
{ |
|
|
|
return radv_queue_submit(queue, &(struct radv_queue_submission) { |
|
|
|
.fence = fence |
|
|
|
}); |
|
|
|
} |
|
|
|
|
|
|
|
VkResult radv_QueueSubmit( |
|
|
|
VkQueue _queue, |
|
|
|
uint32_t submitCount, |
|
|
|
const VkSubmitInfo* pSubmits, |
|
|
|
VkFence fence) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_queue, queue, _queue); |
|
|
|
VkResult result; |
|
|
|
bool fence_emitted = false; |
|
|
|
|
|
|
|
for (uint32_t i = 0; i < submitCount; i++) { |
|
|
|
if (!pSubmits[i].commandBufferCount && |
|
|
|
!pSubmits[i].waitSemaphoreCount && |
|
|
|
!pSubmits[i].signalSemaphoreCount) |
|
|
|
continue; |
|
|
|
|
|
|
|
VkPipelineStageFlags wait_dst_stage_mask = 0; |
|
|
|
for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) { |
|
|
|
wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j]; |
|
|
|
} |
|
|
|
|
|
|
|
result = radv_queue_submit(queue, &(struct radv_queue_submission) { |
|
|
|
.cmd_buffers = pSubmits[i].pCommandBuffers, |
|
|
|
.cmd_buffer_count = pSubmits[i].commandBufferCount, |
|
|
|
.wait_dst_stage_mask = wait_dst_stage_mask, |
|
|
|
.flush_caches = !fence_emitted, |
|
|
|
.wait_semaphores = pSubmits[i].pWaitSemaphores, |
|
|
|
.wait_semaphore_count = pSubmits[i].waitSemaphoreCount, |
|
|
|
.signal_semaphores = pSubmits[i].pSignalSemaphores, |
|
|
|
.signal_semaphore_count = pSubmits[i].signalSemaphoreCount, |
|
|
|
.fence = fence |
|
|
|
}); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
|
|
|
|
fence_emitted = true; |
|
|
|
} |
|
|
|
|
|
|
|
if (fence) { |
|
|
|
if (fence != VK_NULL_HANDLE) { |
|
|
|
if (!fence_emitted) { |
|
|
|
result = radv_signal_fence(queue, fence); |
|
|
|
if (result != VK_SUCCESS) |
|
|
@@ -4308,17 +4347,13 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, |
|
|
|
VkQueue _queue, |
|
|
|
uint32_t bindInfoCount, |
|
|
|
const VkBindSparseInfo* pBindInfo, |
|
|
|
VkFence _fence) |
|
|
|
VkFence fence) |
|
|
|
{ |
|
|
|
RADV_FROM_HANDLE(radv_fence, fence, _fence); |
|
|
|
RADV_FROM_HANDLE(radv_queue, queue, _queue); |
|
|
|
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; |
|
|
|
bool fence_emitted = false; |
|
|
|
VkResult result; |
|
|
|
int ret; |
|
|
|
|
|
|
|
for (uint32_t i = 0; i < bindInfoCount; ++i) { |
|
|
|
struct radv_winsys_sem_info sem_info; |
|
|
|
for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) { |
|
|
|
radv_sparse_buffer_bind_memory(queue->device, |
|
|
|
pBindInfo[i].pBufferBinds + j); |
|
|
@@ -4329,36 +4364,25 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, |
|
|
|
pBindInfo[i].pImageOpaqueBinds + j); |
|
|
|
} |
|
|
|
|
|
|
|
VkResult result; |
|
|
|
result = radv_alloc_sem_info(queue->device->instance, |
|
|
|
&sem_info, |
|
|
|
pBindInfo[i].waitSemaphoreCount, |
|
|
|
pBindInfo[i].pWaitSemaphores, |
|
|
|
pBindInfo[i].signalSemaphoreCount, |
|
|
|
pBindInfo[i].pSignalSemaphores, |
|
|
|
_fence); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
|
|
|
|
if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) { |
|
|
|
ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, |
|
|
|
&queue->device->empty_cs[queue->queue_family_index], |
|
|
|
1, NULL, NULL, |
|
|
|
&sem_info, NULL, |
|
|
|
false, base_fence); |
|
|
|
if (ret) { |
|
|
|
radv_loge("failed to submit CS %d\n", i); |
|
|
|
abort(); |
|
|
|
} |
|
|
|
if (!pBindInfo[i].waitSemaphoreCount && |
|
|
|
!pBindInfo[i].signalSemaphoreCount) |
|
|
|
continue; |
|
|
|
|
|
|
|
fence_emitted = true; |
|
|
|
} |
|
|
|
VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) { |
|
|
|
.wait_semaphores = pBindInfo[i].pWaitSemaphores, |
|
|
|
.wait_semaphore_count = pBindInfo[i].waitSemaphoreCount, |
|
|
|
.signal_semaphores = pBindInfo[i].pSignalSemaphores, |
|
|
|
.signal_semaphore_count = pBindInfo[i].signalSemaphoreCount, |
|
|
|
.fence = fence |
|
|
|
}); |
|
|
|
|
|
|
|
radv_free_sem_info(&sem_info); |
|
|
|
if (result != VK_SUCCESS) |
|
|
|
return result; |
|
|
|
|
|
|
|
fence_emitted = true; |
|
|
|
} |
|
|
|
|
|
|
|
if (fence) { |
|
|
|
if (fence != VK_NULL_HANDLE) { |
|
|
|
if (!fence_emitted) { |
|
|
|
result = radv_signal_fence(queue, fence); |
|
|
|
if (result != VK_SUCCESS) |