Reviewed-by: Dave Airlie <airlied@redhat.com>
tags/18.1-branchpoint
@@ -2907,6 +2907,17 @@ static uint64_t radv_get_absolute_timeout(uint64_t timeout) | |||
return current_time + timeout; | |||
} | |||
static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences) | |||
{ | |||
for (uint32_t i = 0; i < fenceCount; ++i) { | |||
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); | |||
if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted)) | |||
return false; | |||
} | |||
return true; | |||
} | |||
VkResult radv_WaitForFences( | |||
VkDevice _device, | |||
uint32_t fenceCount, | |||
@@ -2918,6 +2929,31 @@ VkResult radv_WaitForFences( | |||
timeout = radv_get_absolute_timeout(timeout); | |||
if (!waitAll && fenceCount > 1) { | |||
/* Not doing this by default for waitAll, due to needing to allocate twice. */ | |||
if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) { | |||
uint32_t wait_count = 0; | |||
struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount); | |||
if (!fences) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
for (uint32_t i = 0; i < fenceCount; ++i) { | |||
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); | |||
if (fence->signalled) { | |||
free(fences); | |||
return VK_SUCCESS; | |||
} | |||
fences[wait_count++] = fence->fence; | |||
} | |||
bool success = device->ws->fences_wait(device->ws, fences, wait_count, | |||
waitAll, timeout - radv_get_current_time()); | |||
free(fences); | |||
return success ? VK_SUCCESS : VK_TIMEOUT; | |||
} | |||
while(radv_get_current_time() <= timeout) { | |||
for (uint32_t i = 0; i < fenceCount; ++i) { | |||
if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS) |
@@ -270,6 +270,11 @@ struct radeon_winsys { | |||
struct radeon_winsys_fence *fence, | |||
bool absolute, | |||
uint64_t timeout); | |||
bool (*fences_wait)(struct radeon_winsys *ws, | |||
struct radeon_winsys_fence *const *fences, | |||
uint32_t fence_count, | |||
bool wait_all, | |||
uint64_t timeout); | |||
/* old semaphores - non shareable */ | |||
struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws); |
@@ -154,6 +154,39 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws, | |||
return false; | |||
} | |||
static bool radv_amdgpu_fences_wait(struct radeon_winsys *_ws, | |||
struct radeon_winsys_fence *const *_fences, | |||
uint32_t fence_count, | |||
bool wait_all, | |||
uint64_t timeout) | |||
{ | |||
struct amdgpu_cs_fence *fences = malloc(sizeof(struct amdgpu_cs_fence) * fence_count); | |||
int r; | |||
uint32_t expired = 0, first = 0; | |||
if (!fences) | |||
return false; | |||
for (uint32_t i = 0; i < fence_count; ++i) | |||
fences[i] = ((struct radv_amdgpu_fence *)_fences[i])->fence; | |||
/* Now use the libdrm query. */ | |||
r = amdgpu_cs_wait_fences(fences, fence_count, wait_all, | |||
timeout, &expired, &first); | |||
free(fences); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: amdgpu_cs_wait_fences failed.\n"); | |||
return false; | |||
} | |||
if (expired) | |||
return true; | |||
return false; | |||
} | |||
static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs); | |||
@@ -1387,4 +1420,5 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws) | |||
ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file; | |||
ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file; | |||
ws->base.fence_wait = radv_amdgpu_fence_wait; | |||
ws->base.fences_wait = radv_amdgpu_fences_wait; | |||
} |