A simple Vulkan extension that allows applications to query the size and usage of all exposed memory heaps. The reported usage values are not fully accurate because they are tracked per DRM fd, but they should be close enough. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Alex Smith <asmith@feralinteractive.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (tags/19.0-branchpoint)
| @@ -1350,12 +1350,84 @@ void radv_GetPhysicalDeviceMemoryProperties( | |||
| *pMemoryProperties = physical_device->memory_properties; | |||
| } | |||
| static void | |||
| radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, | |||
| VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget) | |||
| { | |||
| RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); | |||
| VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties; | |||
| uint64_t visible_vram_size = radv_get_visible_vram_size(device); | |||
| uint64_t vram_size = radv_get_vram_size(device); | |||
| uint64_t gtt_size = device->rad_info.gart_size; | |||
| uint64_t heap_budget, heap_usage; | |||
| /* For all memory heaps, the computation of budget is as follow: | |||
| * heap_budget = heap_size - global_heap_usage + app_heap_usage | |||
| * | |||
| * The Vulkan spec 1.1.97 says that the budget should include any | |||
| * currently allocated device memory. | |||
| * | |||
| * Note that the application heap usages are not really accurate (eg. | |||
| * in presence of shared buffers). | |||
| */ | |||
| if (vram_size) { | |||
| heap_usage = device->ws->query_value(device->ws, | |||
| RADEON_ALLOCATED_VRAM); | |||
| heap_budget = vram_size - | |||
| device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + | |||
| heap_usage; | |||
| memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget; | |||
| memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage; | |||
| } | |||
| if (visible_vram_size) { | |||
| heap_usage = device->ws->query_value(device->ws, | |||
| RADEON_ALLOCATED_VRAM_VIS); | |||
| heap_budget = visible_vram_size - | |||
| device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + | |||
| heap_usage; | |||
| memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget; | |||
| memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage; | |||
| } | |||
| if (gtt_size) { | |||
| heap_usage = device->ws->query_value(device->ws, | |||
| RADEON_ALLOCATED_GTT); | |||
| heap_budget = gtt_size - | |||
| device->ws->query_value(device->ws, RADEON_GTT_USAGE) + | |||
| heap_usage; | |||
| memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget; | |||
| memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage; | |||
| } | |||
| /* The heapBudget and heapUsage values must be zero for array elements | |||
| * greater than or equal to | |||
| * VkPhysicalDeviceMemoryProperties::memoryHeapCount. | |||
| */ | |||
| for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) { | |||
| memoryBudget->heapBudget[i] = 0; | |||
| memoryBudget->heapUsage[i] = 0; | |||
| } | |||
| } | |||
| void radv_GetPhysicalDeviceMemoryProperties2( | |||
| VkPhysicalDevice physicalDevice, | |||
| VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) | |||
| { | |||
| radv_GetPhysicalDeviceMemoryProperties(physicalDevice, | |||
| &pMemoryProperties->memoryProperties); | |||
| VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget = | |||
| vk_find_struct(pMemoryProperties->pNext, | |||
| PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT); | |||
| if (memory_budget) | |||
| radv_get_memory_budget_properties(physicalDevice, memory_budget); | |||
| } | |||
| VkResult radv_GetMemoryHostPointerPropertiesEXT( | |||
| @@ -105,6 +105,7 @@ EXTENSIONS = [ | |||
| Extension('VK_EXT_external_memory_dma_buf', 1, True), | |||
| Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), | |||
| Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), | |||
| Extension('VK_EXT_memory_budget', 1, True), | |||
| Extension('VK_EXT_pci_bus_info', 2, True), | |||
| Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), | |||
| Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'), | |||
| @@ -84,6 +84,9 @@ enum radeon_ctx_priority { | |||
| }; | |||
| enum radeon_value_id { | |||
| RADEON_ALLOCATED_VRAM, | |||
| RADEON_ALLOCATED_VRAM_VIS, | |||
| RADEON_ALLOCATED_GTT, | |||
| RADEON_TIMESTAMP, | |||
| RADEON_NUM_BYTES_MOVED, | |||
| RADEON_NUM_EVICTIONS, | |||
| @@ -164,6 +167,7 @@ struct radeon_winsys_fence; | |||
| struct radeon_winsys_bo { /* Winsys-independent part of a buffer object; the amdgpu winsys BO accesses it through its `base` member. */ | |||
| uint64_t va; /* NOTE(review): presumably the GPU virtual address of the buffer -- confirm against users. */ | |||
| bool is_local; /* NOTE(review): meaning not visible in this diff -- confirm against users. */ | |||
| bool vram_cpu_access; /* Set at creation when RADEON_FLAG_CPU_ACCESS is requested and the initial domain includes VRAM; used to account the BO in the winsys allocated_vram_vis counter on create and destroy. */ | |||
| }; | |||
| struct radv_winsys_sem_counts { | |||
| uint32_t syncobj_count; | |||
| @@ -249,6 +249,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent, | |||
| static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) | |||
| { | |||
| struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
| struct radv_amdgpu_winsys *ws = bo->ws; | |||
| if (p_atomic_dec_return(&bo->ref_count)) | |||
| return; | |||
| @@ -269,6 +270,17 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) | |||
| 0, AMDGPU_VA_OP_UNMAP); | |||
| amdgpu_bo_free(bo->bo); | |||
| } | |||
| if (bo->initial_domain & RADEON_DOMAIN_VRAM) | |||
| p_atomic_add(&ws->allocated_vram, | |||
| -align64(bo->size, ws->info.gart_page_size)); | |||
| if (bo->base.vram_cpu_access) | |||
| p_atomic_add(&ws->allocated_vram_vis, | |||
| -align64(bo->size, ws->info.gart_page_size)); | |||
| if (bo->initial_domain & RADEON_DOMAIN_GTT) | |||
| p_atomic_add(&ws->allocated_gtt, | |||
| -align64(bo->size, ws->info.gart_page_size)); | |||
| amdgpu_va_range_free(bo->va_handle); | |||
| FREE(bo); | |||
| } | |||
| @@ -344,8 +356,10 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, | |||
| if (initial_domain & RADEON_DOMAIN_GTT) | |||
| request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; | |||
| if (flags & RADEON_FLAG_CPU_ACCESS) | |||
| if (flags & RADEON_FLAG_CPU_ACCESS) { | |||
| bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM; | |||
| request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | |||
| } | |||
| if (flags & RADEON_FLAG_NO_CPU_ACCESS) | |||
| request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; | |||
| if (flags & RADEON_FLAG_GTT_WC) | |||
| @@ -378,6 +392,17 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, | |||
| bo->bo = buf_handle; | |||
| bo->initial_domain = initial_domain; | |||
| bo->is_shared = false; | |||
| if (initial_domain & RADEON_DOMAIN_VRAM) | |||
| p_atomic_add(&ws->allocated_vram, | |||
| align64(bo->size, ws->info.gart_page_size)); | |||
| if (bo->base.vram_cpu_access) | |||
| p_atomic_add(&ws->allocated_vram_vis, | |||
| align64(bo->size, ws->info.gart_page_size)); | |||
| if (initial_domain & RADEON_DOMAIN_GTT) | |||
| p_atomic_add(&ws->allocated_gtt, | |||
| align64(bo->size, ws->info.gart_page_size)); | |||
| radv_amdgpu_add_buffer_to_global_list(bo); | |||
| return (struct radeon_winsys_bo *)bo; | |||
| error_va_map: | |||
| @@ -474,6 +499,9 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, | |||
| bo->bo = buf_handle; | |||
| bo->initial_domain = RADEON_DOMAIN_GTT; | |||
| p_atomic_add(&ws->allocated_gtt, | |||
| align64(bo->size, ws->info.gart_page_size)); | |||
| radv_amdgpu_add_buffer_to_global_list(bo); | |||
| return (struct radeon_winsys_bo *)bo; | |||
| @@ -538,6 +566,14 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, | |||
| bo->is_shared = true; | |||
| bo->ws = ws; | |||
| bo->ref_count = 1; | |||
| if (bo->initial_domain & RADEON_DOMAIN_VRAM) | |||
| p_atomic_add(&ws->allocated_vram, | |||
| align64(bo->size, ws->info.gart_page_size)); | |||
| if (bo->initial_domain & RADEON_DOMAIN_GTT) | |||
| p_atomic_add(&ws->allocated_gtt, | |||
| align64(bo->size, ws->info.gart_page_size)); | |||
| radv_amdgpu_add_buffer_to_global_list(bo); | |||
| return (struct radeon_winsys_bo *)bo; | |||
| error_va_map: | |||
| @@ -72,6 +72,12 @@ static uint64_t radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, | |||
| uint64_t retval = 0; | |||
| switch (value) { | |||
| case RADEON_ALLOCATED_VRAM: | |||
| return ws->allocated_vram; | |||
| case RADEON_ALLOCATED_VRAM_VIS: | |||
| return ws->allocated_vram_vis; | |||
| case RADEON_ALLOCATED_GTT: | |||
| return ws->allocated_gtt; | |||
| case RADEON_TIMESTAMP: | |||
| amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval); | |||
| return retval; | |||
| @@ -52,6 +52,10 @@ struct radv_amdgpu_winsys { | |||
| pthread_mutex_t global_bo_list_lock; | |||
| struct list_head global_bo_list; | |||
| uint64_t allocated_vram; | |||
| uint64_t allocated_vram_vis; | |||
| uint64_t allocated_gtt; | |||
| }; | |||
| static inline struct radv_amdgpu_winsys * | |||