This squashes all the radv development up until now into one commit for merging.

History can be found at: https://github.com/airlied/mesa/tree/semi-interesting

This requires LLVM 3.9 and is in no way considered a conformant Vulkan
implementation. It can run a number of Vulkan applications, and supports
all GPUs using the amdgpu kernel driver.

Thanks to Intel for providing anv and spirv->nir, and Emil Velikov for
reviewing build integration.

Parts of this are:
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>

Authors: Bas Nieuwenhuizen and Dave Airlie
Signed-off-by: Dave Airlie <airlied@redhat.com>
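A minimal sketch of enabling the new driver at configure time (the `radeon`
value comes from the configure.ac hunk below; the extra flag values and job
count are illustrative assumptions, not part of the patch):

    ./autogen.sh --with-vulkan-drivers=intel,radeon
    make -j8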
@@ -1715,6 +1715,10 @@ if test -n "$with_vulkan_drivers"; then
            HAVE_INTEL_VULKAN=yes;
            ;;
        xradeon)
            PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
            HAVE_RADEON_VULKAN=yes;
            ;;
        *)
            AC_MSG_ERROR([Vulkan driver '$driver' does not exist])
            ;;
@@ -2198,7 +2202,7 @@ if test "x$enable_gallium_llvm" = xauto; then
        i*86|x86_64|amd64) enable_gallium_llvm=yes;;
    esac
fi
if test "x$enable_gallium_llvm" = xyes || test "x$HAVE_RADEON_VULKAN" = xyes; then
    if test -n "$llvm_prefix"; then
        AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no], ["$llvm_prefix/bin"])
    else
@@ -2368,10 +2372,7 @@ radeon_llvm_check() {
    else
        amdgpu_llvm_target_name='amdgpu'
    fi
    if test "x$enable_gallium_llvm" != "xyes"; then
        AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
    fi
    llvm_check_version_for "3" "6" "0" $1
    llvm_check_version_for $2 $3 $4 $1
    if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
        AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
    fi
@@ -2382,6 +2383,13 @@ radeon_llvm_check() {
    fi
}

radeon_gallium_llvm_check() {
    if test "x$enable_gallium_llvm" != "xyes"; then
        AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
    fi
    radeon_llvm_check $*
}

swr_llvm_check() {
    gallium_require_llvm $1
    if test ${LLVM_VERSION_INT} -lt 306; then
@@ -2466,7 +2474,7 @@ if test -n "$with_gallium_drivers"; then
            gallium_require_drm "Gallium R600"
            gallium_require_drm_loader
            if test "x$enable_opencl" = xyes; then
                radeon_llvm_check "r600g"
                radeon_gallium_llvm_check "r600g" "3" "6" "0"
                LLVM_COMPONENTS="${LLVM_COMPONENTS} bitreader asmparser"
            fi
            ;;
@@ -2476,7 +2484,7 @@ if test -n "$with_gallium_drivers"; then
            PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
            gallium_require_drm "radeonsi"
            gallium_require_drm_loader
            radeon_llvm_check "radeonsi"
            radeon_gallium_llvm_check "radeonsi" "3" "6" "0"
            require_egl_drm "radeonsi"
            ;;
        xnouveau)
@@ -2541,6 +2549,10 @@ if test -n "$with_gallium_drivers"; then
    done
fi

if test "x$HAVE_RADEON_VULKAN" != "x0"; then
    radeon_llvm_check "radv" "3" "9" "0"
fi

dnl Set LLVM_LIBS - This is done after the driver configuration so
dnl that drivers can add additional components to LLVM_COMPONENTS.
dnl Previously, gallium drivers were updating LLVM_LIBS directly
@@ -2632,8 +2644,13 @@ AM_CONDITIONAL(HAVE_R200_DRI, test x$HAVE_R200_DRI = xyes)
AM_CONDITIONAL(HAVE_RADEON_DRI, test x$HAVE_RADEON_DRI = xyes)
AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)

AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_R600" = xyes -o \
                                      "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
                                      "x$HAVE_RADEON_VULKAN" = xyes)

AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
                                        "x$HAVE_I965_DRI" = xyes)
@@ -2726,6 +2743,8 @@ dnl Substitute the config
AC_CONFIG_FILES([Makefile
                 src/Makefile
                 src/amd/Makefile
                 src/amd/common/Makefile
                 src/amd/vulkan/Makefile
                 src/compiler/Makefile
                 src/egl/Makefile
                 src/egl/main/egl.pc
@@ -74,7 +74,7 @@ endif
# include only conditionally ?
SUBDIRS += compiler

if HAVE_GALLIUM_RADEON_COMMON
if HAVE_AMD_DRIVERS
SUBDIRS += amd
endif
@@ -120,6 +120,12 @@ if HAVE_INTEL_VULKAN
SUBDIRS += intel/vulkan
endif

# Requires wayland-drm
if HAVE_RADEON_VULKAN
SUBDIRS += amd/common
SUBDIRS += amd/vulkan
endif

if HAVE_GALLIUM
SUBDIRS += gallium
endif
@@ -0,0 +1,51 @@
# Copyright © 2016 Bas Nieuwenhuizen
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

include Makefile.sources

# TODO: clean these up
AM_CPPFLAGS = \
	$(VALGRIND_CFLAGS) \
	$(DEFINES) \
	-I$(top_srcdir)/include \
	-I$(top_builddir)/src \
	-I$(top_srcdir)/src \
	-I$(top_builddir)/src/compiler \
	-I$(top_builddir)/src/compiler/nir \
	-I$(top_srcdir)/src/compiler \
	-I$(top_srcdir)/src/mapi \
	-I$(top_srcdir)/src/mesa \
	-I$(top_srcdir)/src/mesa/drivers/dri/common \
	-I$(top_srcdir)/src/gallium/auxiliary \
	-I$(top_srcdir)/src/gallium/include

AM_CFLAGS = $(VISIBILITY_CFLAGS) \
	$(PTHREAD_CFLAGS) \
	$(LLVM_CFLAGS) \
	$(LIBELF_CFLAGS)

AM_CXXFLAGS = \
	$(VISIBILITY_CXXFLAGS) \
	$(LLVM_CXXFLAGS)

noinst_LTLIBRARIES = libamd_common.la

libamd_common_la_SOURCES = $(AMD_COMPILER_SOURCES)
@@ -0,0 +1,29 @@
# Copyright © 2016 Bas Nieuwenhuizen
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

AMD_COMPILER_SOURCES := \
	ac_binary.c \
	ac_binary.h \
	ac_llvm_helper.cpp \
	ac_llvm_util.c \
	ac_llvm_util.h \
	ac_nir_to_llvm.c \
	ac_nir_to_llvm.h
@@ -0,0 +1,288 @@
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Tom Stellard <thomas.stellard@amd.com>
 *
 * Based on radeon_elf_util.c.
 */

#include "ac_binary.h"

#include "util/u_math.h"
#include "util/u_memory.h"

#include <gelf.h>
#include <libelf.h>
#include <stdio.h>

#include <sid.h>

#define SPILLED_SGPRS 0x4
#define SPILLED_VGPRS 0x8

static void parse_symbol_table(Elf_Data *symbol_table_data,
			const GElf_Shdr *symbol_table_header,
			struct ac_shader_binary *binary)
{
	GElf_Sym symbol;
	unsigned i = 0;
	unsigned symbol_count =
		symbol_table_header->sh_size / symbol_table_header->sh_entsize;

	/* We are over allocating this list, because symbol_count gives the
	 * total number of symbols, and we will only be filling the list
	 * with offsets of global symbols. The memory savings from
	 * allocating the correct size of this list will be small, and
	 * I don't think it is worth the cost of pre-computing the number
	 * of global symbols.
	 */
	binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));

	while (gelf_getsym(symbol_table_data, i++, &symbol)) {
		unsigned i;
		if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
		    symbol.st_shndx == 0 /* Undefined symbol */) {
			continue;
		}

		binary->global_symbol_offsets[binary->global_symbol_count] =
			symbol.st_value;

		/* Sort the list using bubble sort. This list will usually
		 * be small. */
		for (i = binary->global_symbol_count; i > 0; --i) {
			uint64_t lhs = binary->global_symbol_offsets[i - 1];
			uint64_t rhs = binary->global_symbol_offsets[i];
			if (lhs < rhs) {
				break;
			}
			binary->global_symbol_offsets[i] = lhs;
			binary->global_symbol_offsets[i - 1] = rhs;
		}
		++binary->global_symbol_count;
	}
}

static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
			unsigned symbol_sh_link,
			struct ac_shader_binary *binary)
{
	unsigned i;

	if (!relocs || !symbols || !binary->reloc_count) {
		return;
	}
	binary->relocs = CALLOC(binary->reloc_count,
			sizeof(struct ac_shader_reloc));
	for (i = 0; i < binary->reloc_count; i++) {
		GElf_Sym symbol;
		GElf_Rel rel;
		char *symbol_name;
		struct ac_shader_reloc *reloc = &binary->relocs[i];

		gelf_getrel(relocs, i, &rel);
		gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);

		reloc->offset = rel.r_offset;
		strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
		reloc->name[sizeof(reloc->name)-1] = 0;
	}
}

void ac_elf_read(const char *elf_data, unsigned elf_size,
		 struct ac_shader_binary *binary)
{
	char *elf_buffer;
	Elf *elf;
	Elf_Scn *section = NULL;
	Elf_Data *symbols = NULL, *relocs = NULL;
	size_t section_str_index;
	unsigned symbol_sh_link = 0;

	/* One of the libelf implementations
	 * (http://www.mr511.de/software/english.htm) requires calling
	 * elf_version() before elf_memory().
	 */
	elf_version(EV_CURRENT);
	elf_buffer = MALLOC(elf_size);
	memcpy(elf_buffer, elf_data, elf_size);

	elf = elf_memory(elf_buffer, elf_size);

	elf_getshdrstrndx(elf, &section_str_index);

	while ((section = elf_nextscn(elf, section))) {
		const char *name;
		Elf_Data *section_data = NULL;
		GElf_Shdr section_header;
		if (gelf_getshdr(section, &section_header) != &section_header) {
			fprintf(stderr, "Failed to read ELF section header\n");
			return;
		}
		name = elf_strptr(elf, section_str_index, section_header.sh_name);
		if (!strcmp(name, ".text")) {
			section_data = elf_getdata(section, section_data);
			binary->code_size = section_data->d_size;
			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
			memcpy(binary->code, section_data->d_buf, binary->code_size);
		} else if (!strcmp(name, ".AMDGPU.config")) {
			section_data = elf_getdata(section, section_data);
			binary->config_size = section_data->d_size;
			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
			memcpy(binary->config, section_data->d_buf, binary->config_size);
		} else if (!strcmp(name, ".AMDGPU.disasm")) {
			/* Always read disassembly if it's available. */
			section_data = elf_getdata(section, section_data);
			binary->disasm_string = strndup(section_data->d_buf,
							section_data->d_size);
		} else if (!strncmp(name, ".rodata", 7)) {
			section_data = elf_getdata(section, section_data);
			binary->rodata_size = section_data->d_size;
			binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
			memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
		} else if (!strncmp(name, ".symtab", 7)) {
			symbols = elf_getdata(section, section_data);
			symbol_sh_link = section_header.sh_link;
			parse_symbol_table(symbols, &section_header, binary);
		} else if (!strcmp(name, ".rel.text")) {
			relocs = elf_getdata(section, section_data);
			binary->reloc_count = section_header.sh_size /
					      section_header.sh_entsize;
		}
	}

	parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);

	if (elf) {
		elf_end(elf);
	}
	FREE(elf_buffer);

	/* Cache the config size per symbol */
	if (binary->global_symbol_count) {
		binary->config_size_per_symbol =
			binary->config_size / binary->global_symbol_count;
	} else {
		binary->global_symbol_count = 1;
		binary->config_size_per_symbol = binary->config_size;
	}
}

static
const unsigned char *ac_shader_binary_config_start(
	const struct ac_shader_binary *binary,
	uint64_t symbol_offset)
{
	unsigned i;
	for (i = 0; i < binary->global_symbol_count; ++i) {
		if (binary->global_symbol_offsets[i] == symbol_offset) {
			unsigned offset = i * binary->config_size_per_symbol;
			return binary->config + offset;
		}
	}
	return binary->config;
}

static const char *scratch_rsrc_dword0_symbol =
	"SCRATCH_RSRC_DWORD0";

static const char *scratch_rsrc_dword1_symbol =
	"SCRATCH_RSRC_DWORD1";

void ac_shader_binary_read_config(struct ac_shader_binary *binary,
				  struct ac_shader_config *conf,
				  unsigned symbol_offset)
{
	unsigned i;
	const unsigned char *config =
		ac_shader_binary_config_start(binary, symbol_offset);
	bool really_needs_scratch = false;

	/* LLVM adds SGPR spills to the scratch size.
	 * Find out if we really need the scratch buffer.
	 */
	for (i = 0; i < binary->reloc_count; i++) {
		const struct ac_shader_reloc *reloc = &binary->relocs[i];

		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
		    !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
			really_needs_scratch = true;
			break;
		}
	}

	for (i = 0; i < binary->config_size_per_symbol; i += 8) {
		unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
		unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
		switch (reg) {
		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
		case R_00B848_COMPUTE_PGM_RSRC1:
			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
			conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
			conf->float_mode = G_00B028_FLOAT_MODE(value);
			break;
		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
			break;
		case R_00B84C_COMPUTE_PGM_RSRC2:
			conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
			break;
		case R_0286CC_SPI_PS_INPUT_ENA:
			conf->spi_ps_input_ena = value;
			break;
		case R_0286D0_SPI_PS_INPUT_ADDR:
			conf->spi_ps_input_addr = value;
			break;
		case R_0286E8_SPI_TMPRING_SIZE:
		case R_00B860_COMPUTE_TMPRING_SIZE:
			/* WAVESIZE is in units of 256 dwords. */
			if (really_needs_scratch)
				conf->scratch_bytes_per_wave =
					G_00B860_WAVESIZE(value) * 256 * 4;
			break;
		case SPILLED_SGPRS:
			conf->spilled_sgprs = value;
			break;
		case SPILLED_VGPRS:
			conf->spilled_vgprs = value;
			break;
		default:
		{
			static bool printed;

			if (!printed) {
				fprintf(stderr, "Warning: LLVM emitted unknown "
					"config register: 0x%x\n", reg);
				printed = true;
			}
		}
		break;
		}

		if (!conf->spi_ps_input_addr)
			conf->spi_ps_input_addr = conf->spi_ps_input_ena;
	}
}
@@ -0,0 +1,88 @@
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Tom Stellard <thomas.stellard@amd.com>
 *
 */

#pragma once

#include <stdint.h>

struct ac_shader_reloc {
	char name[32];
	uint64_t offset;
};

struct ac_shader_binary {
	/** Shader code */
	unsigned char *code;
	unsigned code_size;

	/** Config/Context register state that accompanies this shader.
	 * This is a stream of dword pairs. First dword contains the
	 * register address, the second dword contains the value. */
	unsigned char *config;
	unsigned config_size;

	/** The number of bytes of config information for each global symbol.
	 */
	unsigned config_size_per_symbol;

	/** Constant data accessed by the shader. This will be uploaded
	 * into a constant buffer. */
	unsigned char *rodata;
	unsigned rodata_size;

	/** List of symbol offsets for the shader */
	uint64_t *global_symbol_offsets;
	unsigned global_symbol_count;

	struct ac_shader_reloc *relocs;
	unsigned reloc_count;

	/** Disassembled shader in a string. */
	char *disasm_string;
};

struct ac_shader_config {
	unsigned num_sgprs;
	unsigned num_vgprs;
	unsigned spilled_sgprs;
	unsigned spilled_vgprs;
	unsigned lds_size;
	unsigned spi_ps_input_ena;
	unsigned spi_ps_input_addr;
	unsigned float_mode;
	unsigned scratch_bytes_per_wave;
};

/*
 * Parse the elf binary stored in \p elf_data and create a
 * ac_shader_binary object.
 */
void ac_elf_read(const char *elf_data, unsigned elf_size,
		 struct ac_shader_binary *binary);

void ac_shader_binary_read_config(struct ac_shader_binary *binary,
				  struct ac_shader_config *conf,
				  unsigned symbol_offset);
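A minimal usage sketch for these two entry points, assuming an ELF shader
blob already produced by the LLVM AMDGPU backend (the elf_bytes and
elf_num_bytes names are placeholders, not from the patch):

	/* Parse the ELF blob, then decode the .AMDGPU.config dword pairs
	 * (register address, register value) for the symbol at offset 0. */
	struct ac_shader_binary binary = {0};
	struct ac_shader_config config = {0};

	ac_elf_read(elf_bytes, elf_num_bytes, &binary);
	ac_shader_binary_read_config(&binary, &config, 0);
	/* config.num_sgprs, config.num_vgprs, etc. are now populated. */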
@@ -0,0 +1,46 @@
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/* based on Marek's patch to lp_bld_misc.cpp */

// Workaround http://llvm.org/PR23628
#if HAVE_LLVM >= 0x0307
# pragma push_macro("DEBUG")
# undef DEBUG
#endif

#include "ac_nir_to_llvm.h"
#include <llvm-c/Core.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>

extern "C" void
ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
	llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
	llvm::AttrBuilder B;
	B.addDereferenceableAttr(bytes);
	A->addAttr(llvm::AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
}
@@ -0,0 +1,142 @@
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_util.h"

#include <llvm-c/Core.h>

#include "c11/threads.h"

#include <assert.h>
#include <stdio.h>

static void ac_init_llvm_target()
{
#if HAVE_LLVM < 0x0307
	LLVMInitializeR600TargetInfo();
	LLVMInitializeR600Target();
	LLVMInitializeR600TargetMC();
	LLVMInitializeR600AsmPrinter();
#else
	LLVMInitializeAMDGPUTargetInfo();
	LLVMInitializeAMDGPUTarget();
	LLVMInitializeAMDGPUTargetMC();
	LLVMInitializeAMDGPUAsmPrinter();
#endif
}

static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;

static LLVMTargetRef ac_get_llvm_target(const char *triple)
{
	LLVMTargetRef target = NULL;
	char *err_message = NULL;

	call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);

	if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
		fprintf(stderr, "Cannot find target for triple %s ", triple);
		if (err_message) {
			fprintf(stderr, "%s\n", err_message);
		}
		LLVMDisposeMessage(err_message);
		return NULL;
	}
	return target;
}

static const char *ac_get_llvm_processor_name(enum radeon_family family)
{
	switch (family) {
	case CHIP_TAHITI:
		return "tahiti";
	case CHIP_PITCAIRN:
		return "pitcairn";
	case CHIP_VERDE:
		return "verde";
	case CHIP_OLAND:
		return "oland";
	case CHIP_HAINAN:
		return "hainan";
	case CHIP_BONAIRE:
		return "bonaire";
	case CHIP_KABINI:
		return "kabini";
	case CHIP_KAVERI:
		return "kaveri";
	case CHIP_HAWAII:
		return "hawaii";
	case CHIP_MULLINS:
		return "mullins";
	case CHIP_TONGA:
		return "tonga";
	case CHIP_ICELAND:
		return "iceland";
	case CHIP_CARRIZO:
		return "carrizo";
#if HAVE_LLVM <= 0x0307
	case CHIP_FIJI:
		return "tonga";
	case CHIP_STONEY:
		return "carrizo";
#else
	case CHIP_FIJI:
		return "fiji";
	case CHIP_STONEY:
		return "stoney";
#endif
#if HAVE_LLVM <= 0x0308
	case CHIP_POLARIS10:
		return "tonga";
	case CHIP_POLARIS11:
		return "tonga";
#else
	case CHIP_POLARIS10:
		return "polaris10";
	case CHIP_POLARIS11:
		return "polaris11";
#endif
	default:
		return "";
	}
}

LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
{
	assert(family >= CHIP_TAHITI);

	const char *triple = "amdgcn--";
	LLVMTargetRef target = ac_get_llvm_target(triple);
	LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
	                             target,
	                             triple,
	                             ac_get_llvm_processor_name(family),
	                             "+DumpCode,+vgpr-spilling",
	                             LLVMCodeGenLevelDefault,
	                             LLVMRelocDefault,
	                             LLVMCodeModelDefault);

	return tm;
}
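A sketch of the intended lifecycle, assuming the caller owns the returned
target machine (LLVMDisposeTargetMachine is the standard llvm-c destructor;
the CHIP_TONGA choice is illustrative):

	LLVMTargetMachineRef tm = ac_create_target_machine(CHIP_TONGA);
	/* ... hand tm to ac_compile_nir_shader() for each shader ... */
	LLVMDisposeTargetMachine(tm);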
@@ -0,0 +1,31 @@
/*
 * Copyright 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#pragma once

#include <llvm-c/TargetMachine.h>

#include "amd_family.h"

LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
@@ -0,0 +1,102 @@
/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include <stdbool.h>
#include "llvm-c/Core.h"
#include "llvm-c/TargetMachine.h"
#include "amd_family.h"

struct ac_shader_binary;
struct ac_shader_config;
struct nir_shader;
struct radv_pipeline_layout;

struct ac_vs_variant_key {
	uint32_t instance_rate_inputs;
};

struct ac_fs_variant_key {
	uint32_t col_format;
	uint32_t is_int8;
};

union ac_shader_variant_key {
	struct ac_vs_variant_key vs;
	struct ac_fs_variant_key fs;
};

struct ac_nir_compiler_options {
	struct radv_pipeline_layout *layout;
	union ac_shader_variant_key key;
	bool unsafe_math;
	enum radeon_family family;
	enum chip_class chip_class;
};

struct ac_shader_variant_info {
	unsigned num_user_sgprs;
	unsigned num_input_sgprs;
	unsigned num_input_vgprs;
	union {
		struct {
			unsigned param_exports;
			unsigned pos_exports;
			unsigned vgpr_comp_cnt;
			uint32_t export_mask;
			bool writes_pointsize;
			uint8_t clip_dist_mask;
			uint8_t cull_dist_mask;
		} vs;
		struct {
			unsigned num_interp;
			uint32_t input_mask;
			unsigned output_mask;
			uint32_t flat_shaded_mask;
			bool has_pcoord;
			bool can_discard;
			bool writes_z;
			bool writes_stencil;
			bool early_fragment_test;
			bool writes_memory;
		} fs;
		struct {
			unsigned block_size[3];
		} cs;
	};
};

void ac_compile_nir_shader(LLVMTargetMachineRef tm,
			   struct ac_shader_binary *binary,
			   struct ac_shader_config *config,
			   struct ac_shader_variant_info *shader_info,
			   struct nir_shader *nir,
			   const struct ac_nir_compiler_options *options,
			   bool dump_shader);

#ifdef __cplusplus
extern "C"
#endif
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
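Tying the header together, a hedged sketch of a compile call; the
family/chip_class pairing and the pipeline_layout, nir, and tm variables
are illustrative assumptions, not code from the patch:

	struct ac_nir_compiler_options options = {0};
	options.family = CHIP_TONGA;       /* illustrative GPU choice */
	options.chip_class = VI;           /* assumed to match CHIP_TONGA */
	options.layout = pipeline_layout;  /* radv_pipeline_layout from the API layer */

	struct ac_shader_binary binary = {0};
	struct ac_shader_config config = {0};
	struct ac_shader_variant_info shader_info = {0};

	ac_compile_nir_shader(tm, &binary, &config, &shader_info, nir,
			      &options, false /* dump_shader */);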
@@ -0,0 +1,6 @@
# Generated source files
/radv_entrypoints.c
/radv_entrypoints.h
/radv_timestamp.h
/dev_icd.json
/vk_format_table.c
@@ -0,0 +1,165 @@
# Copyright © 2016 Red Hat
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

include Makefile.sources

vulkan_includedir = $(includedir)/vulkan

vulkan_include_HEADERS = \
	$(top_srcdir)/include/vulkan/vk_platform.h \
	$(top_srcdir)/include/vulkan/vulkan.h

lib_LTLIBRARIES = libvulkan_radeon.la

# The gallium includes are for the util/u_math.h include from main/macros.h
AM_CPPFLAGS = \
	$(AMDGPU_CFLAGS) \
	$(VALGRIND_CFLAGS) \
	$(DEFINES) \
	-I$(top_srcdir)/include \
	-I$(top_builddir)/src \
	-I$(top_srcdir)/src \
	-I$(top_srcdir)/src/amd \
	-I$(top_srcdir)/src/amd/common \
	-I$(top_builddir)/src/compiler \
	-I$(top_builddir)/src/compiler/nir \
	-I$(top_srcdir)/src/compiler \
	-I$(top_srcdir)/src/mapi \
	-I$(top_srcdir)/src/mesa \
	-I$(top_srcdir)/src/mesa/drivers/dri/common \
	-I$(top_srcdir)/src/gallium/auxiliary \
	-I$(top_srcdir)/src/gallium/include

AM_CFLAGS = $(VISIBILITY_FLAGS) \
	$(PTHREAD_CFLAGS) \
	$(LLVM_CFLAGS)

VULKAN_SOURCES = \
	$(VULKAN_GENERATED_FILES) \
	$(VULKAN_FILES)

VULKAN_LIB_DEPS = $(AMDGPU_LIBS)

if HAVE_PLATFORM_X11
AM_CPPFLAGS += \
	$(XCB_DRI3_CFLAGS) \
	-DVK_USE_PLATFORM_XCB_KHR \
	-DVK_USE_PLATFORM_XLIB_KHR

VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)

# FIXME: Use pkg-config for X11-xcb ldflags.
VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
endif

if HAVE_PLATFORM_WAYLAND
AM_CPPFLAGS += \
	-I$(top_builddir)/src/egl/wayland/wayland-drm \
	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
	$(WAYLAND_CFLAGS) \
	-DVK_USE_PLATFORM_WAYLAND_KHR

VULKAN_SOURCES += $(VULKAN_WSI_WAYLAND_FILES)

VULKAN_LIB_DEPS += \
	$(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la \
	$(WAYLAND_LIBS)
endif

noinst_LTLIBRARIES = libvulkan_common.la
libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)

VULKAN_LIB_DEPS += \
	libvulkan_common.la \
	$(top_builddir)/src/amd/common/libamd_common.la \
	$(top_builddir)/src/compiler/nir/libnir.la \
	$(top_builddir)/src/util/libmesautil.la \
	$(LLVM_LIBS) \
	$(LIBELF_LIBS) \
	$(PTHREAD_LIBS) \
	$(LIBDRM_LIBS) \
	$(PTHREAD_LIBS) \
	$(DLOPEN_LIBS) \
	-lm

nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py header > $@

radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@

.PHONY: radv_timestamp.h

radv_timestamp.h:
	@echo "Updating radv_timestamp.h"
	$(AM_V_GEN) echo "#define RADV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@

vk_format_table.c: vk_format_table.py \
		vk_format_parse.py \
		vk_format_layout.csv
	$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h
EXTRA_DIST = \
	$(top_srcdir)/include/vulkan/vk_icd.h \
	radv_entrypoints_gen.py \
	dev_icd.json.in \
	radeon_icd.json

libvulkan_radeon_la_LIBADD = $(VULKAN_LIB_DEPS) $(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la

libvulkan_radeon_la_LDFLAGS = \
	-shared \
	-module \
	-no-undefined \
	-avoid-version \
	$(BSYMBOLIC) \
	$(LLVM_LDFLAGS) \
	$(GC_SECTIONS) \
	$(LD_NO_UNDEFINED)

icdconfdir = @VULKAN_ICD_INSTALL_DIR@
icdconf_DATA = radeon_icd.json
# The following is used for development purposes, by setting VK_ICD_FILENAMES.
noinst_DATA = dev_icd.json

dev_icd.json : dev_icd.json.in
	$(AM_V_GEN) $(SED) \
		-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
		< $(srcdir)/dev_icd.json.in > $@

include $(top_srcdir)/install-lib-links.mk

noinst_HEADERS =

LDADD = \
	$(PTHREAD_LIBS) -lm -lstdc++
@@ -0,0 +1,67 @@
# Copyright © 2016 Red Hat
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

RADV_WS_AMDGPU_FILES := \
	winsys/amdgpu/radv_amdgpu_bo.c \
	winsys/amdgpu/radv_amdgpu_cs.c \
	winsys/amdgpu/radv_amdgpu_surface.c \
	winsys/amdgpu/radv_amdgpu_winsys.c \
	winsys/amdgpu/radv_amdgpu_winsys.h

VULKAN_FILES := \
	radv_cmd_buffer.c \
	radv_device.c \
	radv_descriptor_set.c \
	radv_formats.c \
	radv_image.c \
	radv_meta.c \
	radv_meta.h \
	radv_meta_blit.c \
	radv_meta_blit2d.c \
	radv_meta_buffer.c \
	radv_meta_bufimage.c \
	radv_meta_clear.c \
	radv_meta_copy.c \
	radv_meta_decompress.c \
	radv_meta_fast_clear.c \
	radv_meta_resolve.c \
	radv_meta_resolve_cs.c \
	radv_pass.c \
	radv_pipeline.c \
	radv_pipeline_cache.c \
	radv_query.c \
	radv_util.c \
	radv_wsi.c \
	si_cmd_buffer.c \
	vk_format_table.c \
	$(RADV_WS_AMDGPU_FILES)

VULKAN_WSI_WAYLAND_FILES := \
	radv_wsi_wayland.c

VULKAN_WSI_X11_FILES := \
	radv_wsi_x11.c

VULKAN_GENERATED_FILES := \
	radv_entrypoints.c \
	radv_entrypoints.h \
	radv_timestamp.h
@@ -0,0 +1,7 @@
{
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "@build_libdir@/libvulkan_radeon.so",
        "abi_versions": "1.0.3"
    }
}
@@ -0,0 +1,7 @@
{
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "libvulkan_radeon.so",
        "abi_versions": "1.0.3"
    }
}
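The dev_icd.json variant exists so the Vulkan loader can be pointed at an
uninstalled build, per the VK_ICD_FILENAMES comment in the Makefile above;
a typical invocation (the path is a placeholder):

	export VK_ICD_FILENAMES=/path/to/mesa-build/src/amd/vulkan/dev_icd.json
	vulkaninfo    # or any other Vulkan application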
@@ -0,0 +1,117 @@
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include <string.h>
#include <stdint.h>
#include <assert.h>
#include "r600d_common.h"

static inline unsigned radeon_check_space(struct radeon_winsys *ws,
					  struct radeon_winsys_cs *cs,
					  unsigned needed)
{
	if (cs->max_dw - cs->cdw < needed)
		ws->cs_grow(cs, needed);
	return cs->cdw + needed;
}

static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
	assert(reg < R600_CONTEXT_REG_OFFSET);
	assert(cs->cdw + 2 + num <= cs->max_dw);
	radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
	radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}

static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_config_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}

static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
	assert(reg >= R600_CONTEXT_REG_OFFSET);
	assert(cs->cdw + 2 + num <= cs->max_dw);
	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
	radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}

static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_context_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}

static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
					      unsigned reg, unsigned idx,
					      unsigned value)
{
	assert(reg >= R600_CONTEXT_REG_OFFSET);
	assert(cs->cdw + 3 <= cs->max_dw);
	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
	radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
	radeon_emit(cs, value);
}

static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
	assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
	assert(cs->cdw + 2 + num <= cs->max_dw);
	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
	radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}

static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_sh_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}

static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
	assert(cs->cdw + 2 + num <= cs->max_dw);
	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
	radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}

static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_uconfig_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}

static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
					      unsigned reg, unsigned idx,
					      unsigned value)
{
	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
	assert(cs->cdw + 3 <= cs->max_dw);
	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
	radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
	radeon_emit(cs, value);
}
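As a usage sketch, emitting one context register with the helpers above
(SPI_TMPRING_SIZE is just an example register from sid.h; ws, cs, and
tmpring_value are placeholders):

	/* Reserve space, then emit a 3-dword SET_CONTEXT_REG packet. */
	radeon_check_space(ws, cs, 3);
	radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, tmpring_value);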
@@ -0,0 +1,716 @@
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "radv_private.h"
#include "sid.h"

VkResult radv_CreateDescriptorSetLayout(
	VkDevice                                    _device,
	const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDescriptorSetLayout*                      pSetLayout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_descriptor_set_layout *set_layout;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);

	uint32_t max_binding = 0;
	uint32_t immutable_sampler_count = 0;
	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
		max_binding = MAX(max_binding, pCreateInfo->pBindings[j].binding);
		if (pCreateInfo->pBindings[j].pImmutableSamplers)
			immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
	}

	size_t size = sizeof(struct radv_descriptor_set_layout) +
		      (max_binding + 1) * sizeof(set_layout->binding[0]) +
		      immutable_sampler_count * sizeof(struct radv_sampler *);

	set_layout = radv_alloc2(&device->alloc, pAllocator, size, 8,
				 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!set_layout)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	/* We just allocate all the samplers at the end of the struct */
	struct radv_sampler **samplers =
		(struct radv_sampler **)&set_layout->binding[max_binding + 1];

	set_layout->binding_count = max_binding + 1;
	set_layout->shader_stages = 0;
	set_layout->size = 0;

	memset(set_layout->binding, 0, size - sizeof(struct radv_descriptor_set_layout));

	uint32_t buffer_count = 0;
	uint32_t dynamic_offset_count = 0;

	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
		const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
		uint32_t b = binding->binding;
		uint32_t alignment;

		switch (binding->descriptorType) {
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
			set_layout->binding[b].dynamic_offset_count = 1;
			set_layout->dynamic_shader_stages |= binding->stageFlags;
			set_layout->binding[b].size = 0;
			set_layout->binding[b].buffer_count = 1;
			alignment = 1;
			break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
			set_layout->binding[b].size = 16;
			set_layout->binding[b].buffer_count = 1;
			alignment = 16;
			break;
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
			/* main descriptor + fmask descriptor */
			set_layout->binding[b].size = 64;
			set_layout->binding[b].buffer_count = 1;
			alignment = 32;
			break;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			/* main descriptor + fmask descriptor + sampler */
			set_layout->binding[b].size = 96;
			set_layout->binding[b].buffer_count = 1;
			alignment = 32;
			break;
		case VK_DESCRIPTOR_TYPE_SAMPLER:
			set_layout->binding[b].size = 16;
			alignment = 16;
			break;
		default:
			break;
		}

		set_layout->size = align(set_layout->size, alignment);
		assert(binding->descriptorCount > 0);
		set_layout->binding[b].type = binding->descriptorType;
		set_layout->binding[b].array_size = binding->descriptorCount;
		set_layout->binding[b].offset = set_layout->size;
		set_layout->binding[b].buffer_offset = buffer_count;
		set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;

		set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
		buffer_count += binding->descriptorCount * set_layout->binding[b].buffer_count;
		dynamic_offset_count += binding->descriptorCount *
			set_layout->binding[b].dynamic_offset_count;

		if (binding->pImmutableSamplers) {
			set_layout->binding[b].immutable_samplers = samplers;
			samplers += binding->descriptorCount;

			for (uint32_t i = 0; i < binding->descriptorCount; i++)
				set_layout->binding[b].immutable_samplers[i] =
					radv_sampler_from_handle(binding->pImmutableSamplers[i]);
		} else {
			set_layout->binding[b].immutable_samplers = NULL;
		}

		set_layout->shader_stages |= binding->stageFlags;
	}

	set_layout->buffer_count = buffer_count;
	set_layout->dynamic_offset_count = dynamic_offset_count;

	*pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);

	return VK_SUCCESS;
}
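For reference, the application-side call this entry point implements, using a
single combined image/sampler binding (standard Vulkan API usage; values are
illustrative). With the sizes chosen above, this layout occupies 96 bytes of
descriptor memory:

	VkDescriptorSetLayoutBinding binding = {
		.binding = 0,
		.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
		.descriptorCount = 1,
		.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
	};
	VkDescriptorSetLayoutCreateInfo info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = &binding,
	};
	VkDescriptorSetLayout layout;
	radv_CreateDescriptorSetLayout(device, &info, NULL, &layout);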
void radv_DestroyDescriptorSetLayout(
	VkDevice                                    _device,
	VkDescriptorSetLayout                       _set_layout,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, _set_layout);

	if (!set_layout)
		return;

	radv_free2(&device->alloc, pAllocator, set_layout);
}

/*
 * Pipeline layouts. These have nothing to do with the pipeline. They are
 * just multiple descriptor set layouts pasted together.
 */
VkResult radv_CreatePipelineLayout(
	VkDevice                                    _device,
	const VkPipelineLayoutCreateInfo*           pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineLayout*                           pPipelineLayout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_layout *layout;
	struct mesa_sha1 *ctx;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);

	layout = radv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8,
			     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (layout == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	layout->num_sets = pCreateInfo->setLayoutCount;

	unsigned dynamic_offset_count = 0;

	ctx = _mesa_sha1_init();
	for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
		RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout,
				 pCreateInfo->pSetLayouts[set]);
		layout->set[set].layout = set_layout;

		layout->set[set].dynamic_offset_start = dynamic_offset_count;
		for (uint32_t b = 0; b < set_layout->binding_count; b++) {
			dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
		}
		_mesa_sha1_update(ctx, set_layout->binding,
				  sizeof(set_layout->binding[0]) * set_layout->binding_count);
	}

	layout->dynamic_offset_count = dynamic_offset_count;
	layout->push_constant_size = 0;
	for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
		const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
		layout->push_constant_size = MAX2(layout->push_constant_size,
						  range->offset + range->size);
	}

	layout->push_constant_size = align(layout->push_constant_size, 16);
	_mesa_sha1_update(ctx, &layout->push_constant_size,
			  sizeof(layout->push_constant_size));
	_mesa_sha1_final(ctx, layout->sha1);
	*pPipelineLayout = radv_pipeline_layout_to_handle(layout);

	return VK_SUCCESS;
}

void radv_DestroyPipelineLayout(
	VkDevice                                    _device,
	VkPipelineLayout                            _pipelineLayout,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);

	if (!pipeline_layout)
		return;
	radv_free2(&device->alloc, pAllocator, pipeline_layout);
}

#define EMPTY 1

static VkResult
radv_descriptor_set_create(struct radv_device *device,
			   struct radv_descriptor_pool *pool,
			   struct radv_cmd_buffer *cmd_buffer,
			   const struct radv_descriptor_set_layout *layout,
			   struct radv_descriptor_set **out_set)
{
	struct radv_descriptor_set *set;
	unsigned mem_size = sizeof(struct radv_descriptor_set) +
		sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
	set = radv_alloc2(&device->alloc, NULL, mem_size, 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!set)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(set, 0, mem_size);

	if (layout->dynamic_offset_count) {
		unsigned size = sizeof(struct radv_descriptor_range) *
				layout->dynamic_offset_count;
		set->dynamic_descriptors = radv_alloc2(&device->alloc, NULL, size, 8,
						       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if (!set->dynamic_descriptors) {
			radv_free2(&device->alloc, NULL, set);
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
		}
	}

	set->layout = layout;
	if (layout->size) {
		uint32_t layout_size = align_u32(layout->size, 32);
		set->size = layout->size;
		if (!cmd_buffer) {
			if (pool->current_offset + layout_size <= pool->size) {
				set->bo = pool->bo;
				set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
				set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
				pool->current_offset += layout_size;
			} else {
				int entry = pool->free_list, prev_entry = -1;
				uint32_t offset;
				while (entry >= 0) {
					if (pool->free_nodes[entry].size >= layout_size) {
						if (prev_entry >= 0)
							pool->free_nodes[prev_entry].next = pool->free_nodes[entry].next;
						else
							pool->free_list = pool->free_nodes[entry].next;
						break;
					}
					prev_entry = entry;
					entry = pool->free_nodes[entry].next;
				}

				if (entry < 0) {
					radv_free2(&device->alloc, NULL, set);
					return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
				}
				offset = pool->free_nodes[entry].offset;
				pool->free_nodes[entry].next = pool->full_list;
				pool->full_list = entry;

				set->bo = pool->bo;
				set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
				set->va = device->ws->buffer_get_va(set->bo) + offset;
			}
		} else {
			unsigned bo_offset;
			if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32,
							  &bo_offset,
							  (void**)&set->mapped_ptr)) {
				radv_free2(&device->alloc, NULL, set->dynamic_descriptors);
				radv_free2(&device->alloc, NULL, set);
				return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
			}

			set->va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
			set->va += bo_offset;
		}
	}

	if (pool)
		list_add(&set->descriptor_pool, &pool->descriptor_sets);
	else
		list_inithead(&set->descriptor_pool);

	for (unsigned i = 0; i < layout->binding_count; ++i) {
		if (!layout->binding[i].immutable_samplers)
			continue;

		unsigned offset = layout->binding[i].offset / 4;
		if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
			offset += 16;

		for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
			struct radv_sampler *sampler = layout->binding[i].immutable_samplers[j];
			memcpy(set->mapped_ptr + offset, &sampler->state, 16);
			offset += layout->binding[i].size / 4;
		}
	}

	*out_set = set;
	return VK_SUCCESS;
}

static void
radv_descriptor_set_destroy(struct radv_device *device,
			    struct radv_descriptor_pool *pool,
			    struct radv_descriptor_set *set,
			    bool free_bo)
{
	if (free_bo && set->size) {
		assert(pool->full_list >= 0);
		int next = pool->free_nodes[pool->full_list].next;
		pool->free_nodes[pool->full_list].next = pool->free_list;
		pool->free_nodes[pool->full_list].offset = (uint8_t*)set->mapped_ptr - pool->mapped_ptr;
		pool->free_nodes[pool->full_list].size = align_u32(set->size, 32);
		pool->free_list = pool->full_list;
		pool->full_list = next;
	}

	if (set->dynamic_descriptors)
		radv_free2(&device->alloc, NULL, set->dynamic_descriptors);

	if (!list_empty(&set->descriptor_pool))
		list_del(&set->descriptor_pool);

	radv_free2(&device->alloc, NULL, set);
}

VkResult
radv_temp_descriptor_set_create(struct radv_device *device,
				struct radv_cmd_buffer *cmd_buffer,
				VkDescriptorSetLayout _layout,
				VkDescriptorSet *_set)
{
	RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, _layout);
	struct radv_descriptor_set *set;
	VkResult ret;

	ret = radv_descriptor_set_create(device, NULL, cmd_buffer, layout, &set);
	*_set = radv_descriptor_set_to_handle(set);
	return ret;
}

void
radv_temp_descriptor_set_destroy(struct radv_device *device,
				 VkDescriptorSet _set)
{
	RADV_FROM_HANDLE(radv_descriptor_set, set, _set);

	radv_descriptor_set_destroy(device, NULL, set, false);
}

VkResult radv_CreateDescriptorPool(
	VkDevice                                    _device,
	const VkDescriptorPoolCreateInfo*           pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDescriptorPool*                           pDescriptorPool)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_descriptor_pool *pool;
	unsigned max_sets = pCreateInfo->maxSets * 2;
	int size = sizeof(struct radv_descriptor_pool) +
		   max_sets * sizeof(struct radv_descriptor_pool_free_node);
uint64_t bo_size = 0; | |||
pool = radv_alloc2(&device->alloc, pAllocator, size, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (!pool) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
memset(pool, 0, sizeof(*pool)); | |||
pool->free_list = -1; | |||
pool->full_list = 0; | |||
pool->free_nodes[max_sets - 1].next = -1; | |||
pool->max_sets = max_sets; | |||
for (int i = 0; i + 1 < max_sets; ++i) | |||
pool->free_nodes[i].next = i + 1; | |||
for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { | |||
switch(pCreateInfo->pPoolSizes[i].type) { | |||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: | |||
break; | |||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: | |||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: | |||
case VK_DESCRIPTOR_TYPE_SAMPLER: | |||
/* 32 bytes each, as we may need to align up for images */
bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount; | |||
break; | |||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: | |||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: | |||
bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount; | |||
break; | |||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: | |||
bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount; | |||
break; | |||
default: | |||
unreachable("unknown descriptor type\n"); | |||
break; | |||
} | |||
} | |||
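/* Illustrative sizing: a pool for 4 uniform buffers and 2 combined
 * image/samplers reserves 4 * 32 + 2 * 96 = 320 bytes of descriptor memory.
 */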
if (bo_size) { | |||
pool->bo = device->ws->buffer_create(device->ws, bo_size, | |||
32, RADEON_DOMAIN_VRAM, 0); | |||
pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo); | |||
} | |||
pool->size = bo_size; | |||
list_inithead(&pool->descriptor_sets); | |||
*pDescriptorPool = radv_descriptor_pool_to_handle(pool); | |||
return VK_SUCCESS; | |||
} | |||
void radv_DestroyDescriptorPool( | |||
VkDevice _device, | |||
VkDescriptorPool _pool, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool); | |||
if (!pool) | |||
return; | |||
list_for_each_entry_safe(struct radv_descriptor_set, set, | |||
&pool->descriptor_sets, descriptor_pool) { | |||
radv_descriptor_set_destroy(device, pool, set, false); | |||
} | |||
if (pool->bo) | |||
device->ws->buffer_destroy(pool->bo); | |||
radv_free2(&device->alloc, pAllocator, pool); | |||
} | |||
VkResult radv_ResetDescriptorPool( | |||
VkDevice _device, | |||
VkDescriptorPool descriptorPool, | |||
VkDescriptorPoolResetFlags flags) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool); | |||
list_for_each_entry_safe(struct radv_descriptor_set, set, | |||
&pool->descriptor_sets, descriptor_pool) { | |||
radv_descriptor_set_destroy(device, pool, set, false); | |||
} | |||
pool->current_offset = 0; | |||
pool->free_list = -1; | |||
pool->full_list = 0; | |||
pool->free_nodes[pool->max_sets - 1].next = -1; | |||
for (int i = 0; i + 1 < pool->max_sets; ++i) | |||
pool->free_nodes[i].next = i + 1; | |||
return VK_SUCCESS; | |||
} | |||
VkResult radv_AllocateDescriptorSets( | |||
VkDevice _device, | |||
const VkDescriptorSetAllocateInfo* pAllocateInfo, | |||
VkDescriptorSet* pDescriptorSets) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_descriptor_pool, pool, pAllocateInfo->descriptorPool); | |||
VkResult result = VK_SUCCESS; | |||
uint32_t i; | |||
struct radv_descriptor_set *set; | |||
/* allocate one descriptor set per layout requested by the application */
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { | |||
RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, | |||
pAllocateInfo->pSetLayouts[i]); | |||
result = radv_descriptor_set_create(device, pool, NULL, layout, &set); | |||
if (result != VK_SUCCESS) | |||
break; | |||
pDescriptorSets[i] = radv_descriptor_set_to_handle(set); | |||
} | |||
if (result != VK_SUCCESS) | |||
radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, | |||
i, pDescriptorSets); | |||
return result; | |||
} | |||
VkResult radv_FreeDescriptorSets( | |||
VkDevice _device, | |||
VkDescriptorPool descriptorPool, | |||
uint32_t count, | |||
const VkDescriptorSet* pDescriptorSets) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool); | |||
for (uint32_t i = 0; i < count; i++) { | |||
RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); | |||
if (set) | |||
radv_descriptor_set_destroy(device, pool, set, true); | |||
} | |||
return VK_SUCCESS; | |||
} | |||
static void write_texel_buffer_descriptor(struct radv_device *device, | |||
unsigned *dst, | |||
struct radeon_winsys_bo **buffer_list, | |||
const VkBufferView _buffer_view) | |||
{ | |||
RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view); | |||
memcpy(dst, buffer_view->state, 4 * 4); | |||
*buffer_list = buffer_view->bo; | |||
} | |||
static void write_buffer_descriptor(struct radv_device *device, | |||
unsigned *dst, | |||
struct radeon_winsys_bo **buffer_list, | |||
const VkDescriptorBufferInfo *buffer_info) | |||
{ | |||
RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer); | |||
uint64_t va = device->ws->buffer_get_va(buffer->bo); | |||
uint32_t range = buffer_info->range; | |||
if (buffer_info->range == VK_WHOLE_SIZE) | |||
range = buffer->size - buffer_info->offset; | |||
va += buffer_info->offset + buffer->offset; | |||
dst[0] = va; | |||
dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); | |||
dst[2] = range; | |||
dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | | |||
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | | |||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | | |||
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | | |||
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | | |||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); | |||
*buffer_list = buffer->bo; | |||
} | |||
static void write_dynamic_buffer_descriptor(struct radv_device *device, | |||
struct radv_descriptor_range *range, | |||
struct radeon_winsys_bo **buffer_list, | |||
const VkDescriptorBufferInfo *buffer_info) | |||
{ | |||
RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer); | |||
uint64_t va = device->ws->buffer_get_va(buffer->bo); | |||
unsigned size = buffer_info->range; | |||
if (buffer_info->range == VK_WHOLE_SIZE) | |||
size = buffer->size - buffer_info->offset; | |||
va += buffer_info->offset + buffer->offset; | |||
range->va = va; | |||
range->size = size; | |||
*buffer_list = buffer->bo; | |||
} | |||
static void | |||
write_image_descriptor(struct radv_device *device, | |||
unsigned *dst, | |||
struct radeon_winsys_bo **buffer_list, | |||
const VkDescriptorImageInfo *image_info) | |||
{ | |||
RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView); | |||
memcpy(dst, iview->descriptor, 8 * 4); | |||
memcpy(dst + 8, iview->fmask_descriptor, 8 * 4); | |||
*buffer_list = iview->bo; | |||
} | |||
static void | |||
write_combined_image_sampler_descriptor(struct radv_device *device, | |||
unsigned *dst, | |||
struct radeon_winsys_bo **buffer_list, | |||
const VkDescriptorImageInfo *image_info, | |||
bool has_sampler) | |||
{ | |||
RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler); | |||
write_image_descriptor(device, dst, buffer_list, image_info); | |||
/* copy over sampler state */ | |||
if (has_sampler) | |||
memcpy(dst + 16, sampler->state, 16); | |||
} | |||
static void | |||
write_sampler_descriptor(struct radv_device *device, | |||
unsigned *dst, | |||
const VkDescriptorImageInfo *image_info) | |||
{ | |||
RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler); | |||
memcpy(dst, sampler->state, 16); | |||
} | |||
void radv_UpdateDescriptorSets( | |||
VkDevice _device, | |||
uint32_t descriptorWriteCount, | |||
const VkWriteDescriptorSet* pDescriptorWrites, | |||
uint32_t descriptorCopyCount, | |||
const VkCopyDescriptorSet* pDescriptorCopies) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
uint32_t i, j; | |||
for (i = 0; i < descriptorWriteCount; i++) { | |||
const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; | |||
RADV_FROM_HANDLE(radv_descriptor_set, set, writeset->dstSet); | |||
const struct radv_descriptor_set_binding_layout *binding_layout = | |||
set->layout->binding + writeset->dstBinding; | |||
uint32_t *ptr = set->mapped_ptr; | |||
struct radeon_winsys_bo **buffer_list = set->descriptors; | |||
ptr += binding_layout->offset / 4; | |||
ptr += binding_layout->size * writeset->dstArrayElement / 4; | |||
buffer_list += binding_layout->buffer_offset; | |||
buffer_list += binding_layout->buffer_count * writeset->dstArrayElement; | |||
for (j = 0; j < writeset->descriptorCount; ++j) { | |||
switch(writeset->descriptorType) { | |||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { | |||
unsigned idx = writeset->dstArrayElement + j; | |||
idx += binding_layout->dynamic_offset_offset; | |||
write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx, | |||
buffer_list, writeset->pBufferInfo + j); | |||
break; | |||
} | |||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: | |||
write_buffer_descriptor(device, ptr, buffer_list, | |||
writeset->pBufferInfo + j); | |||
break; | |||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: | |||
write_texel_buffer_descriptor(device, ptr, buffer_list, | |||
writeset->pTexelBufferView[j]); | |||
break; | |||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: | |||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: | |||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: | |||
write_image_descriptor(device, ptr, buffer_list, | |||
writeset->pImageInfo + j); | |||
break; | |||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: | |||
write_combined_image_sampler_descriptor(device, ptr, buffer_list, | |||
writeset->pImageInfo + j, | |||
!binding_layout->immutable_samplers); | |||
break; | |||
case VK_DESCRIPTOR_TYPE_SAMPLER: | |||
assert(!binding_layout->immutable_samplers); | |||
write_sampler_descriptor(device, ptr, | |||
writeset->pImageInfo + j); | |||
break; | |||
default: | |||
unreachable("unimplemented descriptor type"); | |||
break; | |||
} | |||
ptr += binding_layout->size / 4; | |||
buffer_list += binding_layout->buffer_count; | |||
} | |||
} | |||
if (descriptorCopyCount) | |||
radv_finishme("copy descriptors"); | |||
} |
@@ -0,0 +1,81 @@ | |||
/* | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include <vulkan/vulkan.h> | |||
#define MAX_SETS 8 | |||
struct radv_descriptor_set_binding_layout { | |||
VkDescriptorType type; | |||
/* Number of array elements in this binding */ | |||
uint16_t array_size; | |||
uint16_t offset; | |||
uint16_t buffer_offset; | |||
uint16_t dynamic_offset_offset; | |||
/* Redundant with the type; each of the following is for a single array element */
uint16_t size; | |||
uint16_t buffer_count; | |||
uint16_t dynamic_offset_count; | |||
/* Immutable samplers (or NULL if no immutable samplers) */ | |||
struct radv_sampler **immutable_samplers; | |||
}; | |||
struct radv_descriptor_set_layout { | |||
/* Number of bindings in this descriptor set */ | |||
uint16_t binding_count; | |||
/* Total size of the descriptor set with room for all array entries */ | |||
uint16_t size; | |||
/* Shader stages affected by this descriptor set */ | |||
uint16_t shader_stages; | |||
uint16_t dynamic_shader_stages; | |||
/* Number of buffers in this descriptor set */ | |||
uint16_t buffer_count; | |||
/* Number of dynamic offsets used by this descriptor set */ | |||
uint16_t dynamic_offset_count; | |||
/* Bindings in this descriptor set */ | |||
struct radv_descriptor_set_binding_layout binding[0]; | |||
}; | |||
struct radv_pipeline_layout { | |||
struct { | |||
struct radv_descriptor_set_layout *layout; | |||
uint32_t size; | |||
uint32_t dynamic_offset_start; | |||
} set[MAX_SETS]; | |||
uint32_t num_sets; | |||
uint32_t push_constant_size; | |||
uint32_t dynamic_offset_count; | |||
unsigned char sha1[20]; | |||
}; |
@@ -0,0 +1,32 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include <amdgpu.h> | |||
#include "radv_radeon_winsys.h" | |||
struct radv_device_info { | |||
uint32_t pci_id; | |||
enum chip_class chip_class; | |||
}; |
@@ -0,0 +1,351 @@ | |||
# coding=utf-8 | |||
# | |||
# Copyright © 2015 Intel Corporation | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a | |||
# copy of this software and associated documentation files (the "Software"), | |||
# to deal in the Software without restriction, including without limitation | |||
# the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
# and/or sell copies of the Software, and to permit persons to whom the | |||
# Software is furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice (including the next | |||
# paragraph) shall be included in all copies or substantial portions of the | |||
# Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
# IN THE SOFTWARE. | |||
# | |||
import fileinput, re, sys | |||
# Each function typedef in the vulkan.h header is all on one line and matches | |||
# this regexp. We hope that won't change.
p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') | |||
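# An illustrative line it matches:
#   typedef void (VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth);
# yielding group(1) = 'void', group(2) = 'CmdSetLineWidth' and group(3) the
# parenthesized argument list.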
entrypoints = [] | |||
# We generate a static hash table for entry point lookup | |||
# (vkGetProcAddress). We use a linear congruential generator for our hash | |||
# function and a power-of-two size table. The prime numbers are determined | |||
# experimentally. | |||
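# Lookup sketch: start at hash(name) & hash_mask and keep adding prime_step
# until the matching entry or an empty slot (0xffff) turns up; the generated
# radv_lookup_entrypoint() below implements exactly this probe.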
none = 0xffff | |||
hash_size = 256 | |||
u32_mask = 2**32 - 1 | |||
hash_mask = hash_size - 1 | |||
prime_factor = 5024183 | |||
prime_step = 19 | |||
def hash(name): | |||
h = 0
for c in name: | |||
h = (h * prime_factor + ord(c)) & u32_mask | |||
return h | |||
def get_platform_guard_macro(name): | |||
if "Xlib" in name: | |||
return "VK_USE_PLATFORM_XLIB_KHR" | |||
elif "Xcb" in name: | |||
return "VK_USE_PLATFORM_XCB_KHR" | |||
elif "Wayland" in name: | |||
return "VK_USE_PLATFORM_WAYLAND_KHR" | |||
elif "Mir" in name: | |||
return "VK_USE_PLATFORM_MIR_KHR" | |||
elif "Android" in name: | |||
return "VK_USE_PLATFORM_ANDROID_KHR" | |||
elif "Win32" in name: | |||
return "VK_USE_PLATFORM_WIN32_KHR" | |||
else: | |||
return None | |||
def print_guard_start(name): | |||
guard = get_platform_guard_macro(name) | |||
if guard is not None: | |||
print "#ifdef {0}".format(guard) | |||
def print_guard_end(name): | |||
guard = get_platform_guard_macro(name) | |||
if guard is not None: | |||
print "#endif // {0}".format(guard) | |||
opt_header = False | |||
opt_code = False | |||
if (sys.argv[1] == "header"): | |||
opt_header = True | |||
sys.argv.pop() | |||
elif (sys.argv[1] == "code"): | |||
opt_code = True | |||
sys.argv.pop() | |||
# Parse the entry points in the header | |||
i = 0 | |||
for line in fileinput.input(): | |||
m = p.match(line) | |||
if (m): | |||
if m.group(2) == 'VoidFunction': | |||
continue | |||
fullname = "vk" + m.group(2) | |||
h = hash(fullname) | |||
entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) | |||
i = i + 1 | |||
# For outputting entrypoints.h we generate a radv_EntryPoint() prototype | |||
# per entry point. | |||
if opt_header: | |||
print "/* This file generated from vk_gen.py, don't edit directly. */\n" | |||
print "struct radv_dispatch_table {" | |||
print " union {" | |||
print " void *entrypoints[%d];" % len(entrypoints) | |||
print " struct {" | |||
for type, name, args, num, h in entrypoints: | |||
guard = get_platform_guard_macro(name) | |||
if guard is not None: | |||
print "#ifdef {0}".format(guard) | |||
print " PFN_vk{0} {0};".format(name) | |||
print "#else" | |||
print " void *{0};".format(name) | |||
print "#endif" | |||
else: | |||
print " PFN_vk{0} {0};".format(name) | |||
print " };\n" | |||
print " };\n" | |||
print "};\n" | |||
print "void radv_set_dispatch_devinfo(const struct radv_device_info *info);\n" | |||
for type, name, args, num, h in entrypoints: | |||
print_guard_start(name) | |||
print "%s radv_%s%s;" % (type, name, args) | |||
print "%s vi_%s%s;" % (type, name, args) | |||
print "%s cik_%s%s;" % (type, name, args) | |||
print "%s si_%s%s;" % (type, name, args) | |||
print "%s radv_validate_%s%s;" % (type, name, args) | |||
print_guard_end(name) | |||
exit() | |||
print """/* | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
/* DO NOT EDIT! This is a generated file. */ | |||
#include "radv_private.h" | |||
struct radv_entrypoint { | |||
uint32_t name; | |||
uint32_t hash; | |||
}; | |||
/* We use a big string constant to avoid lots of relocations from the entry
* point table to lots of little strings. The entries in the entry point table | |||
* store the index into this big string. | |||
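 * For example (illustrative), two entry points "vkFoo" and "vkBar" become
 * the constant "vkFoo\\0vkBar\\0", with stored offsets 0 and 6.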
*/ | |||
static const char strings[] =""" | |||
offsets = [] | |||
i = 0
for type, name, args, num, h in entrypoints: | |||
print " \"vk%s\\0\"" % name | |||
offsets.append(i) | |||
i += 2 + len(name) + 1 | |||
print " ;" | |||
# Now generate the table of all entry points and their validation functions | |||
print "\nstatic const struct radv_entrypoint entrypoints[] = {" | |||
for type, name, args, num, h in entrypoints: | |||
print " { %5d, 0x%08x }," % (offsets[num], h) | |||
print "};\n" | |||
print """ | |||
/* Weak aliases for all potential implementations. These will resolve to | |||
 * NULL if they're not defined, which lets the resolve_entrypoint() function
 * pick the most specific implementation that is actually defined.
*/ | |||
""" | |||
for layer in [ "radv", "validate", "si", "cik", "vi" ]: | |||
for type, name, args, num, h in entrypoints: | |||
print_guard_start(name) | |||
print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) | |||
print_guard_end(name) | |||
print "\nconst struct radv_dispatch_table %s_layer = {" % layer | |||
for type, name, args, num, h in entrypoints: | |||
print_guard_start(name) | |||
print " .%s = %s_%s," % (name, layer, name) | |||
print_guard_end(name) | |||
print "};\n" | |||
print """ | |||
#ifdef DEBUG | |||
static bool enable_validate = true; | |||
#else | |||
static bool enable_validate = false; | |||
#endif | |||
/* We can't use symbols that need resolving (like, oh, getenv) in the resolve | |||
* function. This means that we have to determine whether or not to use the | |||
* validation layer sometime before that. The constructor function attribute asks | |||
 * the dynamic linker to invoke determine_validate() at dlopen() time, which
 * is early enough.
*/ | |||
static void __attribute__ ((constructor)) | |||
determine_validate(void) | |||
{ | |||
const char *s = getenv("ANV_VALIDATE"); | |||
if (s) | |||
enable_validate = atoi(s); | |||
} | |||
static const struct radv_device_info *dispatch_devinfo; | |||
void | |||
radv_set_dispatch_devinfo(const struct radv_device_info *devinfo) | |||
{ | |||
dispatch_devinfo = devinfo; | |||
} | |||
void * __attribute__ ((noinline)) | |||
radv_resolve_entrypoint(uint32_t index) | |||
{ | |||
if (enable_validate && validate_layer.entrypoints[index]) | |||
return validate_layer.entrypoints[index]; | |||
if (dispatch_devinfo == NULL) { | |||
return radv_layer.entrypoints[index]; | |||
} | |||
switch (dispatch_devinfo->chip_class) { | |||
case VI: | |||
if (vi_layer.entrypoints[index]) | |||
return vi_layer.entrypoints[index]; | |||
/* fall through */ | |||
case CIK: | |||
if (cik_layer.entrypoints[index]) | |||
return cik_layer.entrypoints[index]; | |||
/* fall through */ | |||
case SI: | |||
if (si_layer.entrypoints[index]) | |||
return si_layer.entrypoints[index]; | |||
/* fall through */ | |||
case 0: | |||
return radv_layer.entrypoints[index]; | |||
default: | |||
unreachable("unsupported gen\\n"); | |||
} | |||
} | |||
""" | |||
# Now generate the hash table used for entry point look up. This is a | |||
# uint16_t table of entry point indices. We use 0xffff to indicate an entry | |||
# in the hash table is empty. | |||
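# e.g. an entry point hashing to 0x12345678 is first tried at slot 0x78
# (0x12345678 & hash_mask); each collision advances the probe by prime_step.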
map = [none for f in xrange(hash_size)] | |||
collisions = [0 for f in xrange(10)] | |||
for type, name, args, num, h in entrypoints: | |||
level = 0 | |||
while map[h & hash_mask] != none: | |||
h = h + prime_step | |||
level = level + 1 | |||
if level > 9: | |||
collisions[9] += 1 | |||
else: | |||
collisions[level] += 1 | |||
map[h & hash_mask] = num | |||
print "/* Hash table stats:" | |||
print " * size %d entries" % hash_size | |||
print " * collisions entries" | |||
for i in xrange(10): | |||
if (i == 9): | |||
plus = "+" | |||
else: | |||
plus = " " | |||
print " * %2d%s %4d" % (i, plus, collisions[i]) | |||
print " */\n" | |||
print "#define none 0x%04x\n" % none | |||
print "static const uint16_t map[] = {" | |||
for i in xrange(0, hash_size, 8): | |||
print " ", | |||
for j in xrange(i, i + 8): | |||
if map[j] & 0xffff == 0xffff: | |||
print " none,", | |||
else: | |||
print "0x%04x," % (map[j] & 0xffff), | |||
print "};" | |||
# Finally, we generate the hash table lookup function. The hash function and
# linear probing algorithm match the hash table generated above.
print """ | |||
void * | |||
radv_lookup_entrypoint(const char *name) | |||
{ | |||
static const uint32_t prime_factor = %d; | |||
static const uint32_t prime_step = %d; | |||
const struct radv_entrypoint *e; | |||
uint32_t hash, h, i; | |||
const char *p; | |||
hash = 0; | |||
for (p = name; *p; p++) | |||
hash = hash * prime_factor + *p; | |||
h = hash; | |||
do { | |||
i = map[h & %d]; | |||
if (i == none) | |||
return NULL; | |||
e = &entrypoints[i]; | |||
h += prime_step; | |||
} while (e->hash != hash); | |||
if (strcmp(name, strings + e->name) != 0) | |||
return NULL; | |||
return radv_resolve_entrypoint(i); | |||
} | |||
""" % (prime_factor, prime_step, hash_mask) |
@@ -0,0 +1,388 @@ | |||
/* | |||
* Copyright © 2016 Red Hat | |||
* based on intel anv code: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "radv_meta.h" | |||
#include <fcntl.h> | |||
#include <limits.h> | |||
#include <pwd.h> | |||
#include <sys/stat.h> | |||
void | |||
radv_meta_save(struct radv_meta_saved_state *state, | |||
const struct radv_cmd_buffer *cmd_buffer, | |||
uint32_t dynamic_mask) | |||
{ | |||
state->old_pipeline = cmd_buffer->state.pipeline; | |||
state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; | |||
memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, | |||
sizeof(state->old_vertex_bindings)); | |||
state->dynamic_mask = dynamic_mask; | |||
radv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, | |||
dynamic_mask); | |||
memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE); | |||
} | |||
void | |||
radv_meta_restore(const struct radv_meta_saved_state *state, | |||
struct radv_cmd_buffer *cmd_buffer) | |||
{ | |||
cmd_buffer->state.pipeline = state->old_pipeline; | |||
radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0); | |||
memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, | |||
sizeof(state->old_vertex_bindings)); | |||
cmd_buffer->state.vb_dirty |= (1 << RADV_META_VERTEX_BINDING_COUNT) - 1; | |||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; | |||
radv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, | |||
state->dynamic_mask); | |||
cmd_buffer->state.dirty |= state->dynamic_mask; | |||
memcpy(cmd_buffer->push_constants, state->push_constants, MAX_PUSH_CONSTANTS_SIZE); | |||
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT; | |||
} | |||
void | |||
radv_meta_save_pass(struct radv_meta_saved_pass_state *state, | |||
const struct radv_cmd_buffer *cmd_buffer) | |||
{ | |||
state->pass = cmd_buffer->state.pass; | |||
state->subpass = cmd_buffer->state.subpass; | |||
state->framebuffer = cmd_buffer->state.framebuffer; | |||
state->attachments = cmd_buffer->state.attachments; | |||
state->render_area = cmd_buffer->state.render_area; | |||
} | |||
void | |||
radv_meta_restore_pass(const struct radv_meta_saved_pass_state *state, | |||
struct radv_cmd_buffer *cmd_buffer) | |||
{ | |||
cmd_buffer->state.pass = state->pass; | |||
cmd_buffer->state.subpass = state->subpass; | |||
cmd_buffer->state.framebuffer = state->framebuffer; | |||
cmd_buffer->state.attachments = state->attachments; | |||
cmd_buffer->state.render_area = state->render_area; | |||
if (state->subpass) | |||
radv_emit_framebuffer_state(cmd_buffer); | |||
} | |||
void | |||
radv_meta_save_compute(struct radv_meta_saved_compute_state *state, | |||
const struct radv_cmd_buffer *cmd_buffer, | |||
unsigned push_constant_size) | |||
{ | |||
state->old_pipeline = cmd_buffer->state.compute_pipeline; | |||
state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; | |||
if (push_constant_size) | |||
memcpy(state->push_constants, cmd_buffer->push_constants, push_constant_size); | |||
} | |||
void | |||
radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state, | |||
struct radv_cmd_buffer *cmd_buffer, | |||
unsigned push_constant_size) | |||
{ | |||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, | |||
radv_pipeline_to_handle(state->old_pipeline)); | |||
radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0); | |||
if (push_constant_size) { | |||
memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size); | |||
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; | |||
} | |||
} | |||
VkImageViewType | |||
radv_meta_get_view_type(const struct radv_image *image) | |||
{ | |||
switch (image->type) { | |||
case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; | |||
case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; | |||
case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; | |||
default: | |||
unreachable("bad VkImageViewType"); | |||
} | |||
} | |||
/** | |||
* When creating a destination VkImageView, this function provides the needed | |||
* VkImageViewCreateInfo::subresourceRange::baseArrayLayer. | |||
*/ | |||
uint32_t | |||
radv_meta_get_iview_layer(const struct radv_image *dest_image, | |||
const VkImageSubresourceLayers *dest_subresource, | |||
const VkOffset3D *dest_offset) | |||
{ | |||
switch (dest_image->type) { | |||
case VK_IMAGE_TYPE_1D: | |||
case VK_IMAGE_TYPE_2D: | |||
return dest_subresource->baseArrayLayer; | |||
case VK_IMAGE_TYPE_3D: | |||
/* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, | |||
* but meta does it anyway. When doing so, we translate the | |||
* destination's z offset into an array offset. | |||
*/ | |||
return dest_offset->z; | |||
default: | |||
assert(!"bad VkImageType"); | |||
return 0; | |||
} | |||
} | |||
static void * | |||
meta_alloc(void* _device, size_t size, size_t alignment, | |||
VkSystemAllocationScope allocationScope) | |||
{ | |||
struct radv_device *device = _device; | |||
return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, | |||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); | |||
} | |||
static void * | |||
meta_realloc(void* _device, void *original, size_t size, size_t alignment, | |||
VkSystemAllocationScope allocationScope) | |||
{ | |||
struct radv_device *device = _device; | |||
return device->alloc.pfnReallocation(device->alloc.pUserData, original, | |||
size, alignment, | |||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); | |||
} | |||
static void | |||
meta_free(void* _device, void *data) | |||
{ | |||
struct radv_device *device = _device; | |||
return device->alloc.pfnFree(device->alloc.pUserData, data); | |||
} | |||
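/* Compute the on-disk location of the built-in shader cache. Illustrative
 * results: "$XDG_CACHE_HOME/radv_builtin_shaders" if XDG_CACHE_HOME is set,
 * otherwise "<home>/.cache/radv_builtin_shaders".
 */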
static bool | |||
radv_builtin_cache_path(char *path) | |||
{ | |||
char *xdg_cache_home = getenv("XDG_CACHE_HOME"); | |||
const char *suffix = "/radv_builtin_shaders"; | |||
const char *suffix2 = "/.cache/radv_builtin_shaders"; | |||
struct passwd pwd, *result; | |||
char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max, but suffices here. */
if (xdg_cache_home) { | |||
if (strlen(xdg_cache_home) + strlen(suffix) > PATH_MAX) | |||
return false; | |||
strcpy(path, xdg_cache_home); | |||
strcat(path, suffix); | |||
return true; | |||
} | |||
getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result); | |||
if (!result) | |||
return false; | |||
strcpy(path, pwd.pw_dir); | |||
strcat(path, "/.cache"); | |||
mkdir(path, 0755); | |||
strcat(path, suffix); | |||
return true; | |||
} | |||
static void | |||
radv_load_meta_pipeline(struct radv_device *device) | |||
{ | |||
char path[PATH_MAX + 1]; | |||
struct stat st; | |||
void *data = NULL; | |||
if (!radv_builtin_cache_path(path)) | |||
return; | |||
int fd = open(path, O_RDONLY); | |||
if (fd < 0) | |||
return; | |||
if (fstat(fd, &st)) | |||
goto fail; | |||
data = malloc(st.st_size); | |||
if (!data) | |||
goto fail; | |||
if (read(fd, data, st.st_size) == -1)
goto fail; | |||
radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size); | |||
fail: | |||
free(data); | |||
close(fd); | |||
} | |||
static void | |||
radv_store_meta_pipeline(struct radv_device *device) | |||
{ | |||
char path[PATH_MAX + 1], path2[PATH_MAX + 7]; | |||
size_t size; | |||
void *data = NULL; | |||
if (!device->meta_state.cache.modified) | |||
return; | |||
if (radv_GetPipelineCacheData(radv_device_to_handle(device), | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&size, NULL)) | |||
return; | |||
if (!radv_builtin_cache_path(path)) | |||
return; | |||
strcpy(path2, path); | |||
strcat(path2, "XXXXXX"); | |||
int fd = mkstemp(path2);
if (fd < 0) | |||
return; | |||
data = malloc(size); | |||
if (!data) | |||
goto fail; | |||
if (radv_GetPipelineCacheData(radv_device_to_handle(device), | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&size, data)) | |||
goto fail; | |||
if (write(fd, data, size) == -1)
goto fail; | |||
rename(path2, path); | |||
fail: | |||
free(data); | |||
close(fd); | |||
unlink(path2); | |||
} | |||
VkResult | |||
radv_device_init_meta(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
device->meta_state.alloc = (VkAllocationCallbacks) { | |||
.pUserData = device, | |||
.pfnAllocation = meta_alloc, | |||
.pfnReallocation = meta_realloc, | |||
.pfnFree = meta_free, | |||
}; | |||
device->meta_state.cache.alloc = device->meta_state.alloc; | |||
radv_pipeline_cache_init(&device->meta_state.cache, device); | |||
radv_load_meta_pipeline(device); | |||
result = radv_device_init_meta_clear_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_clear; | |||
result = radv_device_init_meta_resolve_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_resolve; | |||
result = radv_device_init_meta_blit_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_blit; | |||
result = radv_device_init_meta_blit2d_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_blit2d; | |||
result = radv_device_init_meta_bufimage_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_bufimage; | |||
result = radv_device_init_meta_depth_decomp_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_depth_decomp; | |||
result = radv_device_init_meta_buffer_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_buffer; | |||
result = radv_device_init_meta_fast_clear_flush_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_fast_clear; | |||
result = radv_device_init_meta_resolve_compute_state(device); | |||
if (result != VK_SUCCESS) | |||
goto fail_resolve_compute; | |||
return VK_SUCCESS; | |||
fail_resolve_compute: | |||
radv_device_finish_meta_fast_clear_flush_state(device); | |||
fail_fast_clear: | |||
radv_device_finish_meta_buffer_state(device); | |||
fail_buffer: | |||
radv_device_finish_meta_depth_decomp_state(device); | |||
fail_depth_decomp: | |||
radv_device_finish_meta_bufimage_state(device); | |||
fail_bufimage: | |||
radv_device_finish_meta_blit2d_state(device); | |||
fail_blit2d: | |||
radv_device_finish_meta_blit_state(device); | |||
fail_blit: | |||
radv_device_finish_meta_resolve_state(device); | |||
fail_resolve: | |||
radv_device_finish_meta_clear_state(device); | |||
fail_clear: | |||
radv_pipeline_cache_finish(&device->meta_state.cache); | |||
return result; | |||
} | |||
void | |||
radv_device_finish_meta(struct radv_device *device) | |||
{ | |||
radv_device_finish_meta_clear_state(device); | |||
radv_device_finish_meta_resolve_state(device); | |||
radv_device_finish_meta_blit_state(device); | |||
radv_device_finish_meta_blit2d_state(device); | |||
radv_device_finish_meta_bufimage_state(device); | |||
radv_device_finish_meta_depth_decomp_state(device); | |||
radv_device_finish_meta_buffer_state(device); | |||
radv_device_finish_meta_fast_clear_flush_state(device); | |||
radv_device_finish_meta_resolve_compute_state(device); | |||
radv_store_meta_pipeline(device); | |||
radv_pipeline_cache_finish(&device->meta_state.cache); | |||
} | |||
/* | |||
* The most common meta operations all want to have the viewport | |||
* reset and any scissors disabled. The rest of the dynamic state | |||
* should have no effect. | |||
*/ | |||
void | |||
radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state, | |||
struct radv_cmd_buffer *cmd_buffer) | |||
{ | |||
uint32_t dirty_state = (1 << VK_DYNAMIC_STATE_VIEWPORT) | (1 << VK_DYNAMIC_STATE_SCISSOR); | |||
radv_meta_save(saved_state, cmd_buffer, dirty_state); | |||
cmd_buffer->state.dynamic.viewport.count = 0; | |||
cmd_buffer->state.dynamic.scissor.count = 0; | |||
cmd_buffer->state.dirty |= dirty_state; | |||
} |
@@ -0,0 +1,190 @@ | |||
/* | |||
* Copyright © 2016 Red Hat | |||
* based on intel anv code: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "radv_private.h" | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#define RADV_META_VERTEX_BINDING_COUNT 2 | |||
struct radv_meta_saved_state { | |||
struct radv_vertex_binding old_vertex_bindings[RADV_META_VERTEX_BINDING_COUNT]; | |||
struct radv_descriptor_set *old_descriptor_set0; | |||
struct radv_pipeline *old_pipeline; | |||
/** | |||
* Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic | |||
* state. | |||
*/ | |||
uint32_t dynamic_mask; | |||
struct radv_dynamic_state dynamic; | |||
char push_constants[128]; | |||
}; | |||
struct radv_meta_saved_pass_state { | |||
struct radv_render_pass *pass; | |||
const struct radv_subpass *subpass; | |||
struct radv_attachment_state *attachments; | |||
struct radv_framebuffer *framebuffer; | |||
VkRect2D render_area; | |||
}; | |||
struct radv_meta_saved_compute_state { | |||
struct radv_descriptor_set *old_descriptor_set0; | |||
struct radv_pipeline *old_pipeline; | |||
char push_constants[128]; | |||
}; | |||
VkResult radv_device_init_meta_clear_state(struct radv_device *device); | |||
void radv_device_finish_meta_clear_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_resolve_state(struct radv_device *device); | |||
void radv_device_finish_meta_resolve_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device); | |||
void radv_device_finish_meta_depth_decomp_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device); | |||
void radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_blit_state(struct radv_device *device); | |||
void radv_device_finish_meta_blit_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_blit2d_state(struct radv_device *device); | |||
void radv_device_finish_meta_blit2d_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_buffer_state(struct radv_device *device); | |||
void radv_device_finish_meta_buffer_state(struct radv_device *device); | |||
VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device); | |||
void radv_device_finish_meta_resolve_compute_state(struct radv_device *device); | |||
void radv_meta_save(struct radv_meta_saved_state *state, | |||
const struct radv_cmd_buffer *cmd_buffer, | |||
uint32_t dynamic_mask); | |||
void radv_meta_restore(const struct radv_meta_saved_state *state, | |||
struct radv_cmd_buffer *cmd_buffer); | |||
void radv_meta_save_pass(struct radv_meta_saved_pass_state *state, | |||
const struct radv_cmd_buffer *cmd_buffer); | |||
void radv_meta_restore_pass(const struct radv_meta_saved_pass_state *state, | |||
struct radv_cmd_buffer *cmd_buffer); | |||
void radv_meta_save_compute(struct radv_meta_saved_compute_state *state, | |||
const struct radv_cmd_buffer *cmd_buffer, | |||
unsigned push_constant_size); | |||
void radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state, | |||
struct radv_cmd_buffer *cmd_buffer, | |||
unsigned push_constant_size); | |||
VkImageViewType radv_meta_get_view_type(const struct radv_image *image); | |||
uint32_t radv_meta_get_iview_layer(const struct radv_image *dest_image, | |||
const VkImageSubresourceLayers *dest_subresource, | |||
const VkOffset3D *dest_offset); | |||
struct radv_meta_blit2d_surf { | |||
/** The size of an element in bytes. */ | |||
uint8_t bs; | |||
VkFormat format; | |||
struct radv_image *image; | |||
unsigned level; | |||
unsigned layer; | |||
VkImageAspectFlags aspect_mask; | |||
}; | |||
struct radv_meta_blit2d_buffer { | |||
struct radv_buffer *buffer; | |||
uint32_t offset; | |||
uint32_t pitch; | |||
uint8_t bs; | |||
VkFormat format; | |||
}; | |||
struct radv_meta_blit2d_rect { | |||
uint32_t src_x, src_y; | |||
uint32_t dst_x, dst_y; | |||
uint32_t width, height; | |||
}; | |||
void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_saved_state *save); | |||
void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_blit2d_surf *src_img, | |||
struct radv_meta_blit2d_buffer *src_buf, | |||
struct radv_meta_blit2d_surf *dst, | |||
unsigned num_rects, | |||
struct radv_meta_blit2d_rect *rects); | |||
void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_saved_state *save); | |||
VkResult radv_device_init_meta_bufimage_state(struct radv_device *device); | |||
void radv_device_finish_meta_bufimage_state(struct radv_device *device); | |||
void radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_saved_compute_state *save); | |||
void radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_saved_compute_state *save); | |||
void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_blit2d_surf *src, | |||
struct radv_meta_blit2d_buffer *dst, | |||
unsigned num_rects, | |||
struct radv_meta_blit2d_rect *rects); | |||
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image, | |||
VkImageSubresourceRange *subresourceRange); | |||
void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image, | |||
VkImageSubresourceRange *subresourceRange); | |||
void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image); | |||
void radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state, | |||
struct radv_cmd_buffer *cmd_buffer); | |||
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *src_image, | |||
VkImageLayout src_image_layout, | |||
struct radv_image *dest_image, | |||
VkImageLayout dest_image_layout, | |||
uint32_t region_count, | |||
const VkImageResolve *regions); | |||
#ifdef __cplusplus | |||
} | |||
#endif |
@@ -0,0 +1,543 @@ | |||
#include "radv_meta.h" | |||
#include "nir/nir_builder.h" | |||
#include "sid.h" | |||
#include "radv_cs.h" | |||
static nir_shader * | |||
build_buffer_fill_shader(struct radv_device *dev) | |||
{ | |||
nir_builder b; | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill"); | |||
b.shader->info.cs.local_size[0] = 64; | |||
b.shader->info.cs.local_size[1] = 1; | |||
b.shader->info.cs.local_size[2] = 1; | |||
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); | |||
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); | |||
nir_ssa_def *block_size = nir_imm_ivec4(&b, | |||
b.shader->info.cs.local_size[0], | |||
b.shader->info.cs.local_size[1], | |||
b.shader->info.cs.local_size[2], 0); | |||
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); | |||
nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); | |||
offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false); | |||
nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, | |||
nir_intrinsic_vulkan_resource_index); | |||
dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
nir_intrinsic_set_desc_set(dst_buf, 0); | |||
nir_intrinsic_set_binding(dst_buf, 0); | |||
nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL); | |||
nir_builder_instr_insert(&b, &dst_buf->instr); | |||
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); | |||
load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
load->num_components = 1; | |||
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value"); | |||
nir_builder_instr_insert(&b, &load->instr); | |||
nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false); | |||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); | |||
store->src[0] = nir_src_for_ssa(swizzled_load); | |||
store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); | |||
store->src[2] = nir_src_for_ssa(offset); | |||
nir_intrinsic_set_write_mask(store, 0xf); | |||
store->num_components = 4; | |||
nir_builder_instr_insert(&b, &store->instr); | |||
return b.shader; | |||
} | |||
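/* The copy shader mirrors the fill shader, except that instead of splatting a
 * push constant it loads a vec4 from the source SSBO (binding 1) and stores
 * it at the same offset in the destination SSBO (binding 0).
 */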
static nir_shader * | |||
build_buffer_copy_shader(struct radv_device *dev) | |||
{ | |||
nir_builder b; | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy"); | |||
b.shader->info.cs.local_size[0] = 64; | |||
b.shader->info.cs.local_size[1] = 1; | |||
b.shader->info.cs.local_size[2] = 1; | |||
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); | |||
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); | |||
nir_ssa_def *block_size = nir_imm_ivec4(&b, | |||
b.shader->info.cs.local_size[0], | |||
b.shader->info.cs.local_size[1], | |||
b.shader->info.cs.local_size[2], 0); | |||
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); | |||
nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); | |||
offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false); | |||
nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, | |||
nir_intrinsic_vulkan_resource_index); | |||
dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
nir_intrinsic_set_desc_set(dst_buf, 0); | |||
nir_intrinsic_set_binding(dst_buf, 0); | |||
nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL); | |||
nir_builder_instr_insert(&b, &dst_buf->instr); | |||
nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader, | |||
nir_intrinsic_vulkan_resource_index); | |||
src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
nir_intrinsic_set_desc_set(src_buf, 0); | |||
nir_intrinsic_set_binding(src_buf, 1); | |||
nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); | |||
nir_builder_instr_insert(&b, &src_buf->instr); | |||
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); | |||
load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); | |||
load->src[1] = nir_src_for_ssa(offset); | |||
nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); | |||
load->num_components = 4; | |||
nir_builder_instr_insert(&b, &load->instr); | |||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); | |||
store->src[0] = nir_src_for_ssa(&load->dest.ssa); | |||
store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); | |||
store->src[2] = nir_src_for_ssa(offset); | |||
nir_intrinsic_set_write_mask(store, 0xf); | |||
store->num_components = 4; | |||
nir_builder_instr_insert(&b, &store->instr); | |||
return b.shader; | |||
} | |||
VkResult radv_device_init_meta_buffer_state(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
struct radv_shader_module fill_cs = { .nir = NULL }; | |||
struct radv_shader_module copy_cs = { .nir = NULL }; | |||
zero(device->meta_state.buffer); | |||
fill_cs.nir = build_buffer_fill_shader(device); | |||
copy_cs.nir = build_buffer_copy_shader(device); | |||
VkDescriptorSetLayoutCreateInfo fill_ds_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | |||
.bindingCount = 1, | |||
.pBindings = (VkDescriptorSetLayoutBinding[]) { | |||
{ | |||
.binding = 0, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
} | |||
}; | |||
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), | |||
&fill_ds_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.buffer.fill_ds_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkDescriptorSetLayoutCreateInfo copy_ds_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | |||
.bindingCount = 2, | |||
.pBindings = (VkDescriptorSetLayoutBinding[]) { | |||
{ | |||
.binding = 0, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
{ | |||
.binding = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
} | |||
}; | |||
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), | |||
&copy_ds_create_info,
&device->meta_state.alloc, | |||
&device->meta_state.buffer.copy_ds_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkPipelineLayoutCreateInfo fill_pl_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | |||
.setLayoutCount = 1, | |||
.pSetLayouts = &device->meta_state.buffer.fill_ds_layout, | |||
.pushConstantRangeCount = 1, | |||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4}, | |||
}; | |||
result = radv_CreatePipelineLayout(radv_device_to_handle(device), | |||
&fill_pl_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.buffer.fill_p_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkPipelineLayoutCreateInfo copy_pl_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | |||
.setLayoutCount = 1, | |||
.pSetLayouts = &device->meta_state.buffer.copy_ds_layout, | |||
.pushConstantRangeCount = 0, | |||
}; | |||
result = radv_CreatePipelineLayout(radv_device_to_handle(device), | |||
&copy_pl_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.buffer.copy_p_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.module = radv_shader_module_to_handle(&fill_cs), | |||
.pName = "main", | |||
.pSpecializationInfo = NULL, | |||
}; | |||
VkComputePipelineCreateInfo fill_vk_pipeline_info = { | |||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | |||
.stage = fill_pipeline_shader_stage, | |||
.flags = 0, | |||
.layout = device->meta_state.buffer.fill_p_layout, | |||
}; | |||
result = radv_CreateComputePipelines(radv_device_to_handle(device), | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
1, &fill_vk_pipeline_info, NULL, | |||
&device->meta_state.buffer.fill_pipeline); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.module = radv_shader_module_to_handle(&copy_cs), | |||
.pName = "main", | |||
.pSpecializationInfo = NULL, | |||
}; | |||
VkComputePipelineCreateInfo copy_vk_pipeline_info = { | |||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | |||
.stage = copy_pipeline_shader_stage, | |||
.flags = 0, | |||
.layout = device->meta_state.buffer.copy_p_layout, | |||
}; | |||
result = radv_CreateComputePipelines(radv_device_to_handle(device), | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
1, &copy_vk_pipeline_info, NULL, | |||
&device->meta_state.buffer.copy_pipeline); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
ralloc_free(fill_cs.nir); | |||
ralloc_free(copy_cs.nir); | |||
return VK_SUCCESS; | |||
fail: | |||
radv_device_finish_meta_buffer_state(device); | |||
ralloc_free(fill_cs.nir); | |||
ralloc_free(copy_cs.nir); | |||
return result; | |||
} | |||
void radv_device_finish_meta_buffer_state(struct radv_device *device) | |||
{ | |||
if (device->meta_state.buffer.copy_pipeline) | |||
radv_DestroyPipeline(radv_device_to_handle(device), | |||
device->meta_state.buffer.copy_pipeline, | |||
&device->meta_state.alloc); | |||
if (device->meta_state.buffer.fill_pipeline) | |||
radv_DestroyPipeline(radv_device_to_handle(device), | |||
device->meta_state.buffer.fill_pipeline, | |||
&device->meta_state.alloc); | |||
if (device->meta_state.buffer.copy_p_layout) | |||
radv_DestroyPipelineLayout(radv_device_to_handle(device), | |||
device->meta_state.buffer.copy_p_layout, | |||
&device->meta_state.alloc); | |||
if (device->meta_state.buffer.fill_p_layout) | |||
radv_DestroyPipelineLayout(radv_device_to_handle(device), | |||
device->meta_state.buffer.fill_p_layout, | |||
&device->meta_state.alloc); | |||
if (device->meta_state.buffer.copy_ds_layout) | |||
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), | |||
device->meta_state.buffer.copy_ds_layout, | |||
&device->meta_state.alloc); | |||
if (device->meta_state.buffer.fill_ds_layout) | |||
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), | |||
device->meta_state.buffer.fill_ds_layout, | |||
&device->meta_state.alloc); | |||
} | |||
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, | |||
struct radeon_winsys_bo *bo, | |||
uint64_t offset, uint64_t size, uint32_t value) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
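/* One workgroup of the fill shader covers 1024 bytes of the destination, | |||
 * so size/1024 workgroups (rounded up) cover the whole range. | |||
 */ | |||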
uint64_t block_count = round_up_u64(size, 1024); | |||
struct radv_meta_saved_compute_state saved_state; | |||
VkDescriptorSet ds; | |||
radv_meta_save_compute(&saved_state, cmd_buffer, 4); | |||
radv_temp_descriptor_set_create(device, cmd_buffer, | |||
device->meta_state.buffer.fill_ds_layout, | |||
&ds); | |||
struct radv_buffer dst_buffer = { | |||
.bo = bo, | |||
.offset = offset, | |||
.size = size | |||
}; | |||
radv_UpdateDescriptorSets(radv_device_to_handle(device), | |||
1, /* writeCount */ | |||
(VkWriteDescriptorSet[]) { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = ds, | |||
.dstBinding = 0, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |||
.pBufferInfo = &(VkDescriptorBufferInfo) { | |||
.buffer = radv_buffer_to_handle(&dst_buffer), | |||
.offset = 0, | |||
.range = size | |||
} | |||
} | |||
}, 0, NULL); | |||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, | |||
device->meta_state.buffer.fill_pipeline); | |||
radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, | |||
device->meta_state.buffer.fill_p_layout, 0, 1, | |||
&ds, 0, NULL); | |||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), | |||
device->meta_state.buffer.fill_p_layout, | |||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 4, | |||
&value); | |||
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); | |||
radv_temp_descriptor_set_destroy(device, ds); | |||
radv_meta_restore_compute(&saved_state, cmd_buffer, 4); | |||
} | |||
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, | |||
struct radeon_winsys_bo *src_bo, | |||
struct radeon_winsys_bo *dst_bo, | |||
uint64_t src_offset, uint64_t dst_offset, | |||
uint64_t size) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
uint64_t block_count = round_up_u64(size, 1024); | |||
struct radv_meta_saved_compute_state saved_state; | |||
VkDescriptorSet ds; | |||
radv_meta_save_compute(&saved_state, cmd_buffer, 0); | |||
radv_temp_descriptor_set_create(device, cmd_buffer, | |||
device->meta_state.buffer.copy_ds_layout, | |||
&ds); | |||
struct radv_buffer dst_buffer = { | |||
.bo = dst_bo, | |||
.offset = dst_offset, | |||
.size = size | |||
}; | |||
struct radv_buffer src_buffer = { | |||
.bo = src_bo, | |||
.offset = src_offset, | |||
.size = size | |||
}; | |||
radv_UpdateDescriptorSets(radv_device_to_handle(device), | |||
2, /* writeCount */ | |||
(VkWriteDescriptorSet[]) { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = ds, | |||
.dstBinding = 0, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |||
.pBufferInfo = &(VkDescriptorBufferInfo) { | |||
.buffer = radv_buffer_to_handle(&dst_buffer), | |||
.offset = 0, | |||
.range = size | |||
} | |||
}, | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = ds, | |||
.dstBinding = 1, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |||
.pBufferInfo = &(VkDescriptorBufferInfo) { | |||
.buffer = radv_buffer_to_handle(&src_buffer), | |||
.offset = 0, | |||
.range = size | |||
} | |||
} | |||
}, 0, NULL); | |||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, | |||
device->meta_state.buffer.copy_pipeline); | |||
radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, | |||
device->meta_state.buffer.copy_p_layout, 0, 1, | |||
&ds, 0, NULL); | |||
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); | |||
radv_temp_descriptor_set_destroy(device, ds); | |||
radv_meta_restore_compute(&saved_state, cmd_buffer, 0); | |||
} | |||
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, | |||
struct radeon_winsys_bo *bo, | |||
uint64_t offset, uint64_t size, uint32_t value) | |||
{ | |||
assert(!(offset & 3)); | |||
assert(!(size & 3)); | |||
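/* Use the compute fill only for large ranges, where it amortizes the cost | |||
 * of binding a pipeline and descriptor set; smaller fills go through a | |||
 * CP DMA clear instead. The 4096-byte cutoff is a heuristic. | |||
 */ | |||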
if (size >= 4096) | |||
fill_buffer_shader(cmd_buffer, bo, offset, size, value); | |||
else if (size) { | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo); | |||
va += offset; | |||
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8); | |||
si_cp_dma_clear_buffer(cmd_buffer, va, size, value); | |||
} | |||
} | |||
static | |||
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, | |||
struct radeon_winsys_bo *src_bo, | |||
struct radeon_winsys_bo *dst_bo, | |||
uint64_t src_offset, uint64_t dst_offset, | |||
uint64_t size) | |||
{ | |||
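/* The compute copy moves dword-aligned vec4s, so it requires a | |||
 * 4-byte-aligned size and offsets; anything else, and small copies, | |||
 * fall back to CP DMA. | |||
 */ | |||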
if (size >= 4096 && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3)) | |||
copy_buffer_shader(cmd_buffer, src_bo, dst_bo, | |||
src_offset, dst_offset, size); | |||
else if (size) { | |||
uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo); | |||
uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo); | |||
src_va += src_offset; | |||
dst_va += dst_offset; | |||
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, src_bo, 8); | |||
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_bo, 8); | |||
si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size); | |||
} | |||
} | |||
void radv_CmdFillBuffer( | |||
VkCommandBuffer commandBuffer, | |||
VkBuffer dstBuffer, | |||
VkDeviceSize dstOffset, | |||
VkDeviceSize fillSize, | |||
uint32_t data) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); | |||
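/* Per the spec, VK_WHOLE_SIZE fills from dstOffset to the end of the | |||
 * buffer, rounded down to a multiple of 4. | |||
 */ | |||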
if (fillSize == VK_WHOLE_SIZE) | |||
fillSize = (dst_buffer->size - dstOffset) & ~3ull; | |||
radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset, | |||
fillSize, data); | |||
} | |||
void radv_CmdCopyBuffer( | |||
VkCommandBuffer commandBuffer, | |||
VkBuffer srcBuffer, | |||
VkBuffer destBuffer, | |||
uint32_t regionCount, | |||
const VkBufferCopy* pRegions) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer); | |||
RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer); | |||
for (unsigned r = 0; r < regionCount; r++) { | |||
uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; | |||
uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; | |||
uint64_t copy_size = pRegions[r].size; | |||
radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo, | |||
src_offset, dest_offset, copy_size); | |||
} | |||
} | |||
void radv_CmdUpdateBuffer( | |||
VkCommandBuffer commandBuffer, | |||
VkBuffer dstBuffer, | |||
VkDeviceSize dstOffset, | |||
VkDeviceSize dataSize, | |||
const uint32_t* pData) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); | |||
uint64_t words = dataSize / 4; | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo); | |||
va += dstOffset + dst_buffer->offset; | |||
assert(!(dataSize & 3)); | |||
assert(!(va & 3)); | |||
if (dataSize < 4096) { | |||
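/* Small updates are emitted inline: a WRITE_DATA packet consists of the | |||
 * header, one control dword, the 64-bit destination VA, and the payload | |||
 * words themselves. | |||
 */ | |||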
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8); | |||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4); | |||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0)); | |||
radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | | |||
S_370_WR_CONFIRM(1) | | |||
S_370_ENGINE_SEL(V_370_ME)); | |||
radeon_emit(cmd_buffer->cs, va); | |||
radeon_emit(cmd_buffer->cs, va >> 32); | |||
radeon_emit_array(cmd_buffer->cs, pData, words); | |||
} else { | |||
uint32_t buf_offset; | |||
radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset); | |||
radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, | |||
buf_offset, dstOffset + dst_buffer->offset, dataSize); | |||
} | |||
} |
@@ -0,0 +1,396 @@ | |||
#include "radv_meta.h" | |||
#include "nir/nir_builder.h" | |||
static nir_shader * | |||
build_nir_itob_compute_shader(struct radv_device *dev) | |||
{ | |||
nir_builder b; | |||
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, | |||
false, | |||
false, | |||
GLSL_TYPE_FLOAT); | |||
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, | |||
false, | |||
false, | |||
GLSL_TYPE_FLOAT); | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs"); | |||
b.shader->info.cs.local_size[0] = 16; | |||
b.shader->info.cs.local_size[1] = 16; | |||
b.shader->info.cs.local_size[2] = 1; | |||
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, | |||
sampler_type, "s_tex"); | |||
input_img->data.descriptor_set = 0; | |||
input_img->data.binding = 0; | |||
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, | |||
img_type, "out_img"); | |||
output_img->data.descriptor_set = 0; | |||
output_img->data.binding = 1; | |||
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); | |||
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); | |||
nir_ssa_def *block_size = nir_imm_ivec4(&b, | |||
b.shader->info.cs.local_size[0], | |||
b.shader->info.cs.local_size[1], | |||
b.shader->info.cs.local_size[2], 0); | |||
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); | |||
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); | |||
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
offset->num_components = 2; | |||
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset"); | |||
nir_builder_instr_insert(&b, &offset->instr); | |||
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); | |||
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8)); | |||
stride->num_components = 1; | |||
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride"); | |||
nir_builder_instr_insert(&b, &stride->instr); | |||
nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa); | |||
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); | |||
tex->sampler_dim = GLSL_SAMPLER_DIM_2D; | |||
tex->op = nir_texop_txf; | |||
tex->src[0].src_type = nir_tex_src_coord; | |||
tex->src[0].src = nir_src_for_ssa(img_coord); | |||
tex->src[1].src_type = nir_tex_src_lod; | |||
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
tex->dest_type = nir_type_float; | |||
tex->is_array = false; | |||
tex->coord_components = 2; | |||
tex->texture = nir_deref_var_create(tex, input_img); | |||
tex->sampler = NULL; | |||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); | |||
nir_builder_instr_insert(&b, &tex->instr); | |||
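/* Linearize the 2D texel position into an index in the destination | |||
 * buffer view: index = y * stride + x. | |||
 */ | |||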
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); | |||
nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); | |||
nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa); | |||
tmp = nir_iadd(&b, tmp, pos_x); | |||
nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp); | |||
nir_ssa_def *outval = &tex->dest.ssa; | |||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); | |||
store->src[0] = nir_src_for_ssa(coord); | |||
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); | |||
store->src[2] = nir_src_for_ssa(outval); | |||
store->variables[0] = nir_deref_var_create(store, output_img); | |||
nir_builder_instr_insert(&b, &store->instr); | |||
return b.shader; | |||
} | |||
/* Image to buffer - reads via txf from a sampled image and writes through | |||
 * a storage texel buffer using image accessors. */ | |||
static VkResult | |||
radv_device_init_meta_itob_state(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
struct radv_shader_module cs = { .nir = NULL }; | |||
zero(device->meta_state.itob); | |||
cs.nir = build_nir_itob_compute_shader(device); | |||
/* | |||
 * Two descriptors: one for the image being sampled, | |||
 * one for the buffer being written. | |||
 */ | |||
VkDescriptorSetLayoutCreateInfo ds_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | |||
.bindingCount = 2, | |||
.pBindings = (VkDescriptorSetLayoutBinding[]) { | |||
{ | |||
.binding = 0, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
{ | |||
.binding = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
} | |||
}; | |||
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), | |||
&ds_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.itob.img_ds_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkPipelineLayoutCreateInfo pl_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | |||
.setLayoutCount = 1, | |||
.pSetLayouts = &device->meta_state.itob.img_ds_layout, | |||
.pushConstantRangeCount = 1, | |||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12}, | |||
}; | |||
result = radv_CreatePipelineLayout(radv_device_to_handle(device), | |||
&pl_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.itob.img_p_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
/* compute shader */ | |||
VkPipelineShaderStageCreateInfo pipeline_shader_stage = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.module = radv_shader_module_to_handle(&cs), | |||
.pName = "main", | |||
.pSpecializationInfo = NULL, | |||
}; | |||
VkComputePipelineCreateInfo vk_pipeline_info = { | |||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | |||
.stage = pipeline_shader_stage, | |||
.flags = 0, | |||
.layout = device->meta_state.itob.img_p_layout, | |||
}; | |||
result = radv_CreateComputePipelines(radv_device_to_handle(device), | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
1, &vk_pipeline_info, NULL, | |||
&device->meta_state.itob.pipeline); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
ralloc_free(cs.nir); | |||
return VK_SUCCESS; | |||
fail: | |||
ralloc_free(cs.nir); | |||
return result; | |||
} | |||
static void | |||
radv_device_finish_meta_itob_state(struct radv_device *device) | |||
{ | |||
if (device->meta_state.itob.img_p_layout) { | |||
radv_DestroyPipelineLayout(radv_device_to_handle(device), | |||
device->meta_state.itob.img_p_layout, | |||
&device->meta_state.alloc); | |||
} | |||
if (device->meta_state.itob.img_ds_layout) { | |||
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), | |||
device->meta_state.itob.img_ds_layout, | |||
&device->meta_state.alloc); | |||
} | |||
if (device->meta_state.itob.pipeline) { | |||
radv_DestroyPipeline(radv_device_to_handle(device), | |||
device->meta_state.itob.pipeline, | |||
&device->meta_state.alloc); | |||
} | |||
} | |||
void | |||
radv_device_finish_meta_bufimage_state(struct radv_device *device) | |||
{ | |||
radv_device_finish_meta_itob_state(device); | |||
} | |||
VkResult | |||
radv_device_init_meta_bufimage_state(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
result = radv_device_init_meta_itob_state(device); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
return VK_SUCCESS; | |||
} | |||
void | |||
radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_saved_compute_state *save) | |||
{ | |||
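/* 12 bytes of push constants are saved and restored: the three 32-bit | |||
 * values (src_x, src_y, pitch) pushed by radv_meta_image_to_buffer. | |||
 */ | |||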
radv_meta_save_compute(save, cmd_buffer, 12); | |||
} | |||
void | |||
radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_saved_compute_state *save) | |||
{ | |||
radv_meta_restore_compute(save, cmd_buffer, 12); | |||
} | |||
static void | |||
create_iview(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_blit2d_surf *surf, | |||
VkImageUsageFlags usage, | |||
struct radv_image_view *iview) | |||
{ | |||
radv_image_view_init(iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = radv_image_to_handle(surf->image), | |||
.viewType = VK_IMAGE_VIEW_TYPE_2D, | |||
.format = surf->format, | |||
.subresourceRange = { | |||
.aspectMask = surf->aspect_mask, | |||
.baseMipLevel = surf->level, | |||
.levelCount = 1, | |||
.baseArrayLayer = surf->layer, | |||
.layerCount = 1 | |||
}, | |||
}, cmd_buffer, usage); | |||
} | |||
static void | |||
create_bview(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_buffer *buffer, | |||
unsigned offset, | |||
VkFormat format, | |||
struct radv_buffer_view *bview) | |||
{ | |||
radv_buffer_view_init(bview, cmd_buffer->device, | |||
&(VkBufferViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | |||
.flags = 0, | |||
.buffer = radv_buffer_to_handle(buffer), | |||
.format = format, | |||
.offset = offset, | |||
.range = VK_WHOLE_SIZE, | |||
}, cmd_buffer); | |||
} | |||
struct itob_temps { | |||
struct radv_image_view src_iview; | |||
struct radv_buffer_view dst_bview; | |||
VkDescriptorSet set; | |||
}; | |||
static void | |||
itob_bind_src_image(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_blit2d_surf *src, | |||
struct radv_meta_blit2d_rect *rect, | |||
struct itob_temps *tmp) | |||
{ | |||
create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->src_iview); | |||
} | |||
static void | |||
itob_bind_dst_buffer(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_blit2d_buffer *dst, | |||
struct radv_meta_blit2d_rect *rect, | |||
struct itob_temps *tmp) | |||
{ | |||
create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &tmp->dst_bview); | |||
} | |||
static void | |||
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, | |||
struct itob_temps *tmp) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
VkDevice vk_device = radv_device_to_handle(cmd_buffer->device); | |||
radv_temp_descriptor_set_create(device, cmd_buffer, | |||
device->meta_state.itob.img_ds_layout, | |||
&tmp->set); | |||
radv_UpdateDescriptorSets(vk_device, | |||
2, /* writeCount */ | |||
(VkWriteDescriptorSet[]) { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = tmp->set, | |||
.dstBinding = 0, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, | |||
.pImageInfo = (VkDescriptorImageInfo[]) { | |||
{ | |||
.sampler = NULL, | |||
.imageView = radv_image_view_to_handle(&tmp->src_iview), | |||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL, | |||
}, | |||
} | |||
}, | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = tmp->set, | |||
.dstBinding = 1, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, | |||
.pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->dst_bview) }, | |||
} | |||
}, 0, NULL); | |||
radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, | |||
device->meta_state.itob.img_p_layout, 0, 1, | |||
&tmp->set, 0, NULL); | |||
} | |||
static void | |||
itob_unbind_src_image(struct radv_cmd_buffer *cmd_buffer, | |||
struct itob_temps *temps) | |||
{ | |||
} | |||
static void | |||
bind_pipeline(struct radv_cmd_buffer *cmd_buffer) | |||
{ | |||
VkPipeline pipeline = | |||
cmd_buffer->device->meta_state.itob.pipeline; | |||
if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { | |||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | |||
} | |||
} | |||
void | |||
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_meta_blit2d_surf *src, | |||
struct radv_meta_blit2d_buffer *dst, | |||
unsigned num_rects, | |||
struct radv_meta_blit2d_rect *rects) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
for (unsigned r = 0; r < num_rects; ++r) { | |||
struct itob_temps temps; | |||
itob_bind_src_image(cmd_buffer, src, &rects[r], &temps); | |||
itob_bind_dst_buffer(cmd_buffer, dst, &rects[r], &temps); | |||
itob_bind_descriptors(cmd_buffer, &temps); | |||
bind_pipeline(cmd_buffer); | |||
unsigned push_constants[3] = { | |||
rects[r].src_x, | |||
rects[r].src_y, | |||
dst->pitch | |||
}; | |||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), | |||
device->meta_state.itob.img_p_layout, | |||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 12, | |||
push_constants); | |||
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); | |||
radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set); | |||
itob_unbind_src_image(cmd_buffer, &temps); | |||
} | |||
} |
@@ -0,0 +1,399 @@ | |||
/* | |||
* Copyright © 2016 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "radv_meta.h" | |||
#include "vk_format.h" | |||
static VkExtent3D | |||
meta_image_block_size(const struct radv_image *image) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(image->vk_format); | |||
return (VkExtent3D) { desc->block.width, desc->block.height, 1 }; | |||
} | |||
/* Returns the user-provided VkBufferImageCopy::imageExtent in units of | |||
 * elements rather than texels. One element equals one texel if the image | |||
 * is uncompressed, or one block if it is compressed. | |||
 */ | |||
static struct VkExtent3D | |||
meta_region_extent_el(const struct radv_image *image, | |||
const struct VkExtent3D *extent) | |||
{ | |||
const VkExtent3D block = meta_image_block_size(image); | |||
return radv_sanitize_image_extent(image->type, (VkExtent3D) { | |||
.width = DIV_ROUND_UP(extent->width , block.width), | |||
.height = DIV_ROUND_UP(extent->height, block.height), | |||
.depth = DIV_ROUND_UP(extent->depth , block.depth), | |||
}); | |||
} | |||
/* Returns the user-provided VkBufferImageCopy::imageOffset in units of | |||
 * elements rather than texels. One element equals one texel if the image | |||
 * is uncompressed, or one block if it is compressed. | |||
 */ | |||
static struct VkOffset3D | |||
meta_region_offset_el(const struct radv_image *image, | |||
const struct VkOffset3D *offset) | |||
{ | |||
const VkExtent3D block = meta_image_block_size(image); | |||
return radv_sanitize_image_offset(image->type, (VkOffset3D) { | |||
.x = offset->x / block.width, | |||
.y = offset->y / block.height, | |||
.z = offset->z / block.depth, | |||
}); | |||
} | |||
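/* For example (illustrative values): for a BC1-compressed image with 4x4 | |||
 * blocks, an imageOffset of (8, 4, 0) maps to element (2, 1, 0), and a | |||
 * 16x16 imageExtent maps to 4x4 elements; for uncompressed formats the | |||
 * block is 1x1x1 and the values pass through unchanged. | |||
 */ | |||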
static VkFormat | |||
vk_format_for_size(int bs) | |||
{ | |||
switch (bs) { | |||
case 1: return VK_FORMAT_R8_UINT; | |||
case 2: return VK_FORMAT_R8G8_UINT; | |||
case 4: return VK_FORMAT_R8G8B8A8_UINT; | |||
case 8: return VK_FORMAT_R16G16B16A16_UINT; | |||
case 16: return VK_FORMAT_R32G32B32A32_UINT; | |||
default: | |||
unreachable("Invalid format block size"); | |||
} | |||
} | |||
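/* When the image carries no DCC metadata, the copy can move raw bits, so | |||
 * an uncompressed UINT format of the same block size is substituted below. | |||
 */ | |||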
static struct radv_meta_blit2d_surf | |||
blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags aspectMask, | |||
int level, int layer) | |||
{ | |||
VkFormat format = image->vk_format; | |||
if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) | |||
format = vk_format_depth_only(format); | |||
else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) | |||
format = vk_format_stencil_only(format); | |||
if (!image->surface.dcc_size) | |||
format = vk_format_for_size(vk_format_get_blocksize(format)); | |||
return (struct radv_meta_blit2d_surf) { | |||
.format = format, | |||
.bs = vk_format_get_blocksize(format), | |||
.level = level, | |||
.layer = layer, | |||
.image = image, | |||
.aspect_mask = aspectMask, | |||
}; | |||
} | |||
static void | |||
meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_buffer* buffer, | |||
struct radv_image* image, | |||
uint32_t regionCount, | |||
const VkBufferImageCopy* pRegions) | |||
{ | |||
struct radv_meta_saved_state saved_state; | |||
/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to | |||
* VK_SAMPLE_COUNT_1_BIT." | |||
*/ | |||
assert(image->samples == 1); | |||
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); | |||
for (unsigned r = 0; r < regionCount; r++) { | |||
/** | |||
 * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images | |||
 * imageExtent is the size in texels of the image to copy in width, | |||
 * height and depth. 1D images use only x and width. 2D images use x, y, | |||
 * width and height. 3D images use x, y, z, width, height and depth. | |||
 * | |||
 * Also, convert the offsets and extent from units of texels to units of | |||
 * blocks - which is the highest resolution accessible in this command. | |||
 */ | |||
const VkOffset3D img_offset_el = | |||
meta_region_offset_el(image, &pRegions[r].imageOffset); | |||
const VkExtent3D bufferExtent = { | |||
.width = pRegions[r].bufferRowLength ? | |||
pRegions[r].bufferRowLength : pRegions[r].imageExtent.width, | |||
.height = pRegions[r].bufferImageHeight ? | |||
pRegions[r].bufferImageHeight : pRegions[r].imageExtent.height, | |||
}; | |||
const VkExtent3D buf_extent_el = | |||
meta_region_extent_el(image, &bufferExtent); | |||
/* Start creating blit rect */ | |||
const VkExtent3D img_extent_el = | |||
meta_region_extent_el(image, &pRegions[r].imageExtent); | |||
struct radv_meta_blit2d_rect rect = { | |||
.width = img_extent_el.width, | |||
.height = img_extent_el.height, | |||
}; | |||
/* Create blit surfaces */ | |||
struct radv_meta_blit2d_surf img_bsurf = | |||
blit_surf_for_image_level_layer(image, | |||
pRegions[r].imageSubresource.aspectMask, | |||
pRegions[r].imageSubresource.mipLevel, | |||
pRegions[r].imageSubresource.baseArrayLayer); | |||
struct radv_meta_blit2d_buffer buf_bsurf = { | |||
.bs = img_bsurf.bs, | |||
.format = img_bsurf.format, | |||
.buffer = buffer, | |||
.offset = pRegions[r].bufferOffset, | |||
.pitch = buf_extent_el.width, | |||
}; | |||
/* Loop through each 3D or array slice */ | |||
unsigned num_slices_3d = img_extent_el.depth; | |||
unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; | |||
unsigned slice_3d = 0; | |||
unsigned slice_array = 0; | |||
while (slice_3d < num_slices_3d && slice_array < num_slices_array) { | |||
rect.dst_x = img_offset_el.x; | |||
rect.dst_y = img_offset_el.y; | |||
/* Perform Blit */ | |||
radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); | |||
/* Once we've done the blit, all of the actual information about | |||
 * the image is embedded in the command buffer, so we can just | |||
 * increment the buffer offset directly, effectively re-binding the | |||
 * buffer to different backing memory for the next slice. | |||
 */ | |||
buf_bsurf.offset += buf_extent_el.width * | |||
buf_extent_el.height * buf_bsurf.bs; | |||
img_bsurf.layer++; | |||
if (image->type == VK_IMAGE_TYPE_3D) | |||
slice_3d++; | |||
else | |||
slice_array++; | |||
} | |||
} | |||
radv_meta_restore(&saved_state, cmd_buffer); | |||
} | |||
void radv_CmdCopyBufferToImage( | |||
VkCommandBuffer commandBuffer, | |||
VkBuffer srcBuffer, | |||
VkImage destImage, | |||
VkImageLayout destImageLayout, | |||
uint32_t regionCount, | |||
const VkBufferImageCopy* pRegions) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_image, dest_image, destImage); | |||
RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer); | |||
meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, | |||
regionCount, pRegions); | |||
} | |||
static void | |||
meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_buffer* buffer, | |||
struct radv_image* image, | |||
uint32_t regionCount, | |||
const VkBufferImageCopy* pRegions) | |||
{ | |||
struct radv_meta_saved_compute_state saved_state; | |||
radv_meta_begin_bufimage(cmd_buffer, &saved_state); | |||
for (unsigned r = 0; r < regionCount; r++) { | |||
/** | |||
 * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images | |||
 * imageExtent is the size in texels of the image to copy in width, | |||
 * height and depth. 1D images use only x and width. 2D images use x, y, | |||
 * width and height. 3D images use x, y, z, width, height and depth. | |||
 * | |||
 * Also, convert the offsets and extent from units of texels to units of | |||
 * blocks - which is the highest resolution accessible in this command. | |||
 */ | |||
const VkOffset3D img_offset_el = | |||
meta_region_offset_el(image, &pRegions[r].imageOffset); | |||
const VkExtent3D bufferExtent = { | |||
.width = pRegions[r].bufferRowLength ? | |||
pRegions[r].bufferRowLength : pRegions[r].imageExtent.width, | |||
.height = pRegions[r].bufferImageHeight ? | |||
pRegions[r].bufferImageHeight : pRegions[r].imageExtent.height, | |||
}; | |||
const VkExtent3D buf_extent_el = | |||
meta_region_extent_el(image, &bufferExtent); | |||
/* Start creating blit rect */ | |||
const VkExtent3D img_extent_el = | |||
meta_region_extent_el(image, &pRegions[r].imageExtent); | |||
struct radv_meta_blit2d_rect rect = { | |||
.width = img_extent_el.width, | |||
.height = img_extent_el.height, | |||
}; | |||
/* Create blit surfaces */ | |||
struct radv_meta_blit2d_surf img_info = | |||
blit_surf_for_image_level_layer(image, | |||
pRegions[r].imageSubresource.aspectMask, | |||
pRegions[r].imageSubresource.mipLevel, | |||
pRegions[r].imageSubresource.baseArrayLayer); | |||
struct radv_meta_blit2d_buffer buf_info = { | |||
.bs = img_info.bs, | |||
.format = img_info.format, | |||
.buffer = buffer, | |||
.offset = pRegions[r].bufferOffset, | |||
.pitch = buf_extent_el.width, | |||
}; | |||
/* Loop through each 3D or array slice */ | |||
unsigned num_slices_3d = img_extent_el.depth; | |||
unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; | |||
unsigned slice_3d = 0; | |||
unsigned slice_array = 0; | |||
while (slice_3d < num_slices_3d && slice_array < num_slices_array) { | |||
rect.src_x = img_offset_el.x; | |||
rect.src_y = img_offset_el.y; | |||
/* Perform Blit */ | |||
radv_meta_image_to_buffer(cmd_buffer, &img_info, &buf_info, 1, &rect); | |||
buf_info.offset += buf_extent_el.width * | |||
buf_extent_el.height * buf_info.bs; | |||
img_info.layer++; | |||
if (image->type == VK_IMAGE_TYPE_3D) | |||
slice_3d++; | |||
else | |||
slice_array++; | |||
} | |||
} | |||
radv_meta_end_bufimage(cmd_buffer, &saved_state); | |||
} | |||
void radv_CmdCopyImageToBuffer( | |||
VkCommandBuffer commandBuffer, | |||
VkImage srcImage, | |||
VkImageLayout srcImageLayout, | |||
VkBuffer destBuffer, | |||
uint32_t regionCount, | |||
const VkBufferImageCopy* pRegions) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_image, src_image, srcImage); | |||
RADV_FROM_HANDLE(radv_buffer, dst_buffer, destBuffer); | |||
meta_copy_image_to_buffer(cmd_buffer, dst_buffer, src_image, | |||
regionCount, pRegions); | |||
} | |||
void radv_CmdCopyImage( | |||
VkCommandBuffer commandBuffer, | |||
VkImage srcImage, | |||
VkImageLayout srcImageLayout, | |||
VkImage destImage, | |||
VkImageLayout destImageLayout, | |||
uint32_t regionCount, | |||
const VkImageCopy* pRegions) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_image, src_image, srcImage); | |||
RADV_FROM_HANDLE(radv_image, dest_image, destImage); | |||
struct radv_meta_saved_state saved_state; | |||
/* From the Vulkan 1.0 spec: | |||
* | |||
* vkCmdCopyImage can be used to copy image data between multisample | |||
* images, but both images must have the same number of samples. | |||
*/ | |||
assert(src_image->samples == dest_image->samples); | |||
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); | |||
for (unsigned r = 0; r < regionCount; r++) { | |||
assert(pRegions[r].srcSubresource.aspectMask == | |||
pRegions[r].dstSubresource.aspectMask); | |||
/* Create blit surfaces */ | |||
struct radv_meta_blit2d_surf b_src = | |||
blit_surf_for_image_level_layer(src_image, | |||
pRegions[r].srcSubresource.aspectMask, | |||
pRegions[r].srcSubresource.mipLevel, | |||
pRegions[r].srcSubresource.baseArrayLayer); | |||
struct radv_meta_blit2d_surf b_dst = | |||
blit_surf_for_image_level_layer(dest_image, | |||
pRegions[r].dstSubresource.aspectMask, | |||
pRegions[r].dstSubresource.mipLevel, | |||
pRegions[r].dstSubresource.baseArrayLayer); | |||
/* for DCC */ | |||
b_src.format = b_dst.format; | |||
/** | |||
 * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images | |||
 * extent is the size in texels of the image to copy in width, height | |||
 * and depth. 1D images use only x and width. 2D images use x, y, width | |||
 * and height. 3D images use x, y, z, width, height and depth. | |||
 * | |||
 * Also, convert the offsets and extent from units of texels to units of | |||
 * blocks - which is the highest resolution accessible in this command. | |||
 */ | |||
const VkOffset3D dst_offset_el = | |||
meta_region_offset_el(dest_image, &pRegions[r].dstOffset); | |||
const VkOffset3D src_offset_el = | |||
meta_region_offset_el(src_image, &pRegions[r].srcOffset); | |||
const VkExtent3D img_extent_el = | |||
meta_region_extent_el(src_image, &pRegions[r].extent); | |||
/* Start creating blit rect */ | |||
struct radv_meta_blit2d_rect rect = { | |||
.width = img_extent_el.width, | |||
.height = img_extent_el.height, | |||
}; | |||
/* Loop through each 3D or array slice */ | |||
unsigned num_slices_3d = img_extent_el.depth; | |||
unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; | |||
unsigned slice_3d = 0; | |||
unsigned slice_array = 0; | |||
while (slice_3d < num_slices_3d && slice_array < num_slices_array) { | |||
/* Finish creating blit rect */ | |||
rect.dst_x = dst_offset_el.x; | |||
rect.dst_y = dst_offset_el.y; | |||
rect.src_x = src_offset_el.x; | |||
rect.src_y = src_offset_el.y; | |||
/* Perform Blit */ | |||
radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect); | |||
b_src.layer++; | |||
b_dst.layer++; | |||
if (dest_image->type == VK_IMAGE_TYPE_3D) | |||
slice_3d++; | |||
else | |||
slice_array++; | |||
} | |||
} | |||
radv_meta_restore(&saved_state, cmd_buffer); | |||
} |
@@ -0,0 +1,463 @@ | |||
/* | |||
* Copyright © 2016 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <assert.h> | |||
#include <stdbool.h> | |||
#include "radv_meta.h" | |||
#include "radv_private.h" | |||
#include "nir/nir_builder.h" | |||
#include "sid.h" | |||
/** | |||
* Vertex attributes used by all pipelines. | |||
*/ | |||
struct vertex_attrs { | |||
float position[2]; /**< 3DPRIM_RECTLIST */ | |||
}; | |||
/* passthrough vertex shader */ | |||
static nir_shader * | |||
build_nir_vs(void) | |||
{ | |||
const struct glsl_type *vec4 = glsl_vec4_type(); | |||
nir_builder b; | |||
nir_variable *a_position; | |||
nir_variable *v_position; | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_vs"); | |||
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"a_position"); | |||
a_position->data.location = VERT_ATTRIB_GENERIC0; | |||
v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"gl_Position"); | |||
v_position->data.location = VARYING_SLOT_POS; | |||
nir_copy_var(&b, v_position, a_position); | |||
return b.shader; | |||
} | |||
/* simple passthrough shader */ | |||
static nir_shader * | |||
build_nir_fs(void) | |||
{ | |||
nir_builder b; | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_noop_fs"); | |||
return b.shader; | |||
} | |||
static VkResult | |||
create_pass(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc; | |||
VkAttachmentDescription attachment; | |||
attachment.flags = 0; | |||
attachment.format = VK_FORMAT_UNDEFINED; | |||
attachment.samples = 1; | |||
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; | |||
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; | |||
/* Preserve stencil alongside depth. */ | |||
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; | |||
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; | |||
attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | |||
attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | |||
result = radv_CreateRenderPass(device_h, | |||
&(VkRenderPassCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | |||
.attachmentCount = 1, | |||
.pAttachments = &attachment, | |||
.subpassCount = 1, | |||
.pSubpasses = &(VkSubpassDescription) { | |||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | |||
.inputAttachmentCount = 0, | |||
.colorAttachmentCount = 0, | |||
.pColorAttachments = NULL, | |||
.pResolveAttachments = NULL, | |||
.pDepthStencilAttachment = &(VkAttachmentReference) { | |||
.attachment = 0, | |||
.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, | |||
}, | |||
.preserveAttachmentCount = 0, | |||
.pPreserveAttachments = NULL, | |||
}, | |||
.dependencyCount = 0, | |||
}, | |||
alloc, | |||
&device->meta_state.depth_decomp.pass); | |||
return result; | |||
} | |||
static VkResult | |||
create_pipeline(struct radv_device *device, | |||
VkShaderModule vs_module_h) | |||
{ | |||
VkResult result; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
struct radv_shader_module fs_module = { | |||
.nir = build_nir_fs(), | |||
}; | |||
if (!fs_module.nir) { | |||
/* XXX: Need more accurate error */ | |||
result = VK_ERROR_OUT_OF_HOST_MEMORY; | |||
goto cleanup; | |||
} | |||
const VkGraphicsPipelineCreateInfo pipeline_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | |||
.stageCount = 2, | |||
.pStages = (VkPipelineShaderStageCreateInfo[]) { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_VERTEX_BIT, | |||
.module = vs_module_h, | |||
.pName = "main", | |||
}, | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_FRAGMENT_BIT, | |||
.module = radv_shader_module_to_handle(&fs_module), | |||
.pName = "main", | |||
}, | |||
}, | |||
.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | |||
.vertexBindingDescriptionCount = 1, | |||
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { | |||
{ | |||
.binding = 0, | |||
.stride = sizeof(struct vertex_attrs), | |||
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX | |||
}, | |||
}, | |||
.vertexAttributeDescriptionCount = 1, | |||
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { | |||
{ | |||
/* Position */ | |||
.location = 0, | |||
.binding = 0, | |||
.format = VK_FORMAT_R32G32_SFLOAT, | |||
.offset = offsetof(struct vertex_attrs, position), | |||
}, | |||
}, | |||
}, | |||
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | |||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, | |||
.primitiveRestartEnable = false, | |||
}, | |||
.pViewportState = &(VkPipelineViewportStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | |||
.viewportCount = 0, | |||
.scissorCount = 0, | |||
}, | |||
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | |||
.depthClampEnable = false, | |||
.rasterizerDiscardEnable = false, | |||
.polygonMode = VK_POLYGON_MODE_FILL, | |||
.cullMode = VK_CULL_MODE_NONE, | |||
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, | |||
}, | |||
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | |||
.rasterizationSamples = 1, | |||
.sampleShadingEnable = false, | |||
.pSampleMask = NULL, | |||
.alphaToCoverageEnable = false, | |||
.alphaToOneEnable = false, | |||
}, | |||
.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | |||
.logicOpEnable = false, | |||
.attachmentCount = 0, | |||
.pAttachments = NULL, | |||
}, | |||
.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | |||
.depthTestEnable = false, | |||
.depthWriteEnable = false, | |||
.depthBoundsTestEnable = false, | |||
.stencilTestEnable = false, | |||
}, | |||
.pDynamicState = NULL, | |||
.renderPass = device->meta_state.depth_decomp.pass, | |||
.subpass = 0, | |||
}; | |||
result = radv_graphics_pipeline_create(device_h, | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&pipeline_create_info, | |||
&(struct radv_graphics_pipeline_create_info) { | |||
.use_rectlist = true, | |||
.db_flush_depth_inplace = true, | |||
.db_flush_stencil_inplace = true, | |||
}, | |||
&device->meta_state.alloc, | |||
&device->meta_state.depth_decomp.decompress_pipeline); | |||
if (result != VK_SUCCESS) | |||
goto cleanup; | |||
result = radv_graphics_pipeline_create(device_h, | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&pipeline_create_info, | |||
&(struct radv_graphics_pipeline_create_info) { | |||
.use_rectlist = true, | |||
.db_flush_depth_inplace = true, | |||
.db_flush_stencil_inplace = true, | |||
.db_resummarize = true, | |||
}, | |||
&device->meta_state.alloc, | |||
&device->meta_state.depth_decomp.resummarize_pipeline); | |||
if (result != VK_SUCCESS) | |||
goto cleanup; | |||
goto cleanup; | |||
cleanup: | |||
ralloc_free(fs_module.nir); | |||
return result; | |||
} | |||
void | |||
radv_device_finish_meta_depth_decomp_state(struct radv_device *device) | |||
{ | |||
struct radv_meta_state *state = &device->meta_state; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
VkRenderPass pass_h = device->meta_state.depth_decomp.pass; | |||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc; | |||
if (pass_h) | |||
RADV_CALL(DestroyRenderPass)(device_h, pass_h, alloc); | |||
VkPipeline pipeline_h = state->depth_decomp.decompress_pipeline; | |||
if (pipeline_h) { | |||
RADV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); | |||
} | |||
pipeline_h = state->depth_decomp.resummarize_pipeline; | |||
if (pipeline_h) { | |||
RADV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); | |||
} | |||
} | |||
VkResult | |||
radv_device_init_meta_depth_decomp_state(struct radv_device *device) | |||
{ | |||
VkResult res = VK_SUCCESS; | |||
zero(device->meta_state.depth_decomp); | |||
struct radv_shader_module vs_module = { .nir = build_nir_vs() }; | |||
if (!vs_module.nir) { | |||
/* XXX: Need more accurate error */ | |||
res = VK_ERROR_OUT_OF_HOST_MEMORY; | |||
goto fail; | |||
} | |||
res = create_pass(device); | |||
if (res != VK_SUCCESS) | |||
goto fail; | |||
VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); | |||
res = create_pipeline(device, vs_module_h); | |||
if (res != VK_SUCCESS) | |||
goto fail; | |||
goto cleanup; | |||
fail: | |||
radv_device_finish_meta_depth_decomp_state(device); | |||
cleanup: | |||
ralloc_free(vs_module.nir); | |||
return res; | |||
} | |||
static void | |||
emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer, | |||
const VkOffset2D *dest_offset, | |||
const VkExtent2D *depth_decomp_extent, | |||
VkPipeline pipeline_h) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); | |||
uint32_t offset; | |||
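/* Three corners of the rectangle; the pipelines are created with | |||
 * use_rectlist, so the hardware derives the fourth vertex. | |||
 */ | |||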
const struct vertex_attrs vertex_data[3] = { | |||
{ | |||
.position = { | |||
dest_offset->x, | |||
dest_offset->y, | |||
}, | |||
}, | |||
{ | |||
.position = { | |||
dest_offset->x, | |||
dest_offset->y + depth_decomp_extent->height, | |||
}, | |||
}, | |||
{ | |||
.position = { | |||
dest_offset->x + depth_decomp_extent->width, | |||
dest_offset->y, | |||
}, | |||
}, | |||
}; | |||
radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset); | |||
struct radv_buffer vertex_buffer = { | |||
.device = device, | |||
.size = sizeof(vertex_data), | |||
.bo = cmd_buffer->upload.upload_bo, | |||
.offset = offset, | |||
}; | |||
VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer); | |||
radv_CmdBindVertexBuffers(cmd_buffer_h, | |||
/*firstBinding*/ 0, | |||
/*bindingCount*/ 1, | |||
(VkBuffer[]) { vertex_buffer_h }, | |||
(VkDeviceSize[]) { 0 }); | |||
RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h); | |||
if (cmd_buffer->state.pipeline != pipeline) { | |||
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, | |||
pipeline_h); | |||
} | |||
RADV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); | |||
} | |||
static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image, | |||
VkImageSubresourceRange *subresourceRange, | |||
VkPipeline pipeline_h) | |||
{ | |||
struct radv_meta_saved_state saved_state; | |||
struct radv_meta_saved_pass_state saved_pass_state; | |||
VkDevice device_h = radv_device_to_handle(cmd_buffer->device); | |||
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); | |||
uint32_t width = radv_minify(image->extent.width, | |||
subresourceRange->baseMipLevel); | |||
uint32_t height = radv_minify(image->extent.height, | |||
subresourceRange->baseMipLevel); | |||
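/* Only depth images with HTILE metadata have anything to decompress | |||
 * or resummarize. | |||
 */ | |||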
if (!image->htile.size) | |||
return; | |||
radv_meta_save_pass(&saved_pass_state, cmd_buffer); | |||
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); | |||
for (uint32_t layer = 0; layer < subresourceRange->layerCount; layer++) { | |||
struct radv_image_view iview; | |||
radv_image_view_init(&iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = radv_image_to_handle(image), | |||
.format = image->vk_format, | |||
.subresourceRange = { | |||
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, | |||
.baseMipLevel = subresourceRange->baseMipLevel, | |||
.levelCount = 1, | |||
.baseArrayLayer = subresourceRange->baseArrayLayer + layer, | |||
.layerCount = 1, | |||
}, | |||
}, | |||
cmd_buffer, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); | |||
VkFramebuffer fb_h; | |||
radv_CreateFramebuffer(device_h, | |||
&(VkFramebufferCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | |||
.attachmentCount = 1, | |||
.pAttachments = (VkImageView[]) { | |||
radv_image_view_to_handle(&iview) | |||
}, | |||
.width = width, | |||
.height = height, | |||
.layers = 1 | |||
}, | |||
&cmd_buffer->pool->alloc, | |||
&fb_h); | |||
RADV_CALL(CmdBeginRenderPass)(cmd_buffer_h, | |||
&(VkRenderPassBeginInfo) { | |||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | |||
.renderPass = cmd_buffer->device->meta_state.depth_decomp.pass, | |||
.framebuffer = fb_h, | |||
.renderArea = { | |||
.offset = { | |||
0, | |||
0, | |||
}, | |||
.extent = { | |||
width, | |||
height, | |||
} | |||
}, | |||
.clearValueCount = 0, | |||
.pClearValues = NULL, | |||
}, | |||
VK_SUBPASS_CONTENTS_INLINE); | |||
emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h); | |||
RADV_CALL(CmdEndRenderPass)(cmd_buffer_h); | |||
radv_DestroyFramebuffer(device_h, fb_h, | |||
&cmd_buffer->pool->alloc); | |||
} | |||
radv_meta_restore(&saved_state, cmd_buffer); | |||
radv_meta_restore_pass(&saved_pass_state, cmd_buffer); | |||
} | |||
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image, | |||
VkImageSubresourceRange *subresourceRange) | |||
{ | |||
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, | |||
cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline); | |||
} | |||
void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image, | |||
VkImageSubresourceRange *subresourceRange) | |||
{ | |||
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, | |||
cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline); | |||
} |
@@ -0,0 +1,536 @@ | |||
/* | |||
* Copyright © 2016 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <assert.h> | |||
#include <stdbool.h> | |||
#include "radv_meta.h" | |||
#include "radv_private.h" | |||
#include "nir/nir_builder.h" | |||
#include "sid.h" | |||
/** | |||
* Vertex attributes used by all pipelines. | |||
*/ | |||
struct vertex_attrs { | |||
float position[2]; /**< 3DPRIM_RECTLIST */ | |||
float tex_position[2]; | |||
}; | |||
/* passthrough vertex shader */ | |||
static nir_shader * | |||
build_nir_vs(void) | |||
{ | |||
const struct glsl_type *vec4 = glsl_vec4_type(); | |||
nir_builder b; | |||
nir_variable *a_position; | |||
nir_variable *v_position; | |||
nir_variable *a_tex_position; | |||
nir_variable *v_tex_position; | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_fast_clear_vs"); | |||
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"a_position"); | |||
a_position->data.location = VERT_ATTRIB_GENERIC0; | |||
v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"gl_Position"); | |||
v_position->data.location = VARYING_SLOT_POS; | |||
a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"a_tex_position"); | |||
a_tex_position->data.location = VERT_ATTRIB_GENERIC1; | |||
v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"v_tex_position"); | |||
v_tex_position->data.location = VARYING_SLOT_VAR0; | |||
nir_copy_var(&b, v_position, a_position); | |||
nir_copy_var(&b, v_tex_position, a_tex_position); | |||
return b.shader; | |||
} | |||
/* simple passthrough shader */ | |||
static nir_shader * | |||
build_nir_fs(void) | |||
{ | |||
const struct glsl_type *vec4 = glsl_vec4_type(); | |||
nir_builder b; | |||
nir_variable *v_tex_position; /* vec4, varying texture coordinate */ | |||
nir_variable *f_color; /* vec4, fragment output color */ | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); | |||
b.shader->info.name = ralloc_asprintf(b.shader, | |||
"meta_fast_clear_fs"); | |||
v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"v_tex_position"); | |||
v_tex_position->data.location = VARYING_SLOT_VAR0; | |||
f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"f_color"); | |||
f_color->data.location = FRAG_RESULT_DATA0; | |||
nir_copy_var(&b, f_color, v_tex_position); | |||
return b.shader; | |||
} | |||
static VkResult | |||
create_pass(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc; | |||
VkAttachmentDescription attachment; | |||
attachment.format = VK_FORMAT_UNDEFINED; | |||
attachment.samples = 1; | |||
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; | |||
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; | |||
attachment.initialLayout = VK_IMAGE_LAYOUT_GENERAL; | |||
attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL; | |||
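/* VK_FORMAT_UNDEFINED appears intentional here: the pass only loads and
 * stores, and the format actually used comes from the image view bound in
 * the framebuffer. */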
result = radv_CreateRenderPass(device_h, | |||
&(VkRenderPassCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | |||
.attachmentCount = 1, | |||
.pAttachments = &attachment, | |||
.subpassCount = 1, | |||
.pSubpasses = &(VkSubpassDescription) { | |||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | |||
.inputAttachmentCount = 0, | |||
.colorAttachmentCount = 1, | |||
.pColorAttachments = (VkAttachmentReference[]) { | |||
{ | |||
.attachment = 0, | |||
.layout = VK_IMAGE_LAYOUT_GENERAL, | |||
}, | |||
}, | |||
.pResolveAttachments = NULL, | |||
.pDepthStencilAttachment = &(VkAttachmentReference) { | |||
.attachment = VK_ATTACHMENT_UNUSED, | |||
}, | |||
.preserveAttachmentCount = 0, | |||
.pPreserveAttachments = NULL, | |||
}, | |||
.dependencyCount = 0, | |||
}, | |||
alloc, | |||
&device->meta_state.fast_clear_flush.pass); | |||
return result; | |||
} | |||
static VkResult | |||
create_pipeline(struct radv_device *device, | |||
VkShaderModule vs_module_h) | |||
{ | |||
VkResult result; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
struct radv_shader_module fs_module = { | |||
.nir = build_nir_fs(), | |||
}; | |||
if (!fs_module.nir) { | |||
/* XXX: Need more accurate error */ | |||
result = VK_ERROR_OUT_OF_HOST_MEMORY; | |||
goto cleanup; | |||
} | |||
const VkPipelineShaderStageCreateInfo stages[2] = { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_VERTEX_BIT, | |||
.module = vs_module_h, | |||
.pName = "main", | |||
}, | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_FRAGMENT_BIT, | |||
.module = radv_shader_module_to_handle(&fs_module), | |||
.pName = "main", | |||
}, | |||
}; | |||
const VkPipelineVertexInputStateCreateInfo vi_state = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | |||
.vertexBindingDescriptionCount = 1, | |||
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { | |||
{ | |||
.binding = 0, | |||
.stride = sizeof(struct vertex_attrs), | |||
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX | |||
}, | |||
}, | |||
.vertexAttributeDescriptionCount = 2, | |||
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { | |||
{ | |||
/* Position */ | |||
.location = 0, | |||
.binding = 0, | |||
.format = VK_FORMAT_R32G32_SFLOAT, | |||
.offset = offsetof(struct vertex_attrs, position), | |||
}, | |||
{ | |||
/* Texture Coordinate */ | |||
.location = 1, | |||
.binding = 0, | |||
.format = VK_FORMAT_R32G32_SFLOAT, | |||
.offset = offsetof(struct vertex_attrs, tex_position), | |||
}, | |||
} | |||
}; | |||
const VkPipelineInputAssemblyStateCreateInfo ia_state = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | |||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, | |||
.primitiveRestartEnable = false, | |||
}; | |||
const VkPipelineColorBlendStateCreateInfo blend_state = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | |||
.logicOpEnable = false, | |||
.attachmentCount = 1, | |||
.pAttachments = (VkPipelineColorBlendAttachmentState []) { | |||
{ | |||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | | |||
VK_COLOR_COMPONENT_G_BIT | | |||
VK_COLOR_COMPONENT_B_BIT | | |||
VK_COLOR_COMPONENT_A_BIT, | |||
}, | |||
} | |||
}; | |||
const VkPipelineRasterizationStateCreateInfo rs_state = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | |||
.depthClampEnable = false, | |||
.rasterizerDiscardEnable = false, | |||
.polygonMode = VK_POLYGON_MODE_FILL, | |||
.cullMode = VK_CULL_MODE_NONE, | |||
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, | |||
}; | |||
result = radv_graphics_pipeline_create(device_h, | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&(VkGraphicsPipelineCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | |||
.stageCount = 2, | |||
.pStages = stages, | |||
.pVertexInputState = &vi_state, | |||
.pInputAssemblyState = &ia_state, | |||
.pViewportState = &(VkPipelineViewportStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | |||
.viewportCount = 0, | |||
.scissorCount = 0, | |||
}, | |||
.pRasterizationState = &rs_state, | |||
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | |||
.rasterizationSamples = 1, | |||
.sampleShadingEnable = false, | |||
.pSampleMask = NULL, | |||
.alphaToCoverageEnable = false, | |||
.alphaToOneEnable = false, | |||
}, | |||
.pColorBlendState = &blend_state, | |||
.pDynamicState = NULL, | |||
.renderPass = device->meta_state.fast_clear_flush.pass, | |||
.subpass = 0, | |||
}, | |||
&(struct radv_graphics_pipeline_create_info) { | |||
.use_rectlist = true, | |||
.custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR, | |||
}, | |||
&device->meta_state.alloc, | |||
&device->meta_state.fast_clear_flush.cmask_eliminate_pipeline); | |||
if (result != VK_SUCCESS) | |||
goto cleanup; | |||
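/* Same pipeline state again, but with the FMASK_DECOMPRESS custom blend mode
 * rather than ELIMINATE_FAST_CLEAR; emit_fast_clear_flush() selects between
 * the two based on whether the image has FMASK. */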
result = radv_graphics_pipeline_create(device_h, | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&(VkGraphicsPipelineCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | |||
.stageCount = 2, | |||
.pStages = stages, | |||
.pVertexInputState = &vi_state, | |||
.pInputAssemblyState = &ia_state, | |||
.pViewportState = &(VkPipelineViewportStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | |||
.viewportCount = 0, | |||
.scissorCount = 0, | |||
}, | |||
.pRasterizationState = &rs_state, | |||
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | |||
.rasterizationSamples = 1, | |||
.sampleShadingEnable = false, | |||
.pSampleMask = NULL, | |||
.alphaToCoverageEnable = false, | |||
.alphaToOneEnable = false, | |||
}, | |||
.pColorBlendState = &blend_state, | |||
.pDynamicState = NULL, | |||
.renderPass = device->meta_state.fast_clear_flush.pass, | |||
.subpass = 0, | |||
}, | |||
&(struct radv_graphics_pipeline_create_info) { | |||
.use_rectlist = true, | |||
.custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS, | |||
}, | |||
&device->meta_state.alloc, | |||
&device->meta_state.fast_clear_flush.fmask_decompress_pipeline); | |||
if (result != VK_SUCCESS) | |||
goto cleanup_cmask; | |||
goto cleanup; | |||
cleanup_cmask: | |||
RADV_CALL(DestroyPipeline)(device_h, device->meta_state.fast_clear_flush.cmask_eliminate_pipeline, &device->meta_state.alloc); | |||
cleanup: | |||
ralloc_free(fs_module.nir); | |||
return result; | |||
} | |||
void | |||
radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device) | |||
{ | |||
struct radv_meta_state *state = &device->meta_state; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
VkRenderPass pass_h = device->meta_state.fast_clear_flush.pass; | |||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc; | |||
if (pass_h) | |||
RADV_CALL(DestroyRenderPass)(device_h, pass_h, | |||
&device->meta_state.alloc); | |||
VkPipeline pipeline_h = state->fast_clear_flush.cmask_eliminate_pipeline; | |||
if (pipeline_h) { | |||
RADV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); | |||
} | |||
pipeline_h = state->fast_clear_flush.fmask_decompress_pipeline; | |||
if (pipeline_h) { | |||
RADV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); | |||
} | |||
} | |||
VkResult | |||
radv_device_init_meta_fast_clear_flush_state(struct radv_device *device) | |||
{ | |||
VkResult res = VK_SUCCESS; | |||
zero(device->meta_state.fast_clear_flush); | |||
struct radv_shader_module vs_module = { .nir = build_nir_vs() }; | |||
if (!vs_module.nir) { | |||
/* XXX: Need more accurate error */ | |||
res = VK_ERROR_OUT_OF_HOST_MEMORY; | |||
goto fail; | |||
} | |||
res = create_pass(device); | |||
if (res != VK_SUCCESS) | |||
goto fail; | |||
VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); | |||
res = create_pipeline(device, vs_module_h); | |||
if (res != VK_SUCCESS) | |||
goto fail; | |||
goto cleanup; | |||
fail: | |||
radv_device_finish_meta_fast_clear_flush_state(device); | |||
cleanup: | |||
ralloc_free(vs_module.nir); | |||
return res; | |||
} | |||
static void | |||
emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, | |||
const VkExtent2D *resolve_extent, | |||
bool fmask_decompress) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); | |||
uint32_t offset; | |||
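/* Three RECTLIST vertices covering the full extent; the pipelines were
 * created with use_rectlist = true, so the draw below is expanded into a
 * screen-aligned rectangle. */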
const struct vertex_attrs vertex_data[3] = { | |||
{ | |||
.position = { | |||
0, | |||
0, | |||
}, | |||
.tex_position = { | |||
0, | |||
0, | |||
}, | |||
}, | |||
{ | |||
.position = { | |||
0, | |||
resolve_extent->height, | |||
}, | |||
.tex_position = { | |||
0, | |||
resolve_extent->height, | |||
}, | |||
}, | |||
{ | |||
.position = { | |||
resolve_extent->width, | |||
0, | |||
}, | |||
.tex_position = { | |||
resolve_extent->width, | |||
0, | |||
}, | |||
}, | |||
}; | |||
cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | | |||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); | |||
radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset); | |||
struct radv_buffer vertex_buffer = { | |||
.device = device, | |||
.size = sizeof(vertex_data), | |||
.bo = cmd_buffer->upload.upload_bo, | |||
.offset = offset, | |||
}; | |||
VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer); | |||
radv_CmdBindVertexBuffers(cmd_buffer_h, | |||
/*firstBinding*/ 0, | |||
/*bindingCount*/ 1, | |||
(VkBuffer[]) { vertex_buffer_h }, | |||
(VkDeviceSize[]) { 0 }); | |||
VkPipeline pipeline_h; | |||
if (fmask_decompress) | |||
pipeline_h = device->meta_state.fast_clear_flush.fmask_decompress_pipeline; | |||
else | |||
pipeline_h = device->meta_state.fast_clear_flush.cmask_eliminate_pipeline; | |||
RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h); | |||
if (cmd_buffer->state.pipeline != pipeline) { | |||
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, | |||
pipeline_h); | |||
} | |||
RADV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); | |||
cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | | |||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); | |||
si_emit_cache_flush(cmd_buffer); | |||
} | |||
/**
* Flush the fast-clear metadata of @image in place: eliminate CMASK fast
* clears and, for images with FMASK, decompress FMASK.
*/
void | |||
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *image) | |||
{ | |||
struct radv_meta_saved_state saved_state; | |||
struct radv_meta_saved_pass_state saved_pass_state; | |||
VkDevice device_h = radv_device_to_handle(cmd_buffer->device); | |||
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); | |||
if (!image->cmask.size) | |||
return; | |||
if (!cmd_buffer->device->allow_fast_clears) | |||
return; | |||
radv_meta_save_pass(&saved_pass_state, cmd_buffer); | |||
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); | |||
struct radv_image_view iview; | |||
radv_image_view_init(&iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = radv_image_to_handle(image), | |||
.format = image->vk_format, | |||
.subresourceRange = { | |||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | |||
.baseMipLevel = 0, | |||
.levelCount = 1, | |||
.baseArrayLayer = 0, | |||
.layerCount = 1, | |||
}, | |||
}, | |||
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); | |||
VkFramebuffer fb_h; | |||
radv_CreateFramebuffer(device_h, | |||
&(VkFramebufferCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | |||
.attachmentCount = 1, | |||
.pAttachments = (VkImageView[]) { | |||
radv_image_view_to_handle(&iview) | |||
}, | |||
.width = image->extent.width, | |||
.height = image->extent.height, | |||
.layers = 1 | |||
}, | |||
&cmd_buffer->pool->alloc, | |||
&fb_h); | |||
RADV_CALL(CmdBeginRenderPass)(cmd_buffer_h, | |||
&(VkRenderPassBeginInfo) { | |||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | |||
.renderPass = cmd_buffer->device->meta_state.fast_clear_flush.pass, | |||
.framebuffer = fb_h, | |||
.renderArea = { | |||
.offset = { | |||
0, | |||
0, | |||
}, | |||
.extent = { | |||
image->extent.width, | |||
image->extent.height, | |||
} | |||
}, | |||
.clearValueCount = 0, | |||
.pClearValues = NULL, | |||
}, | |||
VK_SUBPASS_CONTENTS_INLINE); | |||
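/* Images with FMASK need the decompress pipeline; otherwise CMASK
 * elimination is enough. */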
emit_fast_clear_flush(cmd_buffer, | |||
&(VkExtent2D) { image->extent.width, image->extent.height }, | |||
image->fmask.size > 0); | |||
RADV_CALL(CmdEndRenderPass)(cmd_buffer_h); | |||
radv_DestroyFramebuffer(device_h, fb_h, | |||
&cmd_buffer->pool->alloc); | |||
radv_meta_restore(&saved_state, cmd_buffer); | |||
radv_meta_restore_pass(&saved_pass_state, cmd_buffer); | |||
} |
@@ -0,0 +1,670 @@ | |||
/* | |||
* Copyright © 2016 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <assert.h> | |||
#include <stdbool.h> | |||
#include "radv_meta.h" | |||
#include "radv_private.h" | |||
#include "nir/nir_builder.h" | |||
#include "sid.h" | |||
/** | |||
* Vertex attributes used by all pipelines. | |||
*/ | |||
struct vertex_attrs { | |||
float position[2]; /**< 3DPRIM_RECTLIST */ | |||
float tex_position[2]; | |||
}; | |||
/* passthrough vertex shader */ | |||
static nir_shader * | |||
build_nir_vs(void) | |||
{ | |||
const struct glsl_type *vec4 = glsl_vec4_type(); | |||
nir_builder b; | |||
nir_variable *a_position; | |||
nir_variable *v_position; | |||
nir_variable *a_tex_position; | |||
nir_variable *v_tex_position; | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); | |||
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"a_position"); | |||
a_position->data.location = VERT_ATTRIB_GENERIC0; | |||
v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"gl_Position"); | |||
v_position->data.location = VARYING_SLOT_POS; | |||
a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"a_tex_position"); | |||
a_tex_position->data.location = VERT_ATTRIB_GENERIC1; | |||
v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"v_tex_position"); | |||
v_tex_position->data.location = VARYING_SLOT_VAR0; | |||
nir_copy_var(&b, v_position, a_position); | |||
nir_copy_var(&b, v_tex_position, a_tex_position); | |||
return b.shader; | |||
} | |||
/* simple passthrough shader */ | |||
static nir_shader * | |||
build_nir_fs(void) | |||
{ | |||
const struct glsl_type *vec4 = glsl_vec4_type(); | |||
nir_builder b; | |||
nir_variable *v_tex_position; /* vec4, varying texture coordinate */ | |||
nir_variable *f_color; /* vec4, fragment output color */ | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); | |||
b.shader->info.name = ralloc_asprintf(b.shader, | |||
"meta_resolve_fs"); | |||
v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, | |||
"v_tex_position"); | |||
v_tex_position->data.location = VARYING_SLOT_VAR0; | |||
f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, | |||
"f_color"); | |||
f_color->data.location = FRAG_RESULT_DATA0; | |||
nir_copy_var(&b, f_color, v_tex_position); | |||
return b.shader; | |||
} | |||
static VkResult | |||
create_pass(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc; | |||
VkAttachmentDescription attachments[2]; | |||
int i; | |||
for (i = 0; i < 2; i++) { | |||
attachments[i].format = VK_FORMAT_UNDEFINED; | |||
attachments[i].samples = 1; | |||
attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; | |||
attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; | |||
attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL; | |||
attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL; | |||
} | |||
result = radv_CreateRenderPass(device_h, | |||
&(VkRenderPassCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | |||
.attachmentCount = 2, | |||
.pAttachments = attachments, | |||
.subpassCount = 1, | |||
.pSubpasses = &(VkSubpassDescription) { | |||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | |||
.inputAttachmentCount = 0, | |||
.colorAttachmentCount = 2, | |||
.pColorAttachments = (VkAttachmentReference[]) { | |||
{ | |||
.attachment = 0, | |||
.layout = VK_IMAGE_LAYOUT_GENERAL, | |||
}, | |||
{ | |||
.attachment = 1, | |||
.layout = VK_IMAGE_LAYOUT_GENERAL, | |||
}, | |||
}, | |||
.pResolveAttachments = NULL, | |||
.pDepthStencilAttachment = &(VkAttachmentReference) { | |||
.attachment = VK_ATTACHMENT_UNUSED, | |||
}, | |||
.preserveAttachmentCount = 0, | |||
.pPreserveAttachments = NULL, | |||
}, | |||
.dependencyCount = 0, | |||
}, | |||
alloc, | |||
&device->meta_state.resolve.pass); | |||
return result; | |||
} | |||
static VkResult | |||
create_pipeline(struct radv_device *device, | |||
VkShaderModule vs_module_h) | |||
{ | |||
VkResult result; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
struct radv_shader_module fs_module = { | |||
.nir = build_nir_fs(), | |||
}; | |||
if (!fs_module.nir) { | |||
/* XXX: Need more accurate error */ | |||
result = VK_ERROR_OUT_OF_HOST_MEMORY; | |||
goto cleanup; | |||
} | |||
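/* A single pipeline with two color attachments bound (source, then
 * destination). Writes to the second attachment are masked; the CB_RESOLVE
 * custom blend mode lets the color backend carry out the resolve between
 * the two bound surfaces. */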
result = radv_graphics_pipeline_create(device_h, | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
&(VkGraphicsPipelineCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | |||
.stageCount = 2, | |||
.pStages = (VkPipelineShaderStageCreateInfo[]) { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_VERTEX_BIT, | |||
.module = vs_module_h, | |||
.pName = "main", | |||
}, | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_FRAGMENT_BIT, | |||
.module = radv_shader_module_to_handle(&fs_module), | |||
.pName = "main", | |||
}, | |||
}, | |||
.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | |||
.vertexBindingDescriptionCount = 1, | |||
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { | |||
{ | |||
.binding = 0, | |||
.stride = sizeof(struct vertex_attrs), | |||
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX | |||
}, | |||
}, | |||
.vertexAttributeDescriptionCount = 2, | |||
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { | |||
{ | |||
/* Position */ | |||
.location = 0, | |||
.binding = 0, | |||
.format = VK_FORMAT_R32G32_SFLOAT, | |||
.offset = offsetof(struct vertex_attrs, position), | |||
}, | |||
{ | |||
/* Texture Coordinate */ | |||
.location = 1, | |||
.binding = 0, | |||
.format = VK_FORMAT_R32G32_SFLOAT, | |||
.offset = offsetof(struct vertex_attrs, tex_position), | |||
}, | |||
}, | |||
}, | |||
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | |||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, | |||
.primitiveRestartEnable = false, | |||
}, | |||
.pViewportState = &(VkPipelineViewportStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | |||
.viewportCount = 0, | |||
.scissorCount = 0, | |||
}, | |||
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | |||
.depthClampEnable = false, | |||
.rasterizerDiscardEnable = false, | |||
.polygonMode = VK_POLYGON_MODE_FILL, | |||
.cullMode = VK_CULL_MODE_NONE, | |||
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, | |||
}, | |||
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | |||
.rasterizationSamples = 1, | |||
.sampleShadingEnable = false, | |||
.pSampleMask = NULL, | |||
.alphaToCoverageEnable = false, | |||
.alphaToOneEnable = false, | |||
}, | |||
.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | |||
.logicOpEnable = false, | |||
.attachmentCount = 2, | |||
.pAttachments = (VkPipelineColorBlendAttachmentState []) { | |||
{ | |||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | | |||
VK_COLOR_COMPONENT_G_BIT | | |||
VK_COLOR_COMPONENT_B_BIT | | |||
VK_COLOR_COMPONENT_A_BIT, | |||
}, | |||
{ | |||
.colorWriteMask = 0, | |||
} | |||
}, | |||
}, | |||
.pDynamicState = NULL, | |||
.renderPass = device->meta_state.resolve.pass, | |||
.subpass = 0, | |||
}, | |||
&(struct radv_graphics_pipeline_create_info) { | |||
.use_rectlist = true, | |||
.custom_blend_mode = V_028808_CB_RESOLVE, | |||
}, | |||
&device->meta_state.alloc, | |||
&device->meta_state.resolve.pipeline); | |||
/* Success and failure share the same cleanup path. */
cleanup: | |||
ralloc_free(fs_module.nir); | |||
return result; | |||
} | |||
void | |||
radv_device_finish_meta_resolve_state(struct radv_device *device) | |||
{ | |||
struct radv_meta_state *state = &device->meta_state; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
VkRenderPass pass_h = device->meta_state.resolve.pass; | |||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc; | |||
if (pass_h) | |||
RADV_CALL(DestroyRenderPass)(device_h, pass_h, | |||
&device->meta_state.alloc); | |||
VkPipeline pipeline_h = state->resolve.pipeline; | |||
if (pipeline_h) { | |||
RADV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); | |||
} | |||
} | |||
VkResult | |||
radv_device_init_meta_resolve_state(struct radv_device *device) | |||
{ | |||
VkResult res = VK_SUCCESS; | |||
zero(device->meta_state.resolve); | |||
struct radv_shader_module vs_module = { .nir = build_nir_vs() }; | |||
if (!vs_module.nir) { | |||
/* XXX: Need more accurate error */ | |||
res = VK_ERROR_OUT_OF_HOST_MEMORY; | |||
goto fail; | |||
} | |||
res = create_pass(device); | |||
if (res != VK_SUCCESS) | |||
goto fail; | |||
VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); | |||
res = create_pipeline(device, vs_module_h); | |||
if (res != VK_SUCCESS) | |||
goto fail; | |||
goto cleanup; | |||
fail: | |||
radv_device_finish_meta_resolve_state(device); | |||
cleanup: | |||
ralloc_free(vs_module.nir); | |||
return res; | |||
} | |||
static void | |||
emit_resolve(struct radv_cmd_buffer *cmd_buffer, | |||
const VkOffset2D *src_offset, | |||
const VkOffset2D *dest_offset, | |||
const VkExtent2D *resolve_extent) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); | |||
uint32_t offset; | |||
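/* Three RECTLIST vertices mapping the source rectangle onto the destination:
 * position holds destination texels, tex_position the matching source
 * texels. */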
const struct vertex_attrs vertex_data[3] = { | |||
{ | |||
.position = { | |||
dest_offset->x, | |||
dest_offset->y, | |||
}, | |||
.tex_position = { | |||
src_offset->x, | |||
src_offset->y, | |||
}, | |||
}, | |||
{ | |||
.position = { | |||
dest_offset->x, | |||
dest_offset->y + resolve_extent->height, | |||
}, | |||
.tex_position = { | |||
src_offset->x, | |||
src_offset->y + resolve_extent->height, | |||
}, | |||
}, | |||
{ | |||
.position = { | |||
dest_offset->x + resolve_extent->width, | |||
dest_offset->y, | |||
}, | |||
.tex_position = { | |||
src_offset->x + resolve_extent->width, | |||
src_offset->y, | |||
}, | |||
}, | |||
}; | |||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; | |||
radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset); | |||
struct radv_buffer vertex_buffer = { | |||
.device = device, | |||
.size = sizeof(vertex_data), | |||
.bo = cmd_buffer->upload.upload_bo, | |||
.offset = offset, | |||
}; | |||
VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer); | |||
radv_CmdBindVertexBuffers(cmd_buffer_h, | |||
/*firstBinding*/ 0, | |||
/*bindingCount*/ 1, | |||
(VkBuffer[]) { vertex_buffer_h }, | |||
(VkDeviceSize[]) { 0 }); | |||
VkPipeline pipeline_h = device->meta_state.resolve.pipeline; | |||
RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h); | |||
if (cmd_buffer->state.pipeline != pipeline) { | |||
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, | |||
pipeline_h); | |||
} | |||
RADV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); | |||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; | |||
si_emit_cache_flush(cmd_buffer); | |||
} | |||
void radv_CmdResolveImage( | |||
VkCommandBuffer cmd_buffer_h, | |||
VkImage src_image_h, | |||
VkImageLayout src_image_layout, | |||
VkImage dest_image_h, | |||
VkImageLayout dest_image_layout, | |||
uint32_t region_count, | |||
const VkImageResolve* regions) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffer_h); | |||
RADV_FROM_HANDLE(radv_image, src_image, src_image_h); | |||
RADV_FROM_HANDLE(radv_image, dest_image, dest_image_h); | |||
struct radv_device *device = cmd_buffer->device; | |||
struct radv_meta_saved_state saved_state; | |||
VkDevice device_h = radv_device_to_handle(device); | |||
bool use_compute_resolve = false; | |||
/* The HW resolve path only handles a single, full-image resolve; everything else falls back to the compute path. */
if (region_count == 1) { | |||
if (regions[0].srcOffset.x || | |||
regions[0].srcOffset.y || | |||
regions[0].srcOffset.z) | |||
use_compute_resolve = true; | |||
if (regions[0].dstOffset.x || | |||
regions[0].dstOffset.y || | |||
regions[0].dstOffset.z) | |||
use_compute_resolve = true; | |||
if (regions[0].extent.width != src_image->extent.width || | |||
regions[0].extent.height != src_image->extent.height || | |||
regions[0].extent.depth != src_image->extent.depth) | |||
use_compute_resolve = true; | |||
} else | |||
use_compute_resolve = true; | |||
if (use_compute_resolve) { | |||
radv_meta_resolve_compute_image(cmd_buffer, | |||
src_image, | |||
src_image_layout, | |||
dest_image, | |||
dest_image_layout, | |||
region_count, regions); | |||
return; | |||
} | |||
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); | |||
assert(src_image->samples > 1); | |||
assert(dest_image->samples == 1); | |||
if (src_image->samples >= 16) { | |||
/* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the | |||
* glBlitFramebuffer workaround for samples >= 16. | |||
*/ | |||
radv_finishme("vkCmdResolveImage: need interpolation workaround when " | |||
"samples >= 16"); | |||
} | |||
if (src_image->array_size > 1) | |||
radv_finishme("vkCmdResolveImage: multisample array images"); | |||
for (uint32_t r = 0; r < region_count; ++r) { | |||
const VkImageResolve *region = ®ions[r]; | |||
/* From the Vulkan 1.0 spec: | |||
* | |||
* - The aspectMask member of srcSubresource and dstSubresource must | |||
* only contain VK_IMAGE_ASPECT_COLOR_BIT | |||
* | |||
* - The layerCount member of srcSubresource and dstSubresource must | |||
* match | |||
*/ | |||
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); | |||
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); | |||
assert(region->srcSubresource.layerCount == | |||
region->dstSubresource.layerCount); | |||
const uint32_t src_base_layer = | |||
radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, | |||
®ion->srcOffset); | |||
const uint32_t dest_base_layer = | |||
radv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, | |||
®ion->dstOffset); | |||
/**
* From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
*
* srcOffset and dstOffset select the initial x, y, and z offsets in
* texels of the sub-regions of the source and destination image data.
* extent is the size in texels of the source image to resolve in width,
* height and depth. 1D images use only x and width. 2D images use x, y,
* width and height. 3D images use x, y, z, width, height and depth.
*/
const struct VkExtent3D extent = | |||
radv_sanitize_image_extent(src_image->type, region->extent); | |||
const struct VkOffset3D srcOffset = | |||
radv_sanitize_image_offset(src_image->type, region->srcOffset); | |||
const struct VkOffset3D dstOffset = | |||
radv_sanitize_image_offset(dest_image->type, region->dstOffset); | |||
for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; | |||
++layer) { | |||
struct radv_image_view src_iview; | |||
radv_image_view_init(&src_iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = src_image_h, | |||
.viewType = radv_meta_get_view_type(src_image), | |||
.format = src_image->vk_format, | |||
.subresourceRange = { | |||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | |||
.baseMipLevel = region->srcSubresource.mipLevel, | |||
.levelCount = 1, | |||
.baseArrayLayer = src_base_layer + layer, | |||
.layerCount = 1, | |||
}, | |||
}, | |||
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); | |||
struct radv_image_view dest_iview; | |||
radv_image_view_init(&dest_iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = dest_image_h, | |||
.viewType = radv_meta_get_view_type(dest_image), | |||
.format = dest_image->vk_format, | |||
.subresourceRange = { | |||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | |||
.baseMipLevel = region->dstSubresource.mipLevel, | |||
.levelCount = 1, | |||
.baseArrayLayer = dest_base_layer + layer, | |||
.layerCount = 1, | |||
}, | |||
}, | |||
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); | |||
VkFramebuffer fb_h; | |||
radv_CreateFramebuffer(device_h, | |||
&(VkFramebufferCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | |||
.attachmentCount = 2, | |||
.pAttachments = (VkImageView[]) { | |||
radv_image_view_to_handle(&src_iview), | |||
radv_image_view_to_handle(&dest_iview), | |||
}, | |||
.width = radv_minify(dest_image->extent.width, | |||
region->dstSubresource.mipLevel), | |||
.height = radv_minify(dest_image->extent.height, | |||
region->dstSubresource.mipLevel), | |||
.layers = 1 | |||
}, | |||
&cmd_buffer->pool->alloc, | |||
&fb_h); | |||
RADV_CALL(CmdBeginRenderPass)(cmd_buffer_h, | |||
&(VkRenderPassBeginInfo) { | |||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | |||
.renderPass = device->meta_state.resolve.pass, | |||
.framebuffer = fb_h, | |||
.renderArea = { | |||
.offset = { | |||
dstOffset.x, | |||
dstOffset.y, | |||
}, | |||
.extent = { | |||
extent.width, | |||
extent.height, | |||
} | |||
}, | |||
.clearValueCount = 0, | |||
.pClearValues = NULL, | |||
}, | |||
VK_SUBPASS_CONTENTS_INLINE); | |||
emit_resolve(cmd_buffer, | |||
&(VkOffset2D) { | |||
.x = srcOffset.x, | |||
.y = srcOffset.y, | |||
}, | |||
&(VkOffset2D) { | |||
.x = dstOffset.x, | |||
.y = dstOffset.y, | |||
}, | |||
&(VkExtent2D) { | |||
.width = extent.width, | |||
.height = extent.height, | |||
}); | |||
RADV_CALL(CmdEndRenderPass)(cmd_buffer_h); | |||
radv_DestroyFramebuffer(device_h, fb_h, | |||
&cmd_buffer->pool->alloc); | |||
} | |||
} | |||
radv_meta_restore(&saved_state, cmd_buffer); | |||
} | |||
/** | |||
* Emit any needed resolves for the current subpass. | |||
*/ | |||
void | |||
radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer) | |||
{ | |||
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; | |||
const struct radv_subpass *subpass = cmd_buffer->state.subpass; | |||
struct radv_meta_saved_state saved_state; | |||
/* FINISHME(perf): Skip clears for resolve attachments. | |||
* | |||
* From the Vulkan 1.0 spec: | |||
* | |||
* If the first use of an attachment in a render pass is as a resolve | |||
* attachment, then the loadOp is effectively ignored as the resolve is | |||
* guaranteed to overwrite all pixels in the render area. | |||
*/ | |||
if (!subpass->has_resolve) | |||
return; | |||
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); | |||
for (uint32_t i = 0; i < subpass->color_count; ++i) { | |||
VkAttachmentReference src_att = subpass->color_attachments[i]; | |||
VkAttachmentReference dest_att = subpass->resolve_attachments[i]; | |||
if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
continue;
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
if (dst_img->surface.dcc_size) { | |||
radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff); | |||
cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | |||
} | |||
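/* Bind a temporary two-attachment subpass (source, destination) so the
 * CB_RESOLVE pipeline sees the attachment layout it was created for. */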
struct radv_subpass resolve_subpass = { | |||
.color_count = 2, | |||
.color_attachments = (VkAttachmentReference[]) { src_att, dest_att }, | |||
.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED }, | |||
}; | |||
radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false); | |||
/* Subpass resolves must respect the render area. We can ignore the | |||
* render area here because vkCmdBeginRenderPass set the render area | |||
* with 3DSTATE_DRAWING_RECTANGLE. | |||
* | |||
* XXX(chadv): Does the hardware really respect | |||
* 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
*/ | |||
emit_resolve(cmd_buffer, | |||
&(VkOffset2D) { 0, 0 }, | |||
&(VkOffset2D) { 0, 0 }, | |||
&(VkExtent2D) { fb->width, fb->height }); | |||
} | |||
cmd_buffer->state.subpass = subpass; | |||
radv_meta_restore(&saved_state, cmd_buffer); | |||
} |
@@ -0,0 +1,461 @@ | |||
/* | |||
* Copyright © 2016 Dave Airlie | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <assert.h> | |||
#include <stdbool.h> | |||
#include "radv_meta.h" | |||
#include "radv_private.h" | |||
#include "nir/nir_builder.h" | |||
#include "sid.h" | |||
#include "vk_format.h" | |||
static nir_shader * | |||
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int samples) | |||
{ | |||
nir_builder b; | |||
char name[64]; | |||
nir_if *outer_if = NULL; | |||
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, | |||
false, | |||
false, | |||
GLSL_TYPE_FLOAT); | |||
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, | |||
false, | |||
false, | |||
GLSL_TYPE_FLOAT); | |||
snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float"); | |||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); | |||
b.shader->info.name = ralloc_strdup(b.shader, name); | |||
b.shader->info.cs.local_size[0] = 16; | |||
b.shader->info.cs.local_size[1] = 16; | |||
b.shader->info.cs.local_size[2] = 1; | |||
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, | |||
sampler_type, "s_tex"); | |||
input_img->data.descriptor_set = 0; | |||
input_img->data.binding = 0; | |||
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, | |||
img_type, "out_img"); | |||
output_img->data.descriptor_set = 0; | |||
output_img->data.binding = 1; | |||
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); | |||
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); | |||
nir_ssa_def *block_size = nir_imm_ivec4(&b, | |||
b.shader->info.cs.local_size[0], | |||
b.shader->info.cs.local_size[1], | |||
b.shader->info.cs.local_size[2], 0); | |||
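/* gl_GlobalInvocationID = workgroup_id * workgroup_size + local_invocation_id */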
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); | |||
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); | |||
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
src_offset->num_components = 2; | |||
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset"); | |||
nir_builder_instr_insert(&b, &src_offset->instr); | |||
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); | |||
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8)); | |||
dst_offset->num_components = 2; | |||
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset"); | |||
nir_builder_instr_insert(&b, &dst_offset->instr); | |||
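/* Push constants are two ivec2s: src_offset at byte 0 and dst_offset at
 * byte 8, matching the 16-byte VkPushConstantRange set up in
 * create_layout(). */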
nir_ssa_def *img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa); | |||
/* Fetch sample 0 with txf_ms; the remaining samples are fetched and averaged below. */
nir_ssa_def *tmp; | |||
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); | |||
tex->sampler_dim = GLSL_SAMPLER_DIM_MS; | |||
tex->op = nir_texop_txf_ms; | |||
tex->src[0].src_type = nir_tex_src_coord; | |||
tex->src[0].src = nir_src_for_ssa(img_coord); | |||
tex->src[1].src_type = nir_tex_src_ms_index; | |||
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); | |||
tex->dest_type = nir_type_float; | |||
tex->is_array = false; | |||
tex->coord_components = 2; | |||
tex->texture = nir_deref_var_create(tex, input_img); | |||
tex->sampler = NULL; | |||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); | |||
nir_builder_instr_insert(&b, &tex->instr); | |||
tmp = &tex->dest.ssa; | |||
nir_variable *color = | |||
nir_local_variable_create(b.impl, glsl_vec4_type(), "color"); | |||
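/* For non-integer formats, samples_identical allows skipping the per-sample
 * fetch/average loop when every sample holds the same value. */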
if (!is_integer && samples > 1) { | |||
nir_tex_instr *tex_all_same = nir_tex_instr_create(b.shader, 1); | |||
tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS; | |||
tex_all_same->op = nir_texop_samples_identical; | |||
tex_all_same->src[0].src_type = nir_tex_src_coord; | |||
tex_all_same->src[0].src = nir_src_for_ssa(img_coord); | |||
tex_all_same->dest_type = nir_type_float; | |||
tex_all_same->is_array = false; | |||
tex_all_same->coord_components = 2; | |||
tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img); | |||
tex_all_same->sampler = NULL; | |||
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex"); | |||
nir_builder_instr_insert(&b, &tex_all_same->instr); | |||
nir_ssa_def *all_same = nir_ine(&b, &tex_all_same->dest.ssa, nir_imm_int(&b, 0)); | |||
nir_if *if_stmt = nir_if_create(b.shader); | |||
if_stmt->condition = nir_src_for_ssa(all_same); | |||
nir_cf_node_insert(b.cursor, &if_stmt->cf_node); | |||
b.cursor = nir_after_cf_list(&if_stmt->then_list); | |||
for (int i = 1; i < samples; i++) { | |||
nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 2); | |||
tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS; | |||
tex_add->op = nir_texop_txf_ms; | |||
tex_add->src[0].src_type = nir_tex_src_coord; | |||
tex_add->src[0].src = nir_src_for_ssa(img_coord); | |||
tex_add->src[1].src_type = nir_tex_src_ms_index; | |||
tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); | |||
tex_add->dest_type = nir_type_float; | |||
tex_add->is_array = false; | |||
tex_add->coord_components = 2; | |||
tex_add->texture = nir_deref_var_create(tex_add, input_img); | |||
tex_add->sampler = NULL; | |||
nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex"); | |||
nir_builder_instr_insert(&b, &tex_add->instr); | |||
tmp = nir_fadd(&b, tmp, &tex_add->dest.ssa); | |||
} | |||
tmp = nir_fdiv(&b, tmp, nir_imm_float(&b, samples)); | |||
nir_store_var(&b, color, tmp, 0xf); | |||
b.cursor = nir_after_cf_list(&if_stmt->else_list); | |||
outer_if = if_stmt; | |||
} | |||
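/* This store is emitted at the current cursor: inside the else branch when
 * the branch above exists, or straight-line otherwise; either way 'color'
 * holds the resolved value afterwards. */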
nir_store_var(&b, color, &tex->dest.ssa, 0xf); | |||
if (outer_if) | |||
b.cursor = nir_after_cf_node(&outer_if->cf_node); | |||
nir_ssa_def *newv = nir_load_var(&b, color); | |||
nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa); | |||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); | |||
store->src[0] = nir_src_for_ssa(coord); | |||
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); | |||
store->src[2] = nir_src_for_ssa(newv); | |||
store->variables[0] = nir_deref_var_create(store, output_img); | |||
nir_builder_instr_insert(&b, &store->instr); | |||
return b.shader; | |||
} | |||
static VkResult | |||
create_layout(struct radv_device *device) | |||
{ | |||
VkResult result; | |||
/*
* Two descriptors: one for the image being sampled,
* one for the storage image being written.
*/
VkDescriptorSetLayoutCreateInfo ds_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | |||
.bindingCount = 2, | |||
.pBindings = (VkDescriptorSetLayoutBinding[]) { | |||
{ | |||
.binding = 0, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
{ | |||
.binding = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | |||
.descriptorCount = 1, | |||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.pImmutableSamplers = NULL | |||
}, | |||
} | |||
}; | |||
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), | |||
&ds_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.resolve_compute.ds_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
VkPipelineLayoutCreateInfo pl_create_info = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | |||
.setLayoutCount = 1, | |||
.pSetLayouts = &device->meta_state.resolve_compute.ds_layout, | |||
.pushConstantRangeCount = 1, | |||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, | |||
}; | |||
result = radv_CreatePipelineLayout(radv_device_to_handle(device), | |||
&pl_create_info, | |||
&device->meta_state.alloc, | |||
&device->meta_state.resolve_compute.p_layout); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
return VK_SUCCESS; | |||
fail: | |||
return result; | |||
} | |||
static VkResult | |||
create_resolve_pipeline(struct radv_device *device, | |||
int samples, | |||
bool is_integer, | |||
VkPipeline *pipeline) | |||
{ | |||
VkResult result; | |||
struct radv_shader_module cs = { .nir = NULL }; | |||
cs.nir = build_resolve_compute_shader(device, is_integer, samples); | |||
/* compute shader */ | |||
VkPipelineShaderStageCreateInfo pipeline_shader_stage = { | |||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |||
.stage = VK_SHADER_STAGE_COMPUTE_BIT, | |||
.module = radv_shader_module_to_handle(&cs), | |||
.pName = "main", | |||
.pSpecializationInfo = NULL, | |||
}; | |||
VkComputePipelineCreateInfo vk_pipeline_info = { | |||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | |||
.stage = pipeline_shader_stage, | |||
.flags = 0, | |||
.layout = device->meta_state.resolve_compute.p_layout, | |||
}; | |||
result = radv_CreateComputePipelines(radv_device_to_handle(device), | |||
radv_pipeline_cache_to_handle(&device->meta_state.cache), | |||
1, &vk_pipeline_info, NULL, | |||
pipeline); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
ralloc_free(cs.nir); | |||
return VK_SUCCESS; | |||
fail: | |||
ralloc_free(cs.nir); | |||
return result; | |||
} | |||
VkResult | |||
radv_device_init_meta_resolve_compute_state(struct radv_device *device) | |||
{ | |||
struct radv_meta_state *state = &device->meta_state; | |||
VkResult res; | |||
memset(&device->meta_state.resolve_compute, 0, sizeof(device->meta_state.resolve_compute)); | |||
res = create_layout(device); | |||
if (res != VK_SUCCESS) | |||
return res; | |||
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
uint32_t samples = 1 << i;
res = create_resolve_pipeline(device, samples, false,
&state->resolve_compute.rc[i].pipeline);
if (res != VK_SUCCESS)
return res;
res = create_resolve_pipeline(device, samples, true,
&state->resolve_compute.rc[i].i_pipeline);
if (res != VK_SUCCESS)
return res;
}
return res; | |||
} | |||
void | |||
radv_device_finish_meta_resolve_compute_state(struct radv_device *device) | |||
{ | |||
struct radv_meta_state *state = &device->meta_state; | |||
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { | |||
radv_DestroyPipeline(radv_device_to_handle(device), | |||
state->resolve_compute.rc[i].pipeline, | |||
&state->alloc); | |||
radv_DestroyPipeline(radv_device_to_handle(device), | |||
state->resolve_compute.rc[i].i_pipeline, | |||
&state->alloc); | |||
} | |||
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), | |||
state->resolve_compute.ds_layout, | |||
&state->alloc); | |||
radv_DestroyPipelineLayout(radv_device_to_handle(device), | |||
state->resolve_compute.p_layout, | |||
&state->alloc); | |||
} | |||
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, | |||
struct radv_image *src_image, | |||
VkImageLayout src_image_layout, | |||
struct radv_image *dest_image, | |||
VkImageLayout dest_image_layout, | |||
uint32_t region_count, | |||
const VkImageResolve *regions) | |||
{ | |||
struct radv_device *device = cmd_buffer->device; | |||
struct radv_meta_saved_compute_state saved_state; | |||
const uint32_t samples = src_image->samples; | |||
const uint32_t samples_log2 = ffs(samples) - 1; | |||
radv_meta_save_compute(&saved_state, cmd_buffer, 16); | |||
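/* Save the compute state, including the 16 bytes of push constants this
 * meta operation overwrites. */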
for (uint32_t r = 0; r < region_count; ++r) { | |||
const VkImageResolve *region = ®ions[r]; | |||
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); | |||
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); | |||
assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount); | |||
const uint32_t src_base_layer = | |||
radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, | |||
®ion->srcOffset); | |||
const uint32_t dest_base_layer = | |||
radv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, | |||
®ion->dstOffset); | |||
const struct VkExtent3D extent = | |||
radv_sanitize_image_extent(src_image->type, region->extent); | |||
const struct VkOffset3D srcOffset = | |||
radv_sanitize_image_offset(src_image->type, region->srcOffset); | |||
const struct VkOffset3D dstOffset = | |||
radv_sanitize_image_offset(dest_image->type, region->dstOffset); | |||
for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; | |||
++layer) { | |||
struct radv_image_view src_iview; | |||
VkDescriptorSet set; | |||
radv_image_view_init(&src_iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = radv_image_to_handle(src_image), | |||
.viewType = radv_meta_get_view_type(src_image), | |||
.format = src_image->vk_format, | |||
.subresourceRange = { | |||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | |||
.baseMipLevel = region->srcSubresource.mipLevel, | |||
.levelCount = 1, | |||
.baseArrayLayer = src_base_layer + layer, | |||
.layerCount = 1, | |||
}, | |||
}, | |||
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); | |||
struct radv_image_view dest_iview; | |||
radv_image_view_init(&dest_iview, cmd_buffer->device, | |||
&(VkImageViewCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | |||
.image = radv_image_to_handle(dest_image), | |||
.viewType = radv_meta_get_view_type(dest_image), | |||
.format = dest_image->vk_format, | |||
.subresourceRange = { | |||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | |||
.baseMipLevel = region->dstSubresource.mipLevel, | |||
.levelCount = 1, | |||
.baseArrayLayer = dest_base_layer + layer, | |||
.layerCount = 1, | |||
}, | |||
}, | |||
cmd_buffer, VK_IMAGE_USAGE_STORAGE_BIT); | |||
radv_temp_descriptor_set_create(device, cmd_buffer, | |||
device->meta_state.resolve_compute.ds_layout, | |||
&set); | |||
radv_UpdateDescriptorSets(radv_device_to_handle(device), | |||
2, /* writeCount */ | |||
(VkWriteDescriptorSet[]) { | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = set, | |||
.dstBinding = 0, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, | |||
.pImageInfo = (VkDescriptorImageInfo[]) { | |||
{ | |||
.sampler = NULL, | |||
.imageView = radv_image_view_to_handle(&src_iview), | |||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL, | |||
}, | |||
} | |||
}, | |||
{ | |||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |||
.dstSet = set, | |||
.dstBinding = 1, | |||
.dstArrayElement = 0, | |||
.descriptorCount = 1, | |||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | |||
.pImageInfo = (VkDescriptorImageInfo[]) { | |||
{ | |||
.sampler = NULL, | |||
.imageView = radv_image_view_to_handle(&dest_iview), | |||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL, | |||
}, | |||
} | |||
} | |||
}, 0, NULL); | |||
radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, | |||
device->meta_state.resolve_compute.p_layout, 0, 1, | |||
&set, 0, NULL); | |||
VkPipeline pipeline; | |||
if (vk_format_is_int(src_image->vk_format)) | |||
pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline; | |||
else | |||
pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline; | |||
if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { | |||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), | |||
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | |||
} | |||
unsigned push_constants[4] = { | |||
srcOffset.x, | |||
srcOffset.y, | |||
dstOffset.x, | |||
dstOffset.y, | |||
}; | |||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), | |||
device->meta_state.resolve_compute.p_layout, | |||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, | |||
push_constants); | |||
radv_unaligned_dispatch(cmd_buffer, extent.width, extent.height, 1); | |||
radv_temp_descriptor_set_destroy(cmd_buffer->device, set); | |||
} | |||
} | |||
radv_meta_restore_compute(&saved_state, cmd_buffer, 16); | |||
} |
@@ -0,0 +1,183 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based in part on anv driver which is: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "radv_private.h" | |||
VkResult radv_CreateRenderPass( | |||
VkDevice _device, | |||
const VkRenderPassCreateInfo* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkRenderPass* pRenderPass) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
struct radv_render_pass *pass; | |||
size_t size; | |||
size_t attachments_offset; | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); | |||
size = sizeof(*pass); | |||
size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); | |||
attachments_offset = size; | |||
size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); | |||
pass = radv_alloc2(&device->alloc, pAllocator, size, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (pass == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
memset(pass, 0, size); | |||
pass->attachment_count = pCreateInfo->attachmentCount; | |||
pass->subpass_count = pCreateInfo->subpassCount; | |||
pass->attachments = (void *) pass + attachments_offset; | |||
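/* Single allocation layout: the pass struct, then the subpass array, then
 * the attachment array at attachments_offset. */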
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { | |||
struct radv_render_pass_attachment *att = &pass->attachments[i]; | |||
att->format = pCreateInfo->pAttachments[i].format; | |||
att->samples = pCreateInfo->pAttachments[i].samples; | |||
att->load_op = pCreateInfo->pAttachments[i].loadOp; | |||
att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; | |||
att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; | |||
att->final_layout = pCreateInfo->pAttachments[i].finalLayout; | |||
// att->store_op = pCreateInfo->pAttachments[i].storeOp; | |||
// att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; | |||
} | |||
uint32_t subpass_attachment_count = 0; | |||
VkAttachmentReference *p; | |||
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { | |||
const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; | |||
subpass_attachment_count += | |||
desc->inputAttachmentCount + | |||
desc->colorAttachmentCount + | |||
/* Count colorAttachmentCount again for resolve_attachments */ | |||
desc->colorAttachmentCount; | |||
} | |||
if (subpass_attachment_count) { | |||
pass->subpass_attachments = | |||
radv_alloc2(&device->alloc, pAllocator, | |||
subpass_attachment_count * sizeof(VkAttachmentReference), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (pass->subpass_attachments == NULL) { | |||
radv_free2(&device->alloc, pAllocator, pass); | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
} | |||
} else | |||
pass->subpass_attachments = NULL; | |||
p = pass->subpass_attachments; | |||
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { | |||
const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; | |||
struct radv_subpass *subpass = &pass->subpasses[i]; | |||
subpass->input_count = desc->inputAttachmentCount; | |||
subpass->color_count = desc->colorAttachmentCount; | |||
if (desc->inputAttachmentCount > 0) { | |||
subpass->input_attachments = p; | |||
p += desc->inputAttachmentCount; | |||
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { | |||
subpass->input_attachments[j] | |||
= desc->pInputAttachments[j]; | |||
} | |||
} | |||
if (desc->colorAttachmentCount > 0) { | |||
subpass->color_attachments = p; | |||
p += desc->colorAttachmentCount; | |||
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { | |||
subpass->color_attachments[j] | |||
= desc->pColorAttachments[j]; | |||
} | |||
} | |||
subpass->has_resolve = false; | |||
if (desc->pResolveAttachments) { | |||
subpass->resolve_attachments = p; | |||
p += desc->colorAttachmentCount; | |||
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { | |||
uint32_t a = desc->pResolveAttachments[j].attachment; | |||
subpass->resolve_attachments[j] | |||
= desc->pResolveAttachments[j]; | |||
if (a != VK_ATTACHMENT_UNUSED) | |||
subpass->has_resolve = true; | |||
} | |||
} | |||
if (desc->pDepthStencilAttachment) { | |||
subpass->depth_stencil_attachment = | |||
*desc->pDepthStencilAttachment; | |||
} else { | |||
subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; | |||
} | |||
} | |||
for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { | |||
uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; | |||
if (dst == VK_SUBPASS_EXTERNAL) { | |||
pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; | |||
pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; | |||
pass->end_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask; | |||
} else { | |||
pass->subpasses[dst].start_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; | |||
pass->subpasses[dst].start_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; | |||
pass->subpasses[dst].start_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask; | |||
} | |||
} | |||
*pRenderPass = radv_render_pass_to_handle(pass); | |||
return VK_SUCCESS; | |||
} | |||
void radv_DestroyRenderPass( | |||
VkDevice _device, | |||
VkRenderPass _pass, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_render_pass, pass, _pass); | |||
if (!_pass) | |||
return; | |||
radv_free2(&device->alloc, pAllocator, pass->subpass_attachments); | |||
radv_free2(&device->alloc, pAllocator, pass); | |||
} | |||
void radv_GetRenderAreaGranularity( | |||
VkDevice device, | |||
VkRenderPass renderPass, | |||
VkExtent2D* pGranularity) | |||
{ | |||
pGranularity->width = 1; | |||
pGranularity->height = 1; | |||
} | |||
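/* For reference, a minimal core-Vulkan usage sketch (no radv-specific names
 * assumed) of a single-subpass pass whose resolve attachment exercises the
 * has_resolve path above. Attachment 0 is 4x MSAA color, attachment 1 the
 * single-sample resolve target: */
#if 0 /* usage sketch, not driver code */
VkAttachmentDescription atts[2] = {
[0] = { .format = VK_FORMAT_B8G8R8A8_UNORM, .samples = VK_SAMPLE_COUNT_4_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE },
[1] = { .format = VK_FORMAT_B8G8R8A8_UNORM, .samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .storeOp = VK_ATTACHMENT_STORE_OP_STORE },
};
VkAttachmentReference color = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
VkAttachmentReference resolve = { 1, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
VkSubpassDescription subpass = {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.colorAttachmentCount = 1,
.pColorAttachments = &color,
.pResolveAttachments = &resolve, /* non-NULL with a used attachment => has_resolve */
};
VkRenderPassCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 2, .pAttachments = atts,
.subpassCount = 1, .pSubpasses = &subpass,
};
#endif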
@@ -0,0 +1,475 @@ | |||
/* | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "util/mesa-sha1.h" | |||
#include "util/debug.h" | |||
#include "radv_private.h" | |||
#include "ac_nir_to_llvm.h" | |||
struct cache_entry { | |||
unsigned char sha1[20]; | |||
uint32_t code_size; | |||
struct ac_shader_variant_info variant_info; | |||
struct ac_shader_config config; | |||
uint32_t rsrc1, rsrc2; | |||
struct radv_shader_variant *variant; | |||
uint32_t code[0]; | |||
}; | |||
void | |||
radv_pipeline_cache_init(struct radv_pipeline_cache *cache, | |||
struct radv_device *device) | |||
{ | |||
cache->device = device; | |||
pthread_mutex_init(&cache->mutex, NULL); | |||
cache->modified = false; | |||
cache->kernel_count = 0; | |||
cache->total_size = 0; | |||
cache->table_size = 1024; | |||
const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); | |||
cache->hash_table = malloc(byte_size); | |||
/* We don't consider allocation failure fatal; we just start with a 0-sized
* cache. */
if (cache->hash_table == NULL || | |||
!env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true)) | |||
cache->table_size = 0; | |||
else | |||
memset(cache->hash_table, 0, byte_size); | |||
} | |||
void | |||
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache) | |||
{ | |||
for (unsigned i = 0; i < cache->table_size; ++i) | |||
if (cache->hash_table[i]) { | |||
if (cache->hash_table[i]->variant) | |||
radv_shader_variant_destroy(cache->device, | |||
cache->hash_table[i]->variant); | |||
radv_free(&cache->alloc, cache->hash_table[i]); | |||
} | |||
pthread_mutex_destroy(&cache->mutex); | |||
free(cache->hash_table); | |||
} | |||
static uint32_t | |||
entry_size(struct cache_entry *entry) | |||
{ | |||
return sizeof(*entry) + entry->code_size; | |||
} | |||
void | |||
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module, | |||
const char *entrypoint, | |||
const VkSpecializationInfo *spec_info, | |||
const struct radv_pipeline_layout *layout, | |||
const union ac_shader_variant_key *key) | |||
{ | |||
struct mesa_sha1 *ctx; | |||
ctx = _mesa_sha1_init(); | |||
if (key) | |||
_mesa_sha1_update(ctx, key, sizeof(*key)); | |||
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); | |||
_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); | |||
if (layout) | |||
_mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1)); | |||
if (spec_info) { | |||
_mesa_sha1_update(ctx, spec_info->pMapEntries, | |||
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); | |||
_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); | |||
} | |||
_mesa_sha1_final(ctx, hash); | |||
} | |||
static struct cache_entry * | |||
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, | |||
const unsigned char *sha1) | |||
{ | |||
const uint32_t mask = cache->table_size - 1; | |||
const uint32_t start = (*(uint32_t *) sha1); | |||
for (uint32_t i = 0; i < cache->table_size; i++) { | |||
const uint32_t index = (start + i) & mask; | |||
struct cache_entry *entry = cache->hash_table[index]; | |||
if (!entry) | |||
return NULL; | |||
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { | |||
return entry; | |||
} | |||
} | |||
/* Either the table is full (which the insert path never allows) or the
 * cache was disabled with table_size == 0 and the loop never ran; treat
 * both as a miss. */
return NULL;
} | |||
static struct cache_entry * | |||
radv_pipeline_cache_search(struct radv_pipeline_cache *cache, | |||
const unsigned char *sha1) | |||
{ | |||
struct cache_entry *entry; | |||
pthread_mutex_lock(&cache->mutex); | |||
entry = radv_pipeline_cache_search_unlocked(cache, sha1); | |||
pthread_mutex_unlock(&cache->mutex); | |||
return entry; | |||
} | |||
struct radv_shader_variant * | |||
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device, | |||
struct radv_pipeline_cache *cache, | |||
const unsigned char *sha1) | |||
{ | |||
struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1); | |||
if (!entry) | |||
return NULL; | |||
if (!entry->variant) { | |||
struct radv_shader_variant *variant; | |||
variant = calloc(1, sizeof(struct radv_shader_variant)); | |||
if (!variant) | |||
return NULL; | |||
variant->config = entry->config; | |||
variant->info = entry->variant_info; | |||
variant->rsrc1 = entry->rsrc1; | |||
variant->rsrc2 = entry->rsrc2; | |||
variant->ref_count = 1; | |||
variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256, | |||
RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); | |||
void *ptr = device->ws->buffer_map(variant->bo); | |||
memcpy(ptr, entry->code, entry->code_size); | |||
device->ws->buffer_unmap(variant->bo); | |||
entry->variant = variant; | |||
} | |||
__sync_fetch_and_add(&entry->variant->ref_count, 1); | |||
return entry->variant; | |||
} | |||
static void | |||
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache, | |||
struct cache_entry *entry) | |||
{ | |||
const uint32_t mask = cache->table_size - 1; | |||
const uint32_t start = (*(uint32_t *) entry->sha1); | |||
/* We'll always be able to insert when we get here. */ | |||
assert(cache->kernel_count < cache->table_size / 2); | |||
for (uint32_t i = 0; i < cache->table_size; i++) { | |||
const uint32_t index = (start + i) & mask; | |||
if (!cache->hash_table[index]) { | |||
cache->hash_table[index] = entry; | |||
break; | |||
} | |||
} | |||
cache->total_size += entry_size(entry); | |||
cache->kernel_count++; | |||
} | |||
static VkResult | |||
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache) | |||
{ | |||
const uint32_t table_size = cache->table_size * 2; | |||
const uint32_t old_table_size = cache->table_size; | |||
const size_t byte_size = table_size * sizeof(cache->hash_table[0]); | |||
struct cache_entry **table; | |||
struct cache_entry **old_table = cache->hash_table; | |||
table = malloc(byte_size); | |||
if (table == NULL) | |||
return VK_ERROR_OUT_OF_HOST_MEMORY; | |||
cache->hash_table = table; | |||
cache->table_size = table_size; | |||
cache->kernel_count = 0; | |||
cache->total_size = 0; | |||
memset(cache->hash_table, 0, byte_size); | |||
for (uint32_t i = 0; i < old_table_size; i++) { | |||
struct cache_entry *entry = old_table[i]; | |||
if (!entry) | |||
continue; | |||
radv_pipeline_cache_set_entry(cache, entry); | |||
} | |||
free(old_table); | |||
return VK_SUCCESS; | |||
} | |||
static void | |||
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache, | |||
struct cache_entry *entry) | |||
{ | |||
if (cache->kernel_count == cache->table_size / 2) | |||
radv_pipeline_cache_grow(cache); | |||
/* Failing to grow the hash table isn't fatal, but it may mean we don't
* have enough space to add this new kernel. Only add it if there's room.
*/
if (cache->kernel_count < cache->table_size / 2) | |||
radv_pipeline_cache_set_entry(cache, entry); | |||
} | |||
struct radv_shader_variant * | |||
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache, | |||
const unsigned char *sha1, | |||
struct radv_shader_variant *variant, | |||
const void *code, unsigned code_size) | |||
{ | |||
pthread_mutex_lock(&cache->mutex); | |||
struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1); | |||
if (entry) { | |||
if (entry->variant) { | |||
radv_shader_variant_destroy(cache->device, variant); | |||
variant = entry->variant; | |||
} else { | |||
entry->variant = variant; | |||
} | |||
__sync_fetch_and_add(&variant->ref_count, 1); | |||
pthread_mutex_unlock(&cache->mutex); | |||
return variant; | |||
} | |||
entry = radv_alloc(&cache->alloc, sizeof(*entry) + code_size, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_CACHE); | |||
if (!entry) { | |||
pthread_mutex_unlock(&cache->mutex); | |||
return variant; | |||
} | |||
memcpy(entry->sha1, sha1, 20); | |||
memcpy(entry->code, code, code_size); | |||
entry->config = variant->config; | |||
entry->variant_info = variant->info; | |||
entry->rsrc1 = variant->rsrc1; | |||
entry->rsrc2 = variant->rsrc2; | |||
entry->code_size = code_size; | |||
entry->variant = variant; | |||
__sync_fetch_and_add(&variant->ref_count, 1); | |||
radv_pipeline_cache_add_entry(cache, entry); | |||
cache->modified = true; | |||
pthread_mutex_unlock(&cache->mutex); | |||
return variant; | |||
} | |||
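/* Taken together, the helpers above give the shader compile path roughly
 * this shape; compile_shader() is a hypothetical stand-in for the actual
 * NIR->LLVM path, shown only to illustrate the intended cache flow: */
#if 0 /* usage sketch */
unsigned char sha1[20];
radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key);
variant = radv_create_shader_variant_from_pipeline_cache(device, cache, sha1);
if (!variant) {
void *code; unsigned code_size;
variant = compile_shader(device, module, entrypoint, &code, &code_size);
/* may hand back another thread's entry if we raced on the insert */
variant = radv_pipeline_cache_insert_shader(cache, sha1, variant,
code, code_size);
}
#endif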
struct cache_header { | |||
uint32_t header_size; | |||
uint32_t header_version; | |||
uint32_t vendor_id; | |||
uint32_t device_id; | |||
uint8_t uuid[VK_UUID_SIZE]; | |||
}; | |||
void | |||
radv_pipeline_cache_load(struct radv_pipeline_cache *cache, | |||
const void *data, size_t size) | |||
{ | |||
struct radv_device *device = cache->device; | |||
struct cache_header header; | |||
uint8_t uuid[VK_UUID_SIZE]; | |||
if (size < sizeof(header)) | |||
return; | |||
memcpy(&header, data, sizeof(header)); | |||
if (header.header_size < sizeof(header)) | |||
return; | |||
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) | |||
return; | |||
if (header.vendor_id != 0x1002) | |||
return; | |||
if (header.device_id != device->instance->physicalDevice.rad_info.pci_id) | |||
return; | |||
radv_device_get_cache_uuid(uuid); | |||
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) | |||
return; | |||
char *end = (void *) data + size; | |||
char *p = (void *) data + header.header_size; | |||
while (end - p >= sizeof(struct cache_entry)) { | |||
struct cache_entry *entry = (struct cache_entry*)p; | |||
struct cache_entry *dest_entry; | |||
if(end - p < sizeof(*entry) + entry->code_size) | |||
break; | |||
dest_entry = radv_alloc(&cache->alloc, sizeof(*entry) + entry->code_size, | |||
8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); | |||
if (dest_entry) { | |||
memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size); | |||
dest_entry->variant = NULL; | |||
radv_pipeline_cache_add_entry(cache, dest_entry); | |||
} | |||
p += sizeof (*entry) + entry->code_size; | |||
} | |||
} | |||
VkResult radv_CreatePipelineCache( | |||
VkDevice _device, | |||
const VkPipelineCacheCreateInfo* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkPipelineCache* pPipelineCache) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
struct radv_pipeline_cache *cache; | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); | |||
assert(pCreateInfo->flags == 0); | |||
cache = radv_alloc2(&device->alloc, pAllocator, | |||
sizeof(*cache), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (cache == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
if (pAllocator) | |||
cache->alloc = *pAllocator; | |||
else | |||
cache->alloc = device->alloc; | |||
radv_pipeline_cache_init(cache, device); | |||
if (pCreateInfo->initialDataSize > 0) { | |||
radv_pipeline_cache_load(cache, | |||
pCreateInfo->pInitialData, | |||
pCreateInfo->initialDataSize); | |||
} | |||
*pPipelineCache = radv_pipeline_cache_to_handle(cache); | |||
return VK_SUCCESS; | |||
} | |||
void radv_DestroyPipelineCache( | |||
VkDevice _device, | |||
VkPipelineCache _cache, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); | |||
if (!cache) | |||
return; | |||
radv_pipeline_cache_finish(cache); | |||
radv_free2(&device->alloc, pAllocator, cache); | |||
} | |||
VkResult radv_GetPipelineCacheData( | |||
VkDevice _device, | |||
VkPipelineCache _cache, | |||
size_t* pDataSize, | |||
void* pData) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); | |||
struct cache_header *header; | |||
VkResult result = VK_SUCCESS; | |||
const size_t size = sizeof(*header) + cache->total_size; | |||
if (pData == NULL) { | |||
*pDataSize = size; | |||
return VK_SUCCESS; | |||
} | |||
if (*pDataSize < sizeof(*header)) { | |||
*pDataSize = 0; | |||
return VK_INCOMPLETE; | |||
} | |||
void *p = pData, *end = pData + *pDataSize; | |||
header = p; | |||
header->header_size = sizeof(*header); | |||
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; | |||
header->vendor_id = 0x1002; | |||
header->device_id = device->instance->physicalDevice.rad_info.pci_id; | |||
radv_device_get_cache_uuid(header->uuid); | |||
p += header->header_size; | |||
struct cache_entry *entry; | |||
for (uint32_t i = 0; i < cache->table_size; i++) { | |||
if (!cache->hash_table[i]) | |||
continue; | |||
entry = cache->hash_table[i]; | |||
const uint32_t size = entry_size(entry); | |||
if (end < p + size) { | |||
result = VK_INCOMPLETE; | |||
break; | |||
} | |||
memcpy(p, entry, size); | |||
((struct cache_entry*)p)->variant = NULL; | |||
p += size; | |||
} | |||
*pDataSize = p - pData; | |||
return result; | |||
} | |||
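/* From the application side this implements the standard Vulkan two-call
 * idiom; a minimal save sketch using only core API: */
#if 0 /* usage sketch */
size_t size = 0;
vkGetPipelineCacheData(device, cache, &size, NULL); /* query the blob size */
void *blob = malloc(size);
if (blob && vkGetPipelineCacheData(device, cache, &size, blob) == VK_SUCCESS) {
/* persist `size` bytes; feed them back later through
 * VkPipelineCacheCreateInfo::pInitialData / initialDataSize */
}
free(blob);
#endif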
static void | |||
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst, | |||
struct radv_pipeline_cache *src) | |||
{ | |||
for (uint32_t i = 0; i < src->table_size; i++) { | |||
struct cache_entry *entry = src->hash_table[i]; | |||
if (!entry || radv_pipeline_cache_search(dst, entry->sha1)) | |||
continue; | |||
radv_pipeline_cache_add_entry(dst, entry); | |||
src->hash_table[i] = NULL; | |||
} | |||
} | |||
VkResult radv_MergePipelineCaches( | |||
VkDevice _device, | |||
VkPipelineCache destCache, | |||
uint32_t srcCacheCount, | |||
const VkPipelineCache* pSrcCaches) | |||
{ | |||
RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache); | |||
for (uint32_t i = 0; i < srcCacheCount; i++) { | |||
RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]); | |||
radv_pipeline_cache_merge(dst, src); | |||
} | |||
return VK_SUCCESS; | |||
} |
@@ -0,0 +1,415 @@ | |||
/* | |||
* Copyright 2016 Red Hat Inc.
* Based on anv: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <assert.h> | |||
#include <stdbool.h> | |||
#include <string.h> | |||
#include <unistd.h> | |||
#include <fcntl.h> | |||
#include "radv_private.h" | |||
#include "radv_cs.h" | |||
#include "sid.h" | |||
static unsigned get_max_db(struct radv_device *device) | |||
{ | |||
unsigned num_db = device->instance->physicalDevice.rad_info.num_render_backends; | |||
unsigned rb_mask = device->instance->physicalDevice.rad_info.enabled_rb_mask; | |||
if (device->instance->physicalDevice.rad_info.chip_class == SI) | |||
num_db = 8; | |||
else | |||
num_db = MAX2(8, num_db); | |||
/* Otherwise we need to change the query reset procedure */ | |||
assert(rb_mask == ((1ull << num_db) - 1)); | |||
return num_db; | |||
} | |||
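/* The resulting occlusion slot layout, as used by CmdBegin/EndQuery and the
 * result readers below: one 16-byte begin/end ZPASS_DONE pair per render
 * backend, followed by a 16-byte slot whose first 8 bytes receive the
 * accumulated count from PKT3_OCCLUSION_QUERY. Hence the
 * stride = 16 * get_max_db() + 16 chosen below, with results read back at
 * offset stride - 16. */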
VkResult radv_CreateQueryPool( | |||
VkDevice _device, | |||
const VkQueryPoolCreateInfo* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkQueryPool* pQueryPool) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
uint64_t size; | |||
struct radv_query_pool *pool = radv_alloc2(&device->alloc, pAllocator, | |||
sizeof(*pool), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (!pool) | |||
return VK_ERROR_OUT_OF_HOST_MEMORY; | |||
switch(pCreateInfo->queryType) { | |||
case VK_QUERY_TYPE_OCCLUSION: | |||
/* 16 bytes of temporary buffer, as the compute packet writes 64 bits
* but the app may only have 32 bits of space. */
pool->stride = 16 * get_max_db(device) + 16; | |||
break; | |||
case VK_QUERY_TYPE_PIPELINE_STATISTICS: | |||
pool->stride = 16 * 11; | |||
break; | |||
case VK_QUERY_TYPE_TIMESTAMP: | |||
pool->stride = 8; | |||
break; | |||
default: | |||
unreachable("creating unhandled query type"); | |||
} | |||
pool->type = pCreateInfo->queryType; | |||
pool->availability_offset = pool->stride * pCreateInfo->queryCount; | |||
size = pool->availability_offset + 4 * pCreateInfo->queryCount; | |||
pool->bo = device->ws->buffer_create(device->ws, size, | |||
64, RADEON_DOMAIN_GTT, 0); | |||
if (!pool->bo) { | |||
radv_free2(&device->alloc, pAllocator, pool); | |||
return VK_ERROR_OUT_OF_DEVICE_MEMORY; | |||
} | |||
pool->ptr = device->ws->buffer_map(pool->bo); | |||
if (!pool->ptr) { | |||
device->ws->buffer_destroy(pool->bo); | |||
radv_free2(&device->alloc, pAllocator, pool); | |||
return VK_ERROR_OUT_OF_DEVICE_MEMORY; | |||
} | |||
memset(pool->ptr, 0, size); | |||
*pQueryPool = radv_query_pool_to_handle(pool); | |||
return VK_SUCCESS; | |||
} | |||
void radv_DestroyQueryPool( | |||
VkDevice _device, | |||
VkQueryPool _pool, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(radv_query_pool, pool, _pool); | |||
if (!pool) | |||
return; | |||
device->ws->buffer_destroy(pool->bo); | |||
radv_free2(&device->alloc, pAllocator, pool); | |||
} | |||
VkResult radv_GetQueryPoolResults( | |||
VkDevice _device, | |||
VkQueryPool queryPool, | |||
uint32_t firstQuery, | |||
uint32_t queryCount, | |||
size_t dataSize, | |||
void* pData, | |||
VkDeviceSize stride, | |||
VkQueryResultFlags flags) | |||
{ | |||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); | |||
char *data = pData; | |||
VkResult result = VK_SUCCESS; | |||
for(unsigned i = 0; i < queryCount; ++i, data += stride) { | |||
char *dest = data; | |||
unsigned query = firstQuery + i; | |||
char *src = pool->ptr + query * pool->stride; | |||
uint32_t available; | |||
if (flags & VK_QUERY_RESULT_WAIT_BIT) { | |||
while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) | |||
; | |||
} | |||
if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) && | |||
!(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { | |||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) | |||
*(uint32_t*)dest = 0; | |||
result = VK_NOT_READY; | |||
continue; | |||
} | |||
available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); | |||
switch (pool->type) { | |||
case VK_QUERY_TYPE_TIMESTAMP: | |||
if (flags & VK_QUERY_RESULT_64_BIT) { | |||
*(uint64_t*)dest = *(uint64_t*)src; | |||
dest += 8; | |||
} else { | |||
*(uint32_t*)dest = *(uint32_t*)src; | |||
dest += 4; | |||
} | |||
break; | |||
case VK_QUERY_TYPE_OCCLUSION: {
/* the accumulated result lives in the last 16 bytes of the slot */
uint64_t value = *(uint64_t*)(src + pool->stride - 16);
if (flags & VK_QUERY_RESULT_64_BIT) {
*(uint64_t*)dest = value;
dest += 8;
} else {
*(uint32_t*)dest = value;
dest += 4;
}
break;
}
default:
unreachable("trying to get results of unhandled query type");
} | |||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { | |||
*(uint32_t*)dest = available; | |||
dest += 4; | |||
} | |||
} | |||
return result; | |||
} | |||
void radv_CmdCopyQueryPoolResults( | |||
VkCommandBuffer commandBuffer, | |||
VkQueryPool queryPool, | |||
uint32_t firstQuery, | |||
uint32_t queryCount, | |||
VkBuffer dstBuffer, | |||
VkDeviceSize dstOffset, | |||
VkDeviceSize stride, | |||
VkQueryResultFlags flags) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); | |||
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); | |||
struct radeon_winsys_cs *cs = cmd_buffer->cs; | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); | |||
uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo); | |||
dest_va += dst_buffer->offset + dstOffset; | |||
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8); | |||
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8); | |||
for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) { | |||
unsigned query = firstQuery + i; | |||
uint64_t local_src_va = va + query * pool->stride; | |||
unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4; | |||
unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26); | |||
if (flags & VK_QUERY_RESULT_WAIT_BIT) { | |||
/* TODO: it is unclear whether there is any case where the result is not
 * already available at this point */
uint64_t avail_va = va + pool->availability_offset + 4 * query; | |||
/* This waits on the ME. All copies below are done on the ME */ | |||
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); | |||
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); | |||
radeon_emit(cs, avail_va); | |||
radeon_emit(cs, avail_va >> 32); | |||
radeon_emit(cs, 1); /* reference value */ | |||
radeon_emit(cs, 0xffffffff); /* mask */ | |||
radeon_emit(cs, 4); /* poll interval */ | |||
} | |||
switch (pool->type) { | |||
case VK_QUERY_TYPE_OCCLUSION: | |||
local_src_va += pool->stride - 16;
/* fall through - the accumulated occlusion result is copied like a timestamp */
case VK_QUERY_TYPE_TIMESTAMP: | |||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); | |||
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | | |||
COPY_DATA_DST_SEL(COPY_DATA_MEM) | | |||
((flags & VK_QUERY_RESULT_64_BIT) ? COPY_DATA_COUNT_SEL : 0)); | |||
radeon_emit(cs, local_src_va); | |||
radeon_emit(cs, local_src_va >> 32); | |||
radeon_emit(cs, dest_va); | |||
radeon_emit(cs, dest_va >> 32); | |||
break; | |||
default: | |||
unreachable("trying to get results of unhandled query type"); | |||
} | |||
/* The flag could still be changed while the data copy is busy and we
* might then have invalid data, but a ready flag. However, the availability
* writes happen on the ME too, so they should be synchronized. This might
* need to be revisited with multiple queues.
*/
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { | |||
uint64_t avail_va = va + pool->availability_offset + 4 * query; | |||
uint64_t avail_dest_va = dest_va; | |||
if (pool->type != VK_QUERY_TYPE_PIPELINE_STATISTICS) | |||
avail_dest_va += elem_size; | |||
else | |||
abort(); | |||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); | |||
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | | |||
COPY_DATA_DST_SEL(COPY_DATA_MEM)); | |||
radeon_emit(cs, avail_va); | |||
radeon_emit(cs, avail_va >> 32); | |||
radeon_emit(cs, avail_dest_va); | |||
radeon_emit(cs, avail_dest_va >> 32); | |||
} | |||
assert(cs->cdw <= cdw_max); | |||
} | |||
} | |||
void radv_CmdResetQueryPool( | |||
VkCommandBuffer commandBuffer, | |||
VkQueryPool queryPool, | |||
uint32_t firstQuery, | |||
uint32_t queryCount) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); | |||
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8); | |||
si_cp_dma_clear_buffer(cmd_buffer, va + firstQuery * pool->stride, | |||
queryCount * pool->stride, 0); | |||
si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4, | |||
queryCount * 4, 0); | |||
} | |||
void radv_CmdBeginQuery( | |||
VkCommandBuffer commandBuffer, | |||
VkQueryPool queryPool, | |||
uint32_t query, | |||
VkQueryControlFlags flags) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); | |||
struct radeon_winsys_cs *cs = cmd_buffer->cs; | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); | |||
va += pool->stride * query; | |||
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8); | |||
switch (pool->type) { | |||
case VK_QUERY_TYPE_OCCLUSION: | |||
radeon_check_space(cmd_buffer->device->ws, cs, 7); | |||
++cmd_buffer->state.active_occlusion_queries; | |||
if (cmd_buffer->state.active_occlusion_queries == 1) | |||
radv_set_db_count_control(cmd_buffer); | |||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); | |||
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); | |||
radeon_emit(cs, va); | |||
radeon_emit(cs, va >> 32); | |||
break; | |||
default: | |||
unreachable("beginning unhandled query type"); | |||
} | |||
} | |||
void radv_CmdEndQuery( | |||
VkCommandBuffer commandBuffer, | |||
VkQueryPool queryPool, | |||
uint32_t query) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); | |||
struct radeon_winsys_cs *cs = cmd_buffer->cs; | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); | |||
uint64_t avail_va = va + pool->availability_offset + 4 * query; | |||
va += pool->stride * query; | |||
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8); | |||
switch (pool->type) { | |||
case VK_QUERY_TYPE_OCCLUSION: | |||
radeon_check_space(cmd_buffer->device->ws, cs, 14); | |||
cmd_buffer->state.active_occlusion_queries--; | |||
if (cmd_buffer->state.active_occlusion_queries == 0) | |||
radv_set_db_count_control(cmd_buffer); | |||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); | |||
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); | |||
radeon_emit(cs, va + 8); | |||
radeon_emit(cs, (va + 8) >> 32); | |||
radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0)); | |||
radeon_emit(cs, va); | |||
radeon_emit(cs, va >> 32); | |||
radeon_emit(cs, va + pool->stride - 16); | |||
radeon_emit(cs, (va + pool->stride - 16) >> 32); | |||
break; | |||
default: | |||
unreachable("ending unhandled query type"); | |||
} | |||
radeon_check_space(cmd_buffer->device->ws, cs, 5); | |||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); | |||
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | | |||
S_370_WR_CONFIRM(1) | | |||
S_370_ENGINE_SEL(V_370_ME)); | |||
radeon_emit(cs, avail_va); | |||
radeon_emit(cs, avail_va >> 32); | |||
radeon_emit(cs, 1); | |||
} | |||
void radv_CmdWriteTimestamp( | |||
VkCommandBuffer commandBuffer, | |||
VkPipelineStageFlagBits pipelineStage, | |||
VkQueryPool queryPool, | |||
uint32_t query) | |||
{ | |||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); | |||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); | |||
struct radeon_winsys_cs *cs = cmd_buffer->cs; | |||
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); | |||
uint64_t avail_va = va + pool->availability_offset + 4 * query; | |||
uint64_t query_va = va + pool->stride * query; | |||
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5); | |||
unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 11); | |||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); | |||
radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); | |||
radeon_emit(cs, query_va); | |||
radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF)); | |||
radeon_emit(cs, 0); | |||
radeon_emit(cs, 0); | |||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); | |||
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | | |||
S_370_WR_CONFIRM(1) | | |||
S_370_ENGINE_SEL(V_370_ME)); | |||
radeon_emit(cs, avail_va); | |||
radeon_emit(cs, avail_va >> 32); | |||
radeon_emit(cs, 1); | |||
assert(cmd_buffer->cs->cdw <= cdw_max); | |||
} |
@@ -0,0 +1,336 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* Based on radeon_winsys.h which is: | |||
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> | |||
* Copyright 2010 Marek Olšák <maraeo@gmail.com> | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include <stdint.h> | |||
#include <stdbool.h> | |||
#include <stdlib.h> | |||
#include "main/macros.h" | |||
#include "amd_family.h" | |||
#define FREE(x) free(x) | |||
enum radeon_bo_domain { /* bitfield */ | |||
RADEON_DOMAIN_GTT = 2, | |||
RADEON_DOMAIN_VRAM = 4, | |||
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT | |||
}; | |||
enum radeon_bo_flag { /* bitfield */ | |||
RADEON_FLAG_GTT_WC = (1 << 0), | |||
RADEON_FLAG_CPU_ACCESS = (1 << 1), | |||
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2), | |||
}; | |||
enum radeon_bo_usage { /* bitfield */ | |||
RADEON_USAGE_READ = 2, | |||
RADEON_USAGE_WRITE = 4, | |||
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE | |||
}; | |||
enum ring_type { | |||
RING_GFX = 0, | |||
RING_COMPUTE, | |||
RING_DMA, | |||
RING_UVD, | |||
RING_VCE, | |||
RING_LAST, | |||
}; | |||
struct radeon_winsys_cs { | |||
unsigned cdw; /* Number of used dwords. */ | |||
unsigned max_dw; /* Maximum number of dwords. */ | |||
uint32_t *buf; /* The base pointer of the chunk. */ | |||
}; | |||
struct radeon_info { | |||
/* PCI info: domain:bus:dev:func */ | |||
uint32_t pci_domain; | |||
uint32_t pci_bus; | |||
uint32_t pci_dev; | |||
uint32_t pci_func; | |||
/* Device info. */ | |||
uint32_t pci_id; | |||
enum radeon_family family; | |||
const char *name; | |||
enum chip_class chip_class; | |||
uint32_t gart_page_size; | |||
uint64_t gart_size; | |||
uint64_t vram_size; | |||
bool has_dedicated_vram; | |||
bool has_virtual_memory; | |||
bool gfx_ib_pad_with_type2; | |||
bool has_sdma; | |||
bool has_uvd; | |||
uint32_t vce_fw_version; | |||
uint32_t vce_harvest_config; | |||
uint32_t clock_crystal_freq; | |||
/* Kernel info. */ | |||
uint32_t drm_major; /* version */ | |||
uint32_t drm_minor; | |||
uint32_t drm_patchlevel; | |||
bool has_userptr; | |||
/* Shader cores. */ | |||
uint32_t r600_max_quad_pipes; /* wave size / 16 */ | |||
uint32_t max_shader_clock; | |||
uint32_t num_good_compute_units; | |||
uint32_t max_se; /* shader engines */ | |||
uint32_t max_sh_per_se; /* shader arrays per shader engine */ | |||
/* Render backends (color + depth blocks). */ | |||
uint32_t r300_num_gb_pipes; | |||
uint32_t r300_num_z_pipes; | |||
uint32_t r600_gb_backend_map; /* R600 harvest config */ | |||
bool r600_gb_backend_map_valid; | |||
uint32_t r600_num_banks; | |||
uint32_t num_render_backends; | |||
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */ | |||
uint32_t pipe_interleave_bytes; | |||
uint32_t enabled_rb_mask; /* GCN harvest config */ | |||
/* Tile modes. */ | |||
uint32_t si_tile_mode_array[32]; | |||
uint32_t cik_macrotile_mode_array[16]; | |||
}; | |||
#define RADEON_SURF_MAX_LEVEL 32 | |||
#define RADEON_SURF_TYPE_MASK 0xFF | |||
#define RADEON_SURF_TYPE_SHIFT 0 | |||
#define RADEON_SURF_TYPE_1D 0 | |||
#define RADEON_SURF_TYPE_2D 1 | |||
#define RADEON_SURF_TYPE_3D 2 | |||
#define RADEON_SURF_TYPE_CUBEMAP 3 | |||
#define RADEON_SURF_TYPE_1D_ARRAY 4 | |||
#define RADEON_SURF_TYPE_2D_ARRAY 5 | |||
#define RADEON_SURF_MODE_MASK 0xFF | |||
#define RADEON_SURF_MODE_SHIFT 8 | |||
#define RADEON_SURF_MODE_LINEAR_ALIGNED 1 | |||
#define RADEON_SURF_MODE_1D 2 | |||
#define RADEON_SURF_MODE_2D 3 | |||
#define RADEON_SURF_SCANOUT (1 << 16) | |||
#define RADEON_SURF_ZBUFFER (1 << 17) | |||
#define RADEON_SURF_SBUFFER (1 << 18) | |||
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER) | |||
#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19) | |||
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) | |||
#define RADEON_SURF_FMASK (1 << 21) | |||
#define RADEON_SURF_DISABLE_DCC (1 << 22) | |||
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) | |||
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) | |||
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT)) | |||
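/* Usage sketch for the accessors above: request a 2D-tiled, scanout-capable
 * 2D texture and decode the mode back out. */
#if 0 /* usage sketch */
uint32_t f = RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE) |
RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE) |
RADEON_SURF_SCANOUT;
assert(RADEON_SURF_GET(f, MODE) == RADEON_SURF_MODE_2D);
#endif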
struct radeon_surf_level { | |||
uint64_t offset; | |||
uint64_t slice_size; | |||
uint32_t npix_x; | |||
uint32_t npix_y; | |||
uint32_t npix_z; | |||
uint32_t nblk_x; | |||
uint32_t nblk_y; | |||
uint32_t nblk_z; | |||
uint32_t pitch_bytes; | |||
uint32_t mode; | |||
uint64_t dcc_offset; | |||
uint64_t dcc_fast_clear_size; | |||
bool dcc_enabled; | |||
}; | |||
/* surface definitions from the winsys */
struct radeon_surf { | |||
/* These are inputs to the calculator. */ | |||
uint32_t npix_x; | |||
uint32_t npix_y; | |||
uint32_t npix_z; | |||
uint32_t blk_w; | |||
uint32_t blk_h; | |||
uint32_t blk_d; | |||
uint32_t array_size; | |||
uint32_t last_level; | |||
uint32_t bpe; | |||
uint32_t nsamples; | |||
uint32_t flags; | |||
/* These are return values. Some of them can be set by the caller, but | |||
* they will be treated as hints (e.g. bankw, bankh) and might be | |||
* changed by the calculator. | |||
*/ | |||
uint64_t bo_size; | |||
uint64_t bo_alignment; | |||
/* This applies to EG and later. */ | |||
uint32_t bankw; | |||
uint32_t bankh; | |||
uint32_t mtilea; | |||
uint32_t tile_split; | |||
uint32_t stencil_tile_split; | |||
uint64_t stencil_offset; | |||
struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL]; | |||
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL]; | |||
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL]; | |||
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL]; | |||
uint32_t pipe_config; | |||
uint32_t num_banks; | |||
uint32_t macro_tile_index; | |||
uint32_t micro_tile_mode; /* displayable, thin, depth, rotated */ | |||
/* Whether the depth miptree or stencil miptree as used by the DB are | |||
* adjusted from their TC compatible form to ensure depth/stencil | |||
* compatibility. If either is true, the corresponding plane cannot be | |||
* sampled from. | |||
*/ | |||
bool depth_adjusted; | |||
bool stencil_adjusted; | |||
uint64_t dcc_size; | |||
uint64_t dcc_alignment; | |||
}; | |||
enum radeon_bo_layout { | |||
RADEON_LAYOUT_LINEAR = 0, | |||
RADEON_LAYOUT_TILED, | |||
RADEON_LAYOUT_SQUARETILED, | |||
RADEON_LAYOUT_UNKNOWN | |||
}; | |||
/* Tiling info for display code, DRI sharing, and other data. */ | |||
struct radeon_bo_metadata { | |||
/* Tiling flags describing the texture layout for display code | |||
* and DRI sharing. | |||
*/ | |||
enum radeon_bo_layout microtile; | |||
enum radeon_bo_layout macrotile; | |||
unsigned pipe_config; | |||
unsigned bankw; | |||
unsigned bankh; | |||
unsigned tile_split; | |||
unsigned mtilea; | |||
unsigned num_banks; | |||
unsigned stride; | |||
bool scanout; | |||
/* Additional metadata associated with the buffer, in bytes. | |||
* The maximum size is 64 * 4. This is opaque for the winsys & kernel. | |||
* Supported by amdgpu only. | |||
*/ | |||
uint32_t size_metadata; | |||
uint32_t metadata[64]; | |||
}; | |||
struct radeon_winsys_bo; | |||
struct radeon_winsys_fence; | |||
struct radeon_winsys { | |||
void (*destroy)(struct radeon_winsys *ws); | |||
void (*query_info)(struct radeon_winsys *ws, | |||
struct radeon_info *info); | |||
struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws, | |||
uint64_t size, | |||
unsigned alignment, | |||
enum radeon_bo_domain domain, | |||
enum radeon_bo_flag flags); | |||
void (*buffer_destroy)(struct radeon_winsys_bo *bo); | |||
void *(*buffer_map)(struct radeon_winsys_bo *bo); | |||
struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, | |||
int fd, | |||
unsigned *stride, unsigned *offset); | |||
bool (*buffer_get_fd)(struct radeon_winsys *ws, | |||
struct radeon_winsys_bo *bo, | |||
int *fd); | |||
void (*buffer_unmap)(struct radeon_winsys_bo *bo); | |||
uint64_t (*buffer_get_va)(struct radeon_winsys_bo *bo); | |||
void (*buffer_set_metadata)(struct radeon_winsys_bo *bo, | |||
struct radeon_bo_metadata *md); | |||
struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); | |||
void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); | |||
bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx); | |||
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, | |||
enum ring_type ring_type); | |||
void (*cs_destroy)(struct radeon_winsys_cs *cs); | |||
void (*cs_reset)(struct radeon_winsys_cs *cs); | |||
bool (*cs_finalize)(struct radeon_winsys_cs *cs); | |||
void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size); | |||
int (*cs_submit)(struct radeon_winsys_ctx *ctx, | |||
struct radeon_winsys_cs **cs_array, | |||
unsigned cs_count, | |||
bool can_patch, | |||
struct radeon_winsys_fence *fence); | |||
void (*cs_add_buffer)(struct radeon_winsys_cs *cs, | |||
struct radeon_winsys_bo *bo, | |||
uint8_t priority); | |||
void (*cs_execute_secondary)(struct radeon_winsys_cs *parent, | |||
struct radeon_winsys_cs *child); | |||
int (*surface_init)(struct radeon_winsys *ws, | |||
struct radeon_surf *surf); | |||
int (*surface_best)(struct radeon_winsys *ws, | |||
struct radeon_surf *surf); | |||
struct radeon_winsys_fence *(*create_fence)(void);
void (*destroy_fence)(struct radeon_winsys_fence *fence); | |||
bool (*fence_wait)(struct radeon_winsys *ws, | |||
struct radeon_winsys_fence *fence, | |||
bool absolute, | |||
uint64_t timeout); | |||
}; | |||
static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value) | |||
{ | |||
cs->buf[cs->cdw++] = value; | |||
} | |||
static inline void radeon_emit_array(struct radeon_winsys_cs *cs, | |||
const uint32_t *values, unsigned count) | |||
{ | |||
memcpy(cs->buf + cs->cdw, values, count * 4); | |||
cs->cdw += count; | |||
} | |||
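/* The vtable above is exercised throughout the driver in the pattern below;
 * a minimal upload sketch using only the callbacks declared here (it mirrors
 * the shader-code upload in the pipeline cache): */
#if 0 /* usage sketch */
struct radeon_winsys_bo *bo =
ws->buffer_create(ws, size, 256, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
void *ptr = ws->buffer_map(bo);
memcpy(ptr, data, size);
ws->buffer_unmap(bo);
uint64_t va = ws->buffer_get_va(bo); /* GPU address for command emission */
#endif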
@@ -0,0 +1,204 @@ | |||
/* | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <stdarg.h> | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <errno.h> | |||
#include <assert.h> | |||
#include "radv_private.h" | |||
#include "util/u_math.h" | |||
/** Log an error message. */ | |||
void radv_printflike(1, 2) | |||
radv_loge(const char *format, ...) | |||
{ | |||
va_list va; | |||
va_start(va, format); | |||
radv_loge_v(format, va); | |||
va_end(va); | |||
} | |||
/** \see radv_loge() */ | |||
void | |||
radv_loge_v(const char *format, va_list va) | |||
{ | |||
fprintf(stderr, "vk: error: "); | |||
vfprintf(stderr, format, va); | |||
fprintf(stderr, "\n"); | |||
} | |||
void radv_printflike(3, 4) | |||
__radv_finishme(const char *file, int line, const char *format, ...) | |||
{ | |||
va_list ap; | |||
char buffer[256]; | |||
va_start(ap, format); | |||
vsnprintf(buffer, sizeof(buffer), format, ap); | |||
va_end(ap); | |||
fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); | |||
} | |||
void radv_noreturn radv_printflike(1, 2) | |||
radv_abortf(const char *format, ...) | |||
{ | |||
va_list va; | |||
va_start(va, format); | |||
radv_abortfv(format, va); | |||
va_end(va); | |||
} | |||
void radv_noreturn | |||
radv_abortfv(const char *format, va_list va) | |||
{ | |||
fprintf(stderr, "vk: error: "); | |||
vfprintf(stderr, format, va); | |||
fprintf(stderr, "\n"); | |||
abort(); | |||
} | |||
VkResult | |||
__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) | |||
{ | |||
va_list ap; | |||
char buffer[256]; | |||
#define ERROR_CASE(error) case error: error_str = #error; break; | |||
const char *error_str; | |||
switch ((int32_t)error) { | |||
/* Core errors */ | |||
ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) | |||
ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) | |||
ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) | |||
ERROR_CASE(VK_ERROR_DEVICE_LOST) | |||
ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) | |||
ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) | |||
ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) | |||
ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) | |||
/* Extension errors */ | |||
ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) | |||
default: | |||
assert(!"Unknown error"); | |||
error_str = "unknown error"; | |||
} | |||
#undef ERROR_CASE | |||
if (format) { | |||
va_start(ap, format); | |||
vsnprintf(buffer, sizeof(buffer), format, ap); | |||
va_end(ap); | |||
fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); | |||
} else { | |||
fprintf(stderr, "%s:%d: %s\n", file, line, error_str); | |||
} | |||
return error; | |||
} | |||
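/* A sketch of the wrapper macros presumably provided by radv_private.h, an
 * assumption inferred from the vk_error() call sites elsewhere in this
 * series: */
#if 0 /* assumed definitions, for illustration only */
#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL)
#define vk_errorf(error, ...) __vk_errorf(error, __FILE__, __LINE__, __VA_ARGS__)
#endif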
int | |||
radv_vector_init(struct radv_vector *vector, uint32_t element_size, uint32_t size) | |||
{ | |||
assert(util_is_power_of_two(size)); | |||
assert(element_size < size && util_is_power_of_two(element_size)); | |||
vector->head = 0; | |||
vector->tail = 0; | |||
vector->element_size = element_size; | |||
vector->size = size; | |||
vector->data = malloc(size); | |||
return vector->data != NULL; | |||
} | |||
void * | |||
radv_vector_add(struct radv_vector *vector) | |||
{ | |||
uint32_t offset, size, split, src_tail, dst_tail; | |||
void *data; | |||
if (vector->head - vector->tail == vector->size) { | |||
size = vector->size * 2; | |||
data = malloc(size); | |||
if (data == NULL) | |||
return NULL; | |||
src_tail = vector->tail & (vector->size - 1); | |||
dst_tail = vector->tail & (size - 1); | |||
if (src_tail == 0) { | |||
/* The vector is full and its tail sits at the start of the old
* buffer, so the data is linear from start to end and one copy
* suffices.
*/
memcpy(data + dst_tail, vector->data, vector->size); | |||
} else { | |||
/* In this case, the vector is split into two pieces and we have | |||
* to do two copies. We have to be careful to make sure each | |||
* piece goes to the right locations. Thanks to the change in | |||
* size, it may or may not still wrap around. | |||
*/ | |||
split = align_u32(vector->tail, vector->size); | |||
assert(vector->tail <= split && split < vector->head); | |||
memcpy(data + dst_tail, vector->data + src_tail, | |||
split - vector->tail); | |||
memcpy(data + (split & (size - 1)), vector->data, | |||
vector->head - split); | |||
} | |||
free(vector->data); | |||
vector->data = data; | |||
vector->size = size; | |||
} | |||
assert(vector->head - vector->tail < vector->size); | |||
offset = vector->head & (vector->size - 1); | |||
vector->head += vector->element_size; | |||
return vector->data + offset; | |||
} | |||
void * | |||
radv_vector_remove(struct radv_vector *vector) | |||
{ | |||
uint32_t offset; | |||
if (vector->head == vector->tail) | |||
return NULL; | |||
assert(vector->head - vector->tail <= vector->size); | |||
offset = vector->tail & (vector->size - 1); | |||
vector->tail += vector->element_size; | |||
return vector->data + offset; | |||
} |
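/* Usage sketch for the ring vector above, with a hypothetical 16-byte
 * element type; element_size must be a power of two smaller than the
 * initial size, which must itself be a power of two: */
#if 0 /* usage sketch */
struct job { uint64_t a, b; };
struct radv_vector fifo;
radv_vector_init(&fifo, sizeof(struct job), 64 * sizeof(struct job));
struct job *in = radv_vector_add(&fifo); /* doubles the storage when full */
struct job *out = radv_vector_remove(&fifo); /* NULL once empty */
#endif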
@@ -0,0 +1,9 @@ | |||
#pragma once | |||
#ifdef HAVE___BUILTIN_POPCOUNT | |||
#define util_bitcount(i) __builtin_popcount(i) | |||
#else | |||
extern unsigned int | |||
util_bitcount(unsigned int n); | |||
#endif | |||
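/* Where __builtin_popcount is unavailable, a portable fallback along these
 * lines (Kernighan's bit-clearing loop) would satisfy the extern declaration
 * above; a sketch, not necessarily the definition that ships: */
#if 0 /* sketch of the out-of-line fallback */
unsigned int
util_bitcount(unsigned int n)
{
unsigned int count = 0;
for (; n; n &= n - 1) /* clear the lowest set bit each iteration */
count++;
return count;
}
#endif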
@@ -0,0 +1,246 @@ | |||
/* | |||
* Copyright © 2016 Red Hat | |||
* based on intel anv code: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "radv_wsi.h" | |||
VkResult | |||
radv_init_wsi(struct radv_physical_device *physical_device) | |||
{ | |||
VkResult result; | |||
memset(physical_device->wsi, 0, sizeof(physical_device->wsi)); | |||
#ifdef VK_USE_PLATFORM_XCB_KHR | |||
result = radv_x11_init_wsi(physical_device); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
#endif | |||
#ifdef VK_USE_PLATFORM_WAYLAND_KHR | |||
result = radv_wl_init_wsi(physical_device); | |||
if (result != VK_SUCCESS) { | |||
#ifdef VK_USE_PLATFORM_XCB_KHR | |||
radv_x11_finish_wsi(physical_device); | |||
#endif | |||
return result; | |||
} | |||
#endif | |||
return VK_SUCCESS; | |||
} | |||
void | |||
radv_finish_wsi(struct radv_physical_device *physical_device) | |||
{ | |||
#ifdef VK_USE_PLATFORM_WAYLAND_KHR | |||
radv_wl_finish_wsi(physical_device); | |||
#endif | |||
#ifdef VK_USE_PLATFORM_XCB_KHR | |||
radv_x11_finish_wsi(physical_device); | |||
#endif | |||
} | |||
void radv_DestroySurfaceKHR( | |||
VkInstance _instance, | |||
VkSurfaceKHR _surface, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
RADV_FROM_HANDLE(radv_instance, instance, _instance); | |||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); | |||
radv_free2(&instance->alloc, pAllocator, surface); | |||
} | |||
VkResult radv_GetPhysicalDeviceSurfaceSupportKHR( | |||
VkPhysicalDevice physicalDevice, | |||
uint32_t queueFamilyIndex, | |||
VkSurfaceKHR _surface, | |||
VkBool32* pSupported) | |||
{ | |||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); | |||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); | |||
struct radv_wsi_interface *iface = device->wsi[surface->platform]; | |||
return iface->get_support(surface, device, queueFamilyIndex, pSupported); | |||
} | |||
VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR( | |||
VkPhysicalDevice physicalDevice, | |||
VkSurfaceKHR _surface, | |||
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) | |||
{ | |||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); | |||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); | |||
struct radv_wsi_interface *iface = device->wsi[surface->platform]; | |||
return iface->get_capabilities(surface, device, pSurfaceCapabilities); | |||
} | |||
VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR( | |||
VkPhysicalDevice physicalDevice, | |||
VkSurfaceKHR _surface, | |||
uint32_t* pSurfaceFormatCount, | |||
VkSurfaceFormatKHR* pSurfaceFormats) | |||
{ | |||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); | |||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); | |||
struct radv_wsi_interface *iface = device->wsi[surface->platform]; | |||
return iface->get_formats(surface, device, pSurfaceFormatCount, | |||
pSurfaceFormats); | |||
} | |||
VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR( | |||
VkPhysicalDevice physicalDevice, | |||
VkSurfaceKHR _surface, | |||
uint32_t* pPresentModeCount, | |||
VkPresentModeKHR* pPresentModes) | |||
{ | |||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); | |||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); | |||
struct radv_wsi_interface *iface = device->wsi[surface->platform]; | |||
return iface->get_present_modes(surface, device, pPresentModeCount, | |||
pPresentModes); | |||
} | |||
VkResult radv_CreateSwapchainKHR( | |||
VkDevice _device, | |||
const VkSwapchainCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkSwapchainKHR* pSwapchain) | |||
{ | |||
RADV_FROM_HANDLE(radv_device, device, _device); | |||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); | |||
struct radv_wsi_interface *iface = | |||
device->instance->physicalDevice.wsi[surface->platform]; | |||
struct radv_swapchain *swapchain; | |||
VkResult result = iface->create_swapchain(surface, device, pCreateInfo, | |||
pAllocator, &swapchain); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
if (pAllocator) | |||
swapchain->alloc = *pAllocator; | |||
else | |||
swapchain->alloc = device->alloc; | |||
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) | |||
swapchain->fences[i] = VK_NULL_HANDLE; | |||
*pSwapchain = radv_swapchain_to_handle(swapchain); | |||
return VK_SUCCESS; | |||
} | |||
void radv_DestroySwapchainKHR( | |||
VkDevice device, | |||
VkSwapchainKHR _swapchain, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
RADV_FROM_HANDLE(radv_swapchain, swapchain, _swapchain); | |||
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) { | |||
if (swapchain->fences[i] != VK_NULL_HANDLE) | |||
radv_DestroyFence(device, swapchain->fences[i], pAllocator); | |||
} | |||
swapchain->destroy(swapchain, pAllocator); | |||
} | |||
VkResult radv_GetSwapchainImagesKHR( | |||
VkDevice device, | |||
VkSwapchainKHR _swapchain, | |||
uint32_t* pSwapchainImageCount, | |||
VkImage* pSwapchainImages) | |||
{ | |||
RADV_FROM_HANDLE(radv_swapchain, swapchain, _swapchain); | |||
return swapchain->get_images(swapchain, pSwapchainImageCount, | |||
pSwapchainImages); | |||
} | |||
VkResult radv_AcquireNextImageKHR( | |||
VkDevice device, | |||
VkSwapchainKHR _swapchain, | |||
uint64_t timeout, | |||
VkSemaphore semaphore, | |||
VkFence fence, | |||
uint32_t* pImageIndex) | |||
{ | |||
RADV_FROM_HANDLE(radv_swapchain, swapchain, _swapchain); | |||
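/* TODO: the fence parameter is currently ignored; the WSI backend
 * interface only passes the timeout and semaphore through.
 */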
return swapchain->acquire_next_image(swapchain, timeout, semaphore, | |||
pImageIndex); | |||
} | |||
VkResult radv_QueuePresentKHR( | |||
VkQueue _queue, | |||
const VkPresentInfoKHR* pPresentInfo) | |||
{ | |||
RADV_FROM_HANDLE(radv_queue, queue, _queue); | |||
VkResult result = VK_SUCCESS; | |||
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { | |||
RADV_FROM_HANDLE(radv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); | |||
assert(swapchain->device == queue->device); | |||
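/* Throttle presentation with a small ring of fences: a fence is
 * submitted with each present, and below we wait on the fence from
 * two presents ago so the CPU can't run unboundedly ahead of the
 * presentation engine.
 */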
if (swapchain->fences[0] == VK_NULL_HANDLE) { | |||
result = radv_CreateFence(radv_device_to_handle(queue->device), | |||
&(VkFenceCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, | |||
.flags = 0, | |||
}, &swapchain->alloc, &swapchain->fences[0]); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
} else { | |||
radv_ResetFences(radv_device_to_handle(queue->device), | |||
1, &swapchain->fences[0]); | |||
} | |||
radv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]); | |||
result = swapchain->queue_present(swapchain, queue, | |||
pPresentInfo->pImageIndices[i]); | |||
/* TODO: What if one of them returns OUT_OF_DATE? */ | |||
if (result != VK_SUCCESS) | |||
return result; | |||
VkFence last = swapchain->fences[2]; | |||
swapchain->fences[2] = swapchain->fences[1]; | |||
swapchain->fences[1] = swapchain->fences[0]; | |||
swapchain->fences[0] = last; | |||
if (last != VK_NULL_HANDLE) { | |||
radv_WaitForFences(radv_device_to_handle(queue->device), | |||
1, &last, true, 1); | |||
} | |||
} | |||
return VK_SUCCESS; | |||
} |
@@ -0,0 +1,79 @@ | |||
/* | |||
* Copyright © 2016 Red Hat | |||
* based on intel anv code: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "radv_private.h" | |||
struct radv_swapchain; | |||
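/* Per-platform WSI backend. A vtable for each supported platform is
 * installed in radv_physical_device::wsi[] at init time and looked up
 * through the ICD surface's platform enum.
 */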
struct radv_wsi_interface { | |||
VkResult (*get_support)(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t queueFamilyIndex, | |||
VkBool32* pSupported); | |||
VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); | |||
VkResult (*get_formats)(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t* pSurfaceFormatCount, | |||
VkSurfaceFormatKHR* pSurfaceFormats); | |||
VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t* pPresentModeCount, | |||
VkPresentModeKHR* pPresentModes); | |||
VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, | |||
struct radv_device *device, | |||
const VkSwapchainCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct radv_swapchain **swapchain); | |||
}; | |||
struct radv_swapchain { | |||
struct radv_device *device; | |||
VkAllocationCallbacks alloc; | |||
VkFence fences[3]; | |||
VkResult (*destroy)(struct radv_swapchain *swapchain, | |||
const VkAllocationCallbacks *pAllocator); | |||
VkResult (*get_images)(struct radv_swapchain *swapchain, | |||
uint32_t *pCount, VkImage *pSwapchainImages); | |||
VkResult (*acquire_next_image)(struct radv_swapchain *swap_chain, | |||
uint64_t timeout, VkSemaphore semaphore, | |||
uint32_t *image_index); | |||
VkResult (*queue_present)(struct radv_swapchain *swap_chain, | |||
struct radv_queue *queue, | |||
uint32_t image_index); | |||
}; | |||
RADV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) | |||
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_swapchain, VkSwapchainKHR) | |||
VkResult radv_x11_init_wsi(struct radv_physical_device *physical_device); | |||
void radv_x11_finish_wsi(struct radv_physical_device *physical_device); | |||
VkResult radv_wl_init_wsi(struct radv_physical_device *physical_device); | |||
void radv_wl_finish_wsi(struct radv_physical_device *physical_device); |
@@ -0,0 +1,880 @@ | |||
/* | |||
* Copyright © 2016 Red Hat | |||
* based on intel anv code: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <wayland-client.h> | |||
#include <wayland-drm-client-protocol.h> | |||
#include "radv_wsi.h" | |||
#include "vk_format.h" | |||
#include "util/hash_table.h"
#define MIN_NUM_IMAGES 2 | |||
struct wsi_wl_display { | |||
struct radv_physical_device *physical_device; | |||
struct wl_display * display; | |||
struct wl_drm * drm; | |||
/* Vector of VkFormats supported */ | |||
struct radv_vector formats; | |||
uint32_t capabilities; | |||
}; | |||
struct wsi_wayland { | |||
struct radv_wsi_interface base; | |||
struct radv_physical_device * physical_device; | |||
pthread_mutex_t mutex; | |||
/* Hash table of wl_display -> wsi_wl_display mappings */ | |||
struct hash_table * displays; | |||
}; | |||
static void | |||
wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) | |||
{ | |||
/* Don't add a format that's already in the list */ | |||
VkFormat *f; | |||
radv_vector_foreach(f, &display->formats) | |||
if (*f == format) | |||
return; | |||
/* Don't add formats that aren't renderable. */ | |||
VkFormatProperties props; | |||
radv_GetPhysicalDeviceFormatProperties( | |||
radv_physical_device_to_handle(display->physical_device), format, &props); | |||
if (!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) | |||
return; | |||
f = radv_vector_add(&display->formats); | |||
if (f) | |||
*f = format; | |||
} | |||
static void | |||
drm_handle_device(void *data, struct wl_drm *drm, const char *name) | |||
{ | |||
fprintf(stderr, "wl_drm.device(%s)\n", name); | |||
} | |||
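/* wl_drm format names follow the DRM fourcc convention and describe the
 * packed little-endian pixel, so e.g. Vulkan's B8G8R8A8 (bytes B,G,R,A
 * in memory) maps to ARGB8888.
 */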
static uint32_t | |||
wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) | |||
{ | |||
switch (vk_format) { | |||
/* TODO: Figure out what all the formats mean and make this table | |||
* correct. | |||
*/ | |||
#if 0 | |||
case VK_FORMAT_R4G4B4A4_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; | |||
case VK_FORMAT_R5G6B5_UNORM: | |||
return WL_DRM_FORMAT_BGR565; | |||
case VK_FORMAT_R5G5B5A1_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; | |||
case VK_FORMAT_R8G8B8_UNORM: | |||
return WL_DRM_FORMAT_XBGR8888; | |||
case VK_FORMAT_R8G8B8A8_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; | |||
case VK_FORMAT_R10G10B10A2_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; | |||
case VK_FORMAT_B4G4R4A4_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; | |||
case VK_FORMAT_B5G6R5_UNORM: | |||
return WL_DRM_FORMAT_RGB565; | |||
case VK_FORMAT_B5G5R5A1_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ARGB1555 : WL_DRM_FORMAT_XRGB1555;
#endif | |||
case VK_FORMAT_B8G8R8_SRGB: | |||
return WL_DRM_FORMAT_BGRX8888; | |||
case VK_FORMAT_B8G8R8A8_SRGB: | |||
return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; | |||
#if 0 | |||
case VK_FORMAT_B10G10R10A2_UNORM: | |||
return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; | |||
#endif | |||
default: | |||
assert(!"Unsupported Vulkan format"); | |||
return 0; | |||
} | |||
} | |||
static void | |||
drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) | |||
{ | |||
struct wsi_wl_display *display = data; | |||
switch (wl_format) { | |||
#if 0 | |||
case WL_DRM_FORMAT_ABGR4444: | |||
case WL_DRM_FORMAT_XBGR4444: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_BGR565: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_ABGR1555: | |||
case WL_DRM_FORMAT_XBGR1555: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_XBGR8888: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); | |||
/* fallthrough */ | |||
case WL_DRM_FORMAT_ABGR8888: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_ABGR2101010: | |||
case WL_DRM_FORMAT_XBGR2101010: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_ARGB4444: | |||
case WL_DRM_FORMAT_XRGB4444: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_RGB565: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); | |||
break; | |||
case WL_DRM_FORMAT_ARGB1555: | |||
case WL_DRM_FORMAT_XRGB1555: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); | |||
break; | |||
#endif | |||
case WL_DRM_FORMAT_XRGB8888: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); | |||
/* fallthrough */ | |||
case WL_DRM_FORMAT_ARGB8888: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); | |||
break; | |||
#if 0 | |||
case WL_DRM_FORMAT_ARGB2101010: | |||
case WL_DRM_FORMAT_XRGB2101010: | |||
wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); | |||
break; | |||
#endif | |||
} | |||
} | |||
static void | |||
drm_handle_authenticated(void *data, struct wl_drm *drm) | |||
{ | |||
} | |||
static void | |||
drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) | |||
{ | |||
struct wsi_wl_display *display = data; | |||
display->capabilities = capabilities; | |||
} | |||
static const struct wl_drm_listener drm_listener = { | |||
drm_handle_device, | |||
drm_handle_format, | |||
drm_handle_authenticated, | |||
drm_handle_capabilities, | |||
}; | |||
static void | |||
registry_handle_global(void *data, struct wl_registry *registry, | |||
uint32_t name, const char *interface, uint32_t version) | |||
{ | |||
struct wsi_wl_display *display = data; | |||
if (strcmp(interface, "wl_drm") == 0) { | |||
assert(display->drm == NULL); | |||
assert(version >= 2); | |||
display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); | |||
if (display->drm) | |||
wl_drm_add_listener(display->drm, &drm_listener, display); | |||
} | |||
} | |||
static void | |||
registry_handle_global_remove(void *data, struct wl_registry *registry, | |||
uint32_t name) | |||
{ /* No-op */ } | |||
static const struct wl_registry_listener registry_listener = { | |||
registry_handle_global, | |||
registry_handle_global_remove | |||
}; | |||
static void | |||
wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) | |||
{ | |||
radv_vector_finish(&display->formats); | |||
if (display->drm) | |||
wl_drm_destroy(display->drm); | |||
radv_free(&wsi->physical_device->instance->alloc, display); | |||
} | |||
static struct wsi_wl_display * | |||
wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) | |||
{ | |||
struct wsi_wl_display *display = | |||
radv_alloc(&wsi->physical_device->instance->alloc, sizeof(*display), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); | |||
if (!display) | |||
return NULL; | |||
memset(display, 0, sizeof(*display)); | |||
display->display = wl_display; | |||
display->physical_device = wsi->physical_device; | |||
if (!radv_vector_init(&display->formats, sizeof(VkFormat), 8)) | |||
goto fail; | |||
struct wl_registry *registry = wl_display_get_registry(wl_display); | |||
if (!registry) | |||
goto fail;
wl_registry_add_listener(registry, ®istry_listener, display); | |||
/* Round-trip to get the wl_drm global */
wl_display_roundtrip(wl_display); | |||
if (!display->drm) | |||
goto fail; | |||
/* Round-trip to get wl_drm formats and capabilities */
wl_display_roundtrip(wl_display); | |||
/* We need prime support */ | |||
if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) | |||
goto fail; | |||
/* We don't need this anymore */ | |||
wl_registry_destroy(registry); | |||
return display; | |||
fail: | |||
if (registry) | |||
wl_registry_destroy(registry); | |||
wsi_wl_display_destroy(wsi, display); | |||
return NULL; | |||
} | |||
static struct wsi_wl_display * | |||
wsi_wl_get_display(struct radv_physical_device *device, | |||
struct wl_display *wl_display) | |||
{ | |||
struct wsi_wayland *wsi = | |||
(struct wsi_wayland *)device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; | |||
pthread_mutex_lock(&wsi->mutex); | |||
struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, | |||
wl_display); | |||
if (!entry) { | |||
/* We're about to make a bunch of blocking calls. Let's drop the | |||
* mutex for now so we don't block up too badly. | |||
*/ | |||
pthread_mutex_unlock(&wsi->mutex); | |||
struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); | |||
pthread_mutex_lock(&wsi->mutex); | |||
entry = _mesa_hash_table_search(wsi->displays, wl_display); | |||
if (entry) { | |||
/* Oops, someone raced us to it */ | |||
if (display)
wsi_wl_display_destroy(wsi, display);
} else { | |||
entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); | |||
} | |||
} | |||
pthread_mutex_unlock(&wsi->mutex); | |||
return entry->data; | |||
} | |||
VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR( | |||
VkPhysicalDevice physicalDevice, | |||
uint32_t queueFamilyIndex, | |||
struct wl_display* display) | |||
{ | |||
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); | |||
return wsi_wl_get_display(physical_device, display) != NULL; | |||
} | |||
static VkResult | |||
wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t queueFamilyIndex, | |||
VkBool32* pSupported) | |||
{ | |||
*pSupported = true; | |||
return VK_SUCCESS; | |||
} | |||
static const VkPresentModeKHR present_modes[] = { | |||
VK_PRESENT_MODE_MAILBOX_KHR, | |||
VK_PRESENT_MODE_FIFO_KHR, | |||
}; | |||
static VkResult | |||
wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
VkSurfaceCapabilitiesKHR* caps) | |||
{ | |||
caps->minImageCount = MIN_NUM_IMAGES; | |||
caps->maxImageCount = 4; | |||
caps->currentExtent = (VkExtent2D) { -1, -1 }; | |||
caps->minImageExtent = (VkExtent2D) { 1, 1 }; | |||
caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; | |||
caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; | |||
caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; | |||
caps->maxImageArrayLayers = 1; | |||
caps->supportedCompositeAlpha = | |||
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | | |||
VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; | |||
caps->supportedUsageFlags = | |||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | | |||
VK_IMAGE_USAGE_SAMPLED_BIT | | |||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | | |||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, | |||
struct radv_physical_device *device, | |||
uint32_t* pSurfaceFormatCount, | |||
VkSurfaceFormatKHR* pSurfaceFormats) | |||
{ | |||
VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; | |||
struct wsi_wl_display *display = | |||
wsi_wl_get_display(device, surface->display); | |||
uint32_t count = radv_vector_length(&display->formats); | |||
if (pSurfaceFormats == NULL) { | |||
*pSurfaceFormatCount = count; | |||
return VK_SUCCESS; | |||
} | |||
assert(*pSurfaceFormatCount >= count); | |||
*pSurfaceFormatCount = count; | |||
VkFormat *f; | |||
radv_vector_foreach(f, &display->formats) { | |||
*(pSurfaceFormats++) = (VkSurfaceFormatKHR) { | |||
.format = *f, | |||
/* TODO: We should get this from the compositor somehow */ | |||
.colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, | |||
}; | |||
} | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t* pPresentModeCount, | |||
VkPresentModeKHR* pPresentModes) | |||
{ | |||
if (pPresentModes == NULL) { | |||
*pPresentModeCount = ARRAY_SIZE(present_modes); | |||
return VK_SUCCESS; | |||
} | |||
assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); | |||
typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes));
*pPresentModeCount = ARRAY_SIZE(present_modes); | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, | |||
struct radv_device *device, | |||
const VkSwapchainCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct radv_swapchain **swapchain); | |||
VkResult radv_CreateWaylandSurfaceKHR( | |||
VkInstance _instance, | |||
const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkSurfaceKHR* pSurface) | |||
{ | |||
RADV_FROM_HANDLE(radv_instance, instance, _instance); | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); | |||
VkIcdSurfaceWayland *surface; | |||
surface = radv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (surface == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; | |||
surface->display = pCreateInfo->display; | |||
surface->surface = pCreateInfo->surface; | |||
*pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); | |||
return VK_SUCCESS; | |||
} | |||
struct wsi_wl_image { | |||
struct radv_image * image; | |||
struct radv_device_memory * memory; | |||
struct wl_buffer * buffer; | |||
bool busy; | |||
}; | |||
struct wsi_wl_swapchain { | |||
struct radv_swapchain base; | |||
struct wsi_wl_display * display; | |||
struct wl_event_queue * queue; | |||
struct wl_surface * surface; | |||
VkExtent2D extent; | |||
VkFormat vk_format; | |||
uint32_t drm_format; | |||
VkPresentModeKHR present_mode; | |||
bool fifo_ready; | |||
uint32_t image_count; | |||
struct wsi_wl_image images[0]; | |||
}; | |||
static VkResult | |||
wsi_wl_swapchain_get_images(struct radv_swapchain *radv_chain, | |||
uint32_t *pCount, VkImage *pSwapchainImages) | |||
{ | |||
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)radv_chain; | |||
if (pSwapchainImages == NULL) { | |||
*pCount = chain->image_count; | |||
return VK_SUCCESS; | |||
} | |||
assert(chain->image_count <= *pCount); | |||
for (uint32_t i = 0; i < chain->image_count; i++) | |||
pSwapchainImages[i] = radv_image_to_handle(chain->images[i].image); | |||
*pCount = chain->image_count; | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
wsi_wl_swapchain_acquire_next_image(struct radv_swapchain *radv_chain, | |||
uint64_t timeout, | |||
VkSemaphore semaphore, | |||
uint32_t *image_index) | |||
{ | |||
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)radv_chain; | |||
int ret = wl_display_dispatch_queue_pending(chain->display->display, | |||
chain->queue); | |||
/* XXX: I'm not sure if out-of-date is the right error here. If | |||
* wl_display_dispatch_queue_pending fails it most likely means we got | |||
* kicked by the server so this seems more-or-less correct. | |||
*/ | |||
if (ret < 0) | |||
return vk_error(VK_ERROR_OUT_OF_DATE_KHR); | |||
while (1) { | |||
for (uint32_t i = 0; i < chain->image_count; i++) { | |||
if (!chain->images[i].busy) { | |||
/* We found a non-busy image */ | |||
*image_index = i; | |||
return VK_SUCCESS; | |||
} | |||
} | |||
/* This time we do a blocking dispatch because we can't go | |||
* anywhere until we get an event. | |||
*/ | |||
int ret = wl_display_roundtrip_queue(chain->display->display, | |||
chain->queue); | |||
if (ret < 0) | |||
return vk_error(VK_ERROR_OUT_OF_DATE_KHR); | |||
} | |||
} | |||
static void | |||
frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) | |||
{ | |||
struct wsi_wl_swapchain *chain = data; | |||
chain->fifo_ready = true; | |||
wl_callback_destroy(callback); | |||
} | |||
static const struct wl_callback_listener frame_listener = { | |||
frame_handle_done, | |||
}; | |||
static VkResult | |||
wsi_wl_swapchain_queue_present(struct radv_swapchain *radv_chain, | |||
struct radv_queue *queue, | |||
uint32_t image_index) | |||
{ | |||
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)radv_chain; | |||
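/* In FIFO mode, wait for the previous present's frame callback before
 * attaching a new buffer; this paces us to one present per compositor
 * repaint.
 */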
if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { | |||
while (!chain->fifo_ready) { | |||
int ret = wl_display_dispatch_queue(chain->display->display, | |||
chain->queue); | |||
if (ret < 0) | |||
return vk_error(VK_ERROR_OUT_OF_DATE_KHR); | |||
} | |||
} | |||
assert(image_index < chain->image_count); | |||
wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); | |||
wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); | |||
if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { | |||
struct wl_callback *frame = wl_surface_frame(chain->surface); | |||
wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); | |||
wl_callback_add_listener(frame, &frame_listener, chain); | |||
chain->fifo_ready = false; | |||
} | |||
chain->images[image_index].busy = true; | |||
wl_surface_commit(chain->surface); | |||
wl_display_flush(chain->display->display); | |||
return VK_SUCCESS; | |||
} | |||
static void | |||
wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
VkDevice vk_device = radv_device_to_handle(chain->base.device); | |||
radv_FreeMemory(vk_device, radv_device_memory_to_handle(image->memory), | |||
pAllocator); | |||
radv_DestroyImage(vk_device, radv_image_to_handle(image->image), | |||
pAllocator); | |||
} | |||
static void | |||
buffer_handle_release(void *data, struct wl_buffer *buffer) | |||
{ | |||
struct wsi_wl_image *image = data; | |||
assert(image->buffer == buffer); | |||
image->busy = false; | |||
} | |||
static const struct wl_buffer_listener buffer_listener = { | |||
buffer_handle_release, | |||
}; | |||
static VkResult | |||
wsi_wl_image_init(struct wsi_wl_swapchain *chain, | |||
struct wsi_wl_image *image, | |||
const VkSwapchainCreateInfoKHR *pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator) | |||
{ | |||
VkDevice vk_device = radv_device_to_handle(chain->base.device); | |||
VkResult result; | |||
bool bret; | |||
VkImage vk_image; | |||
struct radeon_surf *surface; | |||
int fd; | |||
result = radv_image_create(vk_device, | |||
&(struct radv_image_create_info) { | |||
.vk_info = | |||
&(VkImageCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | |||
.imageType = VK_IMAGE_TYPE_2D, | |||
.format = chain->vk_format, | |||
.extent = { | |||
.width = chain->extent.width, | |||
.height = chain->extent.height, | |||
.depth = 1 | |||
}, | |||
.mipLevels = 1, | |||
.arrayLayers = 1, | |||
.samples = 1, | |||
/* FIXME: Need a way to use X tiling to allow scanout */ | |||
.tiling = VK_IMAGE_TILING_OPTIMAL, | |||
.usage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | | |||
pCreateInfo->imageUsage), | |||
.flags = 0, | |||
}, | |||
.scanout = true}, | |||
pAllocator, | |||
&vk_image); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
image->image = radv_image_from_handle(vk_image); | |||
assert(vk_format_is_color(image->image->vk_format)); | |||
VkDeviceMemory vk_memory; | |||
result = radv_AllocateMemory(vk_device, | |||
&(VkMemoryAllocateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | |||
.allocationSize = image->image->size, | |||
.memoryTypeIndex = 0, | |||
}, | |||
pAllocator, | |||
&vk_memory); | |||
if (result != VK_SUCCESS) | |||
goto fail_image; | |||
image->memory = radv_device_memory_from_handle(vk_memory); | |||
result = radv_BindImageMemory(vk_device, vk_image, vk_memory, 0); | |||
if (result != VK_SUCCESS) | |||
goto fail_mem; | |||
bret = chain->base.device->ws->buffer_get_fd(chain->base.device->ws, | |||
image->memory->bo, &fd); | |||
if (bret == false) | |||
goto fail_mem; | |||
{ | |||
struct radeon_bo_metadata metadata; | |||
radv_init_metadata(chain->base.device, image->image, &metadata); | |||
chain->base.device->ws->buffer_set_metadata(image->memory->bo, &metadata); | |||
} | |||
surface = &image->image->surface; | |||
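/* Hand the exported dma-buf fd to the compositor through wl_drm's
 * PRIME path. The roundtrip below makes sure any protocol error
 * surfaces before we close our end of the fd.
 */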
image->buffer = wl_drm_create_prime_buffer(chain->display->drm, | |||
fd, /* name */ | |||
chain->extent.width, | |||
chain->extent.height, | |||
chain->drm_format, | |||
surface->level[0].offset, | |||
surface->level[0].pitch_bytes, | |||
0, 0, 0, 0 /* unused */); | |||
wl_display_roundtrip(chain->display->display); | |||
close(fd); | |||
wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); | |||
wl_buffer_add_listener(image->buffer, &buffer_listener, image); | |||
return VK_SUCCESS; | |||
fail_mem: | |||
radv_FreeMemory(vk_device, vk_memory, pAllocator); | |||
fail_image: | |||
radv_DestroyImage(vk_device, vk_image, pAllocator); | |||
return result; | |||
} | |||
static VkResult | |||
wsi_wl_swapchain_destroy(struct radv_swapchain *radv_chain, | |||
const VkAllocationCallbacks *pAllocator) | |||
{ | |||
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)radv_chain; | |||
for (uint32_t i = 0; i < chain->image_count; i++) { | |||
if (chain->images[i].buffer) | |||
wsi_wl_image_finish(chain, &chain->images[i], pAllocator); | |||
} | |||
if (chain->queue)
wl_event_queue_destroy(chain->queue);
radv_free2(&chain->base.device->alloc, pAllocator, chain);
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, | |||
struct radv_device *device, | |||
const VkSwapchainCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct radv_swapchain **swapchain_out) | |||
{ | |||
VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; | |||
struct wsi_wl_swapchain *chain; | |||
VkResult result; | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); | |||
int num_images = pCreateInfo->minImageCount; | |||
assert(num_images >= MIN_NUM_IMAGES); | |||
/* For true mailbox mode, we need at least 4 images: | |||
* 1) One to scan out from | |||
* 2) One to have queued for scan-out | |||
* 3) One to be currently held by the Wayland compositor | |||
* 4) One to render to | |||
*/ | |||
if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) | |||
num_images = MAX2(num_images, 4); | |||
size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); | |||
chain = radv_alloc2(&device->alloc, pAllocator, size, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (chain == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
chain->base.device = device; | |||
chain->base.destroy = wsi_wl_swapchain_destroy; | |||
chain->base.get_images = wsi_wl_swapchain_get_images; | |||
chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; | |||
chain->base.queue_present = wsi_wl_swapchain_queue_present; | |||
chain->surface = surface->surface; | |||
chain->extent = pCreateInfo->imageExtent; | |||
chain->vk_format = pCreateInfo->imageFormat; | |||
chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); | |||
chain->present_mode = pCreateInfo->presentMode; | |||
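/* Start out "ready" so the first FIFO present doesn't wait for a
 * frame callback that was never requested.
 */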
chain->fifo_ready = true; | |||
chain->image_count = num_images; | |||
/* Mark a bunch of stuff as NULL. This way we can just call | |||
* destroy_swapchain for cleanup. | |||
*/ | |||
for (uint32_t i = 0; i < chain->image_count; i++) | |||
chain->images[i].buffer = NULL; | |||
chain->queue = NULL; | |||
chain->display = wsi_wl_get_display(&device->instance->physicalDevice, | |||
surface->display); | |||
if (!chain->display) { | |||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED); | |||
goto fail; | |||
} | |||
chain->queue = wl_display_create_queue(chain->display->display); | |||
if (!chain->queue) { | |||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED); | |||
goto fail; | |||
} | |||
for (uint32_t i = 0; i < chain->image_count; i++) { | |||
result = wsi_wl_image_init(chain, &chain->images[i], | |||
pCreateInfo, pAllocator); | |||
if (result != VK_SUCCESS) | |||
goto fail; | |||
chain->images[i].busy = false; | |||
} | |||
*swapchain_out = &chain->base; | |||
return VK_SUCCESS; | |||
fail: | |||
wsi_wl_swapchain_destroy(&chain->base, pAllocator); | |||
return result; | |||
} | |||
VkResult | |||
radv_wl_init_wsi(struct radv_physical_device *device) | |||
{ | |||
struct wsi_wayland *wsi; | |||
VkResult result; | |||
wsi = radv_alloc(&device->instance->alloc, sizeof(*wsi), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); | |||
if (!wsi) { | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
goto fail; | |||
} | |||
wsi->physical_device = device; | |||
int ret = pthread_mutex_init(&wsi->mutex, NULL); | |||
if (ret != 0) { | |||
if (ret == ENOMEM) { | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
} else { | |||
/* FINISHME: Choose a better error. */ | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
} | |||
goto fail_alloc; | |||
} | |||
wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, | |||
_mesa_key_pointer_equal); | |||
if (!wsi->displays) { | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
goto fail_mutex; | |||
} | |||
wsi->base.get_support = wsi_wl_surface_get_support; | |||
wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; | |||
wsi->base.get_formats = wsi_wl_surface_get_formats; | |||
wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; | |||
wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; | |||
device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; | |||
return VK_SUCCESS; | |||
fail_mutex: | |||
pthread_mutex_destroy(&wsi->mutex); | |||
fail_alloc: | |||
radv_free(&device->instance->alloc, wsi); | |||
fail: | |||
device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; | |||
return result; | |||
} | |||
void | |||
radv_wl_finish_wsi(struct radv_physical_device *device) | |||
{ | |||
struct wsi_wayland *wsi = | |||
(struct wsi_wayland *)device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; | |||
if (wsi) { | |||
_mesa_hash_table_destroy(wsi->displays, NULL); | |||
pthread_mutex_destroy(&wsi->mutex); | |||
radv_free(&device->instance->alloc, wsi); | |||
} | |||
} |
@@ -0,0 +1,963 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based mostly on anv driver which is: | |||
* Copyright © 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <X11/Xlib-xcb.h> | |||
#include <X11/xshmfence.h> | |||
#include <xcb/xcb.h> | |||
#include <xcb/dri3.h> | |||
#include <xcb/present.h> | |||
#include <errno.h> | |||
#include <unistd.h> | |||
#include "radv_wsi.h" | |||
#include "vk_format.h" | |||
#include "util/hash_table.h" | |||
struct wsi_x11_connection { | |||
bool has_dri3; | |||
bool has_present; | |||
}; | |||
struct wsi_x11 { | |||
struct radv_wsi_interface base; | |||
pthread_mutex_t mutex; | |||
/* Hash table of xcb_connection -> wsi_x11_connection mappings */ | |||
struct hash_table *connections; | |||
}; | |||
static struct wsi_x11_connection * | |||
wsi_x11_connection_create(struct radv_physical_device *device, | |||
xcb_connection_t *conn) | |||
{ | |||
xcb_query_extension_cookie_t dri3_cookie, pres_cookie; | |||
xcb_query_extension_reply_t *dri3_reply, *pres_reply; | |||
struct wsi_x11_connection *wsi_conn = | |||
radv_alloc(&device->instance->alloc, sizeof(*wsi_conn), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); | |||
if (!wsi_conn) | |||
return NULL; | |||
dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); | |||
pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); | |||
dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); | |||
pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); | |||
if (dri3_reply == NULL || pres_reply == NULL) { | |||
free(dri3_reply); | |||
free(pres_reply); | |||
radv_free(&device->instance->alloc, wsi_conn); | |||
return NULL; | |||
} | |||
wsi_conn->has_dri3 = dri3_reply->present != 0; | |||
wsi_conn->has_present = pres_reply->present != 0; | |||
free(dri3_reply); | |||
free(pres_reply); | |||
return wsi_conn; | |||
} | |||
static void | |||
wsi_x11_connection_destroy(struct radv_physical_device *device, | |||
struct wsi_x11_connection *conn) | |||
{ | |||
radv_free(&device->instance->alloc, conn); | |||
} | |||
static struct wsi_x11_connection * | |||
wsi_x11_get_connection(struct radv_physical_device *device, | |||
xcb_connection_t *conn) | |||
{ | |||
struct wsi_x11 *wsi = | |||
(struct wsi_x11 *)device->wsi[VK_ICD_WSI_PLATFORM_XCB]; | |||
pthread_mutex_lock(&wsi->mutex); | |||
struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); | |||
if (!entry) { | |||
/* We're about to make a bunch of blocking calls. Let's drop the | |||
* mutex for now so we don't block up too badly. | |||
*/ | |||
pthread_mutex_unlock(&wsi->mutex); | |||
struct wsi_x11_connection *wsi_conn = | |||
wsi_x11_connection_create(device, conn); | |||
pthread_mutex_lock(&wsi->mutex); | |||
entry = _mesa_hash_table_search(wsi->connections, conn); | |||
if (entry) { | |||
/* Oops, someone raced us to it */ | |||
wsi_x11_connection_destroy(device, wsi_conn); | |||
} else { | |||
entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); | |||
} | |||
} | |||
pthread_mutex_unlock(&wsi->mutex); | |||
return entry->data; | |||
} | |||
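/* DRI3 window buffers are plain 24/32-bit pixmaps, so only the two
 * B8G8R8A8 surface formats are advertised.
 */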
static const VkSurfaceFormatKHR formats[] = { | |||
{ .format = VK_FORMAT_B8G8R8A8_UNORM, }, | |||
{ .format = VK_FORMAT_B8G8R8A8_SRGB, }, | |||
}; | |||
static const VkPresentModeKHR present_modes[] = { | |||
VK_PRESENT_MODE_MAILBOX_KHR, | |||
}; | |||
static xcb_screen_t * | |||
get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) | |||
{ | |||
xcb_screen_iterator_t screen_iter = | |||
xcb_setup_roots_iterator(xcb_get_setup(conn)); | |||
for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { | |||
if (screen_iter.data->root == root) | |||
return screen_iter.data; | |||
} | |||
return NULL; | |||
} | |||
static xcb_visualtype_t * | |||
screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, | |||
unsigned *depth) | |||
{ | |||
xcb_depth_iterator_t depth_iter = | |||
xcb_screen_allowed_depths_iterator(screen); | |||
for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { | |||
xcb_visualtype_iterator_t visual_iter = | |||
xcb_depth_visuals_iterator (depth_iter.data); | |||
for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { | |||
if (visual_iter.data->visual_id == visual_id) { | |||
if (depth) | |||
*depth = depth_iter.data->depth; | |||
return visual_iter.data; | |||
} | |||
} | |||
} | |||
return NULL; | |||
} | |||
static xcb_visualtype_t * | |||
connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, | |||
unsigned *depth) | |||
{ | |||
xcb_screen_iterator_t screen_iter = | |||
xcb_setup_roots_iterator(xcb_get_setup(conn)); | |||
/* For this we have to iterate over all of the screens which is rather | |||
* annoying. Fortunately, there is probably only 1. | |||
*/ | |||
for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { | |||
xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, | |||
visual_id, depth); | |||
if (visual) | |||
return visual; | |||
} | |||
return NULL; | |||
} | |||
static xcb_visualtype_t * | |||
get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, | |||
unsigned *depth) | |||
{ | |||
xcb_query_tree_cookie_t tree_cookie; | |||
xcb_get_window_attributes_cookie_t attrib_cookie; | |||
xcb_query_tree_reply_t *tree; | |||
xcb_get_window_attributes_reply_t *attrib; | |||
tree_cookie = xcb_query_tree(conn, window); | |||
attrib_cookie = xcb_get_window_attributes(conn, window); | |||
tree = xcb_query_tree_reply(conn, tree_cookie, NULL); | |||
attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); | |||
if (attrib == NULL || tree == NULL) { | |||
free(attrib); | |||
free(tree); | |||
return NULL; | |||
} | |||
xcb_window_t root = tree->root; | |||
xcb_visualid_t visual_id = attrib->visual; | |||
free(attrib); | |||
free(tree); | |||
xcb_screen_t *screen = get_screen_for_root(conn, root); | |||
if (screen == NULL) | |||
return NULL; | |||
return screen_get_visualtype(screen, visual_id, depth); | |||
} | |||
static bool | |||
visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) | |||
{ | |||
uint32_t rgb_mask = visual->red_mask | | |||
visual->green_mask | | |||
visual->blue_mask; | |||
uint32_t all_mask = 0xffffffff >> (32 - depth); | |||
/* Do we have bits left over after RGB? */ | |||
return (all_mask & ~rgb_mask) != 0; | |||
} | |||
VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR( | |||
VkPhysicalDevice physicalDevice, | |||
uint32_t queueFamilyIndex, | |||
xcb_connection_t* connection, | |||
xcb_visualid_t visual_id) | |||
{ | |||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); | |||
struct wsi_x11_connection *wsi_conn = | |||
wsi_x11_get_connection(device, connection); | |||
if (!wsi_conn)
return false;
if (!wsi_conn->has_dri3) {
fprintf(stderr, "vulkan: No DRI3 support\n"); | |||
return false; | |||
} | |||
unsigned visual_depth; | |||
if (!connection_get_visualtype(connection, visual_id, &visual_depth)) | |||
return false; | |||
if (visual_depth != 24 && visual_depth != 32) | |||
return false; | |||
return true; | |||
} | |||
VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR( | |||
VkPhysicalDevice physicalDevice, | |||
uint32_t queueFamilyIndex, | |||
Display* dpy, | |||
VisualID visualID) | |||
{ | |||
return radv_GetPhysicalDeviceXcbPresentationSupportKHR(physicalDevice, | |||
queueFamilyIndex, | |||
XGetXCBConnection(dpy), | |||
visualID); | |||
} | |||
static xcb_connection_t* | |||
x11_surface_get_connection(VkIcdSurfaceBase *icd_surface) | |||
{ | |||
if (icd_surface->platform == VK_ICD_WSI_PLATFORM_XLIB) | |||
return XGetXCBConnection(((VkIcdSurfaceXlib *)icd_surface)->dpy); | |||
else | |||
return ((VkIcdSurfaceXcb *)icd_surface)->connection; | |||
} | |||
static xcb_window_t | |||
x11_surface_get_window(VkIcdSurfaceBase *icd_surface) | |||
{ | |||
if (icd_surface->platform == VK_ICD_WSI_PLATFORM_XLIB) | |||
return ((VkIcdSurfaceXlib *)icd_surface)->window; | |||
else | |||
return ((VkIcdSurfaceXcb *)icd_surface)->window; | |||
} | |||
static VkResult | |||
x11_surface_get_support(VkIcdSurfaceBase *icd_surface, | |||
struct radv_physical_device *device, | |||
uint32_t queueFamilyIndex, | |||
VkBool32* pSupported) | |||
{ | |||
xcb_connection_t *conn = x11_surface_get_connection(icd_surface); | |||
xcb_window_t window = x11_surface_get_window(icd_surface); | |||
struct wsi_x11_connection *wsi_conn = | |||
wsi_x11_get_connection(device, conn); | |||
if (!wsi_conn) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
if (!wsi_conn->has_dri3) { | |||
fprintf(stderr, "vulkan: No DRI3 support\n"); | |||
*pSupported = false; | |||
return VK_SUCCESS; | |||
} | |||
unsigned visual_depth; | |||
if (!get_visualtype_for_window(conn, window, &visual_depth)) { | |||
*pSupported = false; | |||
return VK_SUCCESS; | |||
} | |||
if (visual_depth != 24 && visual_depth != 32) { | |||
*pSupported = false; | |||
return VK_SUCCESS; | |||
} | |||
*pSupported = true; | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, | |||
struct radv_physical_device *device, | |||
VkSurfaceCapabilitiesKHR *caps) | |||
{ | |||
xcb_connection_t *conn = x11_surface_get_connection(icd_surface); | |||
xcb_window_t window = x11_surface_get_window(icd_surface); | |||
xcb_get_geometry_cookie_t geom_cookie; | |||
xcb_generic_error_t *err; | |||
xcb_get_geometry_reply_t *geom; | |||
unsigned visual_depth; | |||
geom_cookie = xcb_get_geometry(conn, window); | |||
/* This does a round-trip. This is why we do get_geometry first and | |||
* wait to read the reply until after we have a visual. | |||
*/ | |||
xcb_visualtype_t *visual = | |||
get_visualtype_for_window(conn, window, &visual_depth); | |||
geom = xcb_get_geometry_reply(conn, geom_cookie, &err); | |||
if (geom) { | |||
VkExtent2D extent = { geom->width, geom->height }; | |||
caps->currentExtent = extent; | |||
caps->minImageExtent = extent; | |||
caps->maxImageExtent = extent; | |||
} else { | |||
/* This can happen if the client didn't wait for the configure event | |||
* to come back from the compositor. In that case, we don't know the | |||
* size of the window so we just return valid "I don't know" stuff. | |||
*/ | |||
caps->currentExtent = (VkExtent2D) { -1, -1 }; | |||
caps->minImageExtent = (VkExtent2D) { 1, 1 }; | |||
caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; | |||
} | |||
free(err); | |||
free(geom); | |||
if (visual_has_alpha(visual, visual_depth)) { | |||
caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | | |||
VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; | |||
} else { | |||
caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | | |||
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; | |||
} | |||
caps->minImageCount = 2; | |||
caps->maxImageCount = 4; | |||
caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; | |||
caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; | |||
caps->maxImageArrayLayers = 1; | |||
caps->supportedUsageFlags = | |||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | | |||
VK_IMAGE_USAGE_SAMPLED_BIT | | |||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | | |||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_surface_get_formats(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t *pSurfaceFormatCount, | |||
VkSurfaceFormatKHR *pSurfaceFormats) | |||
{ | |||
if (pSurfaceFormats == NULL) { | |||
*pSurfaceFormatCount = ARRAY_SIZE(formats); | |||
return VK_SUCCESS; | |||
} | |||
assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); | |||
typed_memcpy(pSurfaceFormats, formats, ARRAY_SIZE(formats));
*pSurfaceFormatCount = ARRAY_SIZE(formats); | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_surface_get_present_modes(VkIcdSurfaceBase *surface, | |||
struct radv_physical_device *device, | |||
uint32_t *pPresentModeCount, | |||
VkPresentModeKHR *pPresentModes) | |||
{ | |||
if (pPresentModes == NULL) { | |||
*pPresentModeCount = ARRAY_SIZE(present_modes); | |||
return VK_SUCCESS; | |||
} | |||
assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); | |||
typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes));
*pPresentModeCount = ARRAY_SIZE(present_modes); | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_surface_create_swapchain(VkIcdSurfaceBase *surface, | |||
struct radv_device *device, | |||
const VkSwapchainCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct radv_swapchain **swapchain); | |||
VkResult radv_CreateXcbSurfaceKHR( | |||
VkInstance _instance, | |||
const VkXcbSurfaceCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkSurfaceKHR* pSurface) | |||
{ | |||
RADV_FROM_HANDLE(radv_instance, instance, _instance); | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); | |||
VkIcdSurfaceXcb *surface; | |||
surface = radv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (surface == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; | |||
surface->connection = pCreateInfo->connection; | |||
surface->window = pCreateInfo->window; | |||
*pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); | |||
return VK_SUCCESS; | |||
} | |||
VkResult radv_CreateXlibSurfaceKHR( | |||
VkInstance _instance, | |||
const VkXlibSurfaceCreateInfoKHR* pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
VkSurfaceKHR* pSurface) | |||
{ | |||
RADV_FROM_HANDLE(radv_instance, instance, _instance); | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR); | |||
VkIcdSurfaceXlib *surface; | |||
surface = radv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (surface == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
surface->base.platform = VK_ICD_WSI_PLATFORM_XLIB; | |||
surface->dpy = pCreateInfo->dpy; | |||
surface->window = pCreateInfo->window; | |||
*pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); | |||
return VK_SUCCESS; | |||
} | |||
struct x11_image { | |||
struct radv_image * image; | |||
struct radv_device_memory * memory; | |||
xcb_pixmap_t pixmap; | |||
bool busy; | |||
struct xshmfence * shm_fence; | |||
uint32_t sync_fence; | |||
}; | |||
struct x11_swapchain { | |||
struct radv_swapchain base; | |||
xcb_connection_t * conn; | |||
xcb_window_t window; | |||
xcb_gc_t gc; | |||
VkExtent2D extent; | |||
uint32_t image_count; | |||
xcb_present_event_t event_id; | |||
xcb_special_event_t * special_event; | |||
uint64_t send_sbc; | |||
uint32_t stamp; | |||
struct x11_image images[0]; | |||
}; | |||
static VkResult | |||
x11_get_images(struct radv_swapchain *radv_chain, | |||
uint32_t* pCount, VkImage *pSwapchainImages) | |||
{ | |||
struct x11_swapchain *chain = (struct x11_swapchain *)radv_chain; | |||
if (pSwapchainImages == NULL) { | |||
*pCount = chain->image_count; | |||
return VK_SUCCESS; | |||
} | |||
assert(chain->image_count <= *pCount); | |||
for (uint32_t i = 0; i < chain->image_count; i++) | |||
pSwapchainImages[i] = radv_image_to_handle(chain->images[i].image); | |||
*pCount = chain->image_count; | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_handle_dri3_present_event(struct x11_swapchain *chain, | |||
xcb_present_generic_event_t *event) | |||
{ | |||
switch (event->evtype) { | |||
case XCB_PRESENT_CONFIGURE_NOTIFY: { | |||
xcb_present_configure_notify_event_t *config = (void *) event; | |||
if (config->width != chain->extent.width || | |||
config->height != chain->extent.height) | |||
return vk_error(VK_ERROR_OUT_OF_DATE_KHR); | |||
break; | |||
} | |||
case XCB_PRESENT_EVENT_IDLE_NOTIFY: { | |||
xcb_present_idle_notify_event_t *idle = (void *) event; | |||
for (unsigned i = 0; i < chain->image_count; i++) { | |||
if (chain->images[i].pixmap == idle->pixmap) { | |||
chain->images[i].busy = false; | |||
break; | |||
} | |||
} | |||
break; | |||
} | |||
case XCB_PRESENT_COMPLETE_NOTIFY: | |||
default: | |||
break; | |||
} | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_acquire_next_image(struct radv_swapchain *radv_chain, | |||
uint64_t timeout, | |||
VkSemaphore semaphore, | |||
uint32_t *image_index) | |||
{ | |||
struct x11_swapchain *chain = (struct x11_swapchain *)radv_chain; | |||
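/* Look for an idle image; if every image is busy, block on the
 * swapchain's special event queue until a PresentIdleNotify event
 * marks one as free again.
 */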
while (1) { | |||
for (uint32_t i = 0; i < chain->image_count; i++) { | |||
if (!chain->images[i].busy) { | |||
/* We found a non-busy image */ | |||
xshmfence_await(chain->images[i].shm_fence); | |||
*image_index = i; | |||
return VK_SUCCESS; | |||
} | |||
} | |||
xcb_flush(chain->conn); | |||
xcb_generic_event_t *event = | |||
xcb_wait_for_special_event(chain->conn, chain->special_event); | |||
if (!event) | |||
return vk_error(VK_ERROR_OUT_OF_DATE_KHR); | |||
VkResult result = x11_handle_dri3_present_event(chain, (void *)event); | |||
free(event); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
} | |||
} | |||
static VkResult | |||
x11_queue_present(struct radv_swapchain *radv_chain, | |||
struct radv_queue *queue, | |||
uint32_t image_index) | |||
{ | |||
struct x11_swapchain *chain = (struct x11_swapchain *)radv_chain; | |||
struct x11_image *image = &chain->images[image_index]; | |||
assert(image_index < chain->image_count); | |||
uint32_t options = XCB_PRESENT_OPTION_NONE; | |||
int64_t target_msc = 0; | |||
int64_t divisor = 0; | |||
int64_t remainder = 0; | |||
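/* XXX: presents are always requested ASYNC, i.e. without waiting for
 * vblank; target_msc/divisor/remainder stay 0 so the server presents
 * as soon as it can.
 */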
options |= XCB_PRESENT_OPTION_ASYNC; | |||
xshmfence_reset(image->shm_fence); | |||
++chain->send_sbc; | |||
xcb_void_cookie_t cookie = | |||
xcb_present_pixmap(chain->conn, | |||
chain->window, | |||
image->pixmap, | |||
(uint32_t) chain->send_sbc, | |||
0, /* valid */ | |||
0, /* update */ | |||
0, /* x_off */ | |||
0, /* y_off */ | |||
XCB_NONE, /* target_crtc */ | |||
XCB_NONE, | |||
image->sync_fence, | |||
options, | |||
target_msc, | |||
divisor, | |||
remainder, 0, NULL); | |||
xcb_discard_reply(chain->conn, cookie.sequence); | |||
image->busy = true; | |||
xcb_flush(chain->conn); | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_image_init(struct radv_device *device, struct x11_swapchain *chain, | |||
const VkSwapchainCreateInfoKHR *pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct x11_image *image) | |||
{ | |||
xcb_void_cookie_t cookie; | |||
VkResult result = VK_SUCCESS; | |||
int fd; | |||
VkImage image_h; | |||
bool bret; | |||
struct radeon_surf *surface; | |||
result = radv_image_create(radv_device_to_handle(device), | |||
&(struct radv_image_create_info) { | |||
.vk_info = | |||
&(VkImageCreateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | |||
.imageType = VK_IMAGE_TYPE_2D, | |||
.format = pCreateInfo->imageFormat, | |||
.extent = { | |||
.width = pCreateInfo->imageExtent.width, | |||
.height = pCreateInfo->imageExtent.height, | |||
.depth = 1 | |||
}, | |||
.mipLevels = 1, | |||
.arrayLayers = 1, | |||
.samples = 1, | |||
/* FIXME: Need a way to use X tiling to allow scanout */ | |||
.tiling = VK_IMAGE_TILING_OPTIMAL, | |||
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, | |||
.flags = 0, | |||
}, | |||
.scanout = true}, | |||
NULL, | |||
&image_h); | |||
if (result != VK_SUCCESS) | |||
return result; | |||
image->image = radv_image_from_handle(image_h); | |||
VkDeviceMemory memory_h; | |||
result = radv_AllocateMemory(radv_device_to_handle(device), | |||
&(VkMemoryAllocateInfo) { | |||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | |||
.allocationSize = image->image->size, | |||
.memoryTypeIndex = 0, | |||
}, | |||
NULL /* XXX: pAllocator */, | |||
&memory_h); | |||
if (result != VK_SUCCESS) | |||
goto fail_create_image; | |||
image->memory = radv_device_memory_from_handle(memory_h); | |||
// image->memory->bo.is_winsys_bo = true; | |||
radv_BindImageMemory(radv_device_to_handle(device), image_h, memory_h, 0);
bret = device->ws->buffer_get_fd(device->ws, | |||
image->memory->bo, &fd); | |||
if (bret == false) | |||
goto fail_alloc_memory; | |||
{ | |||
struct radeon_bo_metadata metadata; | |||
radv_init_metadata(device, image->image, &metadata); | |||
device->ws->buffer_set_metadata(image->memory->bo, &metadata); | |||
} | |||
surface = &image->image->surface; | |||
uint32_t bpp = 32; | |||
uint32_t depth = 24; | |||
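/* Wrap the exported dma-buf in a DRI3 pixmap (the server takes
 * ownership of the fd) and create an xshmfence the server triggers
 * when the pixmap goes idle. bpp/depth are fixed at 32/24 to match
 * the visuals we accept.
 */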
image->pixmap = xcb_generate_id(chain->conn); | |||
cookie = | |||
xcb_dri3_pixmap_from_buffer_checked(chain->conn, | |||
image->pixmap, | |||
chain->window, | |||
image->image->size, | |||
pCreateInfo->imageExtent.width, | |||
pCreateInfo->imageExtent.height, | |||
surface->level[0].pitch_bytes, | |||
depth, bpp, fd); | |||
xcb_discard_reply(chain->conn, cookie.sequence); | |||
int fence_fd = xshmfence_alloc_shm(); | |||
if (fence_fd < 0) | |||
goto fail_pixmap; | |||
image->shm_fence = xshmfence_map_shm(fence_fd); | |||
if (image->shm_fence == NULL) | |||
goto fail_shmfence_alloc; | |||
image->sync_fence = xcb_generate_id(chain->conn); | |||
xcb_dri3_fence_from_fd(chain->conn, | |||
image->pixmap, | |||
image->sync_fence, | |||
false, | |||
fence_fd); | |||
image->busy = false; | |||
xshmfence_trigger(image->shm_fence); | |||
return VK_SUCCESS; | |||
fail_shmfence_alloc: | |||
close(fence_fd); | |||
fail_pixmap: | |||
cookie = xcb_free_pixmap(chain->conn, image->pixmap); | |||
xcb_discard_reply(chain->conn, cookie.sequence); | |||
fail_alloc_memory: | |||
radv_FreeMemory(radv_device_to_handle(chain->base.device), | |||
radv_device_memory_to_handle(image->memory), pAllocator); | |||
fail_create_image: | |||
radv_DestroyImage(radv_device_to_handle(chain->base.device), | |||
radv_image_to_handle(image->image), pAllocator); | |||
return result; | |||
} | |||
static void | |||
x11_image_finish(struct x11_swapchain *chain, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct x11_image *image) | |||
{ | |||
xcb_void_cookie_t cookie; | |||
cookie = xcb_sync_destroy_fence(chain->conn, image->sync_fence); | |||
xcb_discard_reply(chain->conn, cookie.sequence); | |||
xshmfence_unmap_shm(image->shm_fence); | |||
cookie = xcb_free_pixmap(chain->conn, image->pixmap); | |||
xcb_discard_reply(chain->conn, cookie.sequence); | |||
radv_DestroyImage(radv_device_to_handle(chain->base.device), | |||
radv_image_to_handle(image->image), pAllocator); | |||
radv_FreeMemory(radv_device_to_handle(chain->base.device), | |||
radv_device_memory_to_handle(image->memory), pAllocator); | |||
} | |||
static VkResult | |||
x11_swapchain_destroy(struct radv_swapchain *radv_chain, | |||
const VkAllocationCallbacks *pAllocator) | |||
{ | |||
struct x11_swapchain *chain = (struct x11_swapchain *)radv_chain; | |||
for (uint32_t i = 0; i < chain->image_count; i++) | |||
x11_image_finish(chain, pAllocator, &chain->images[i]); | |||
xcb_unregister_for_special_event(chain->conn, chain->special_event); | |||
radv_free2(&chain->base.device->alloc, pAllocator, chain); | |||
return VK_SUCCESS; | |||
} | |||
static VkResult | |||
x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, | |||
struct radv_device *device, | |||
const VkSwapchainCreateInfoKHR *pCreateInfo, | |||
const VkAllocationCallbacks* pAllocator, | |||
struct radv_swapchain **swapchain_out) | |||
{ | |||
struct x11_swapchain *chain; | |||
xcb_void_cookie_t cookie; | |||
VkResult result; | |||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); | |||
int num_images = pCreateInfo->minImageCount; | |||
/* For true mailbox mode, we need at least 4 images: | |||
* 1) One to scan out from | |||
* 2) One to have queued for scan-out | |||
* 3) One to be currently held by the X server
* 4) One to render to | |||
*/ | |||
if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) | |||
num_images = MAX(num_images, 4); | |||
size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); | |||
chain = radv_alloc2(&device->alloc, pAllocator, size, 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); | |||
if (chain == NULL) | |||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
chain->base.device = device; | |||
chain->base.destroy = x11_swapchain_destroy; | |||
chain->base.get_images = x11_get_images; | |||
chain->base.acquire_next_image = x11_acquire_next_image; | |||
chain->base.queue_present = x11_queue_present; | |||
chain->conn = x11_surface_get_connection(icd_surface); | |||
chain->window = x11_surface_get_window(icd_surface); | |||
chain->extent = pCreateInfo->imageExtent; | |||
chain->image_count = num_images; | |||
chain->send_sbc = 0; | |||
chain->event_id = xcb_generate_id(chain->conn); | |||
xcb_present_select_input(chain->conn, chain->event_id, chain->window, | |||
XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY | | |||
XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY | | |||
XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); | |||
/* Create an XCB event queue to hold present events outside of the usual | |||
* application event queue | |||
*/ | |||
chain->special_event = | |||
xcb_register_for_special_xge(chain->conn, &xcb_present_id, | |||
chain->event_id, NULL); | |||
chain->gc = xcb_generate_id(chain->conn); | |||
if (!chain->gc) { | |||
/* FINISHME: Choose a better error. */ | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
goto fail_register; | |||
} | |||
cookie = xcb_create_gc(chain->conn, | |||
chain->gc, | |||
chain->window, | |||
XCB_GC_GRAPHICS_EXPOSURES, | |||
(uint32_t []) { 0 }); | |||
xcb_discard_reply(chain->conn, cookie.sequence); | |||
uint32_t image = 0; | |||
for (; image < chain->image_count; image++) { | |||
result = x11_image_init(device, chain, pCreateInfo, pAllocator, | |||
&chain->images[image]); | |||
if (result != VK_SUCCESS) | |||
goto fail_init_images; | |||
} | |||
*swapchain_out = &chain->base; | |||
return VK_SUCCESS; | |||
fail_init_images: | |||
for (uint32_t j = 0; j < image; j++) | |||
x11_image_finish(chain, pAllocator, &chain->images[j]); | |||
fail_register: | |||
xcb_unregister_for_special_event(chain->conn, chain->special_event); | |||
radv_free2(&device->alloc, pAllocator, chain); | |||
return result; | |||
} | |||
VkResult | |||
radv_x11_init_wsi(struct radv_physical_device *device) | |||
{ | |||
struct wsi_x11 *wsi; | |||
VkResult result; | |||
wsi = radv_alloc(&device->instance->alloc, sizeof(*wsi), 8, | |||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); | |||
if (!wsi) { | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
goto fail; | |||
} | |||
int ret = pthread_mutex_init(&wsi->mutex, NULL); | |||
if (ret != 0) { | |||
if (ret == ENOMEM) { | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
} else { | |||
/* FINISHME: Choose a better error. */ | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
} | |||
goto fail_alloc; | |||
} | |||
wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, | |||
_mesa_key_pointer_equal); | |||
if (!wsi->connections) { | |||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); | |||
goto fail_mutex; | |||
} | |||
wsi->base.get_support = x11_surface_get_support; | |||
wsi->base.get_capabilities = x11_surface_get_capabilities; | |||
wsi->base.get_formats = x11_surface_get_formats; | |||
wsi->base.get_present_modes = x11_surface_get_present_modes; | |||
wsi->base.create_swapchain = x11_surface_create_swapchain; | |||
device->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; | |||
device->wsi[VK_ICD_WSI_PLATFORM_XLIB] = &wsi->base; | |||
return VK_SUCCESS; | |||
fail_mutex: | |||
pthread_mutex_destroy(&wsi->mutex); | |||
fail_alloc: | |||
radv_free(&device->instance->alloc, wsi); | |||
fail: | |||
device->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; | |||
device->wsi[VK_ICD_WSI_PLATFORM_XLIB] = NULL; | |||
return result; | |||
} | |||
void | |||
radv_x11_finish_wsi(struct radv_physical_device *device) | |||
{ | |||
struct wsi_x11 *wsi = | |||
(struct wsi_x11 *)device->wsi[VK_ICD_WSI_PLATFORM_XCB]; | |||
if (wsi) { | |||
_mesa_hash_table_destroy(wsi->connections, NULL); | |||
pthread_mutex_destroy(&wsi->mutex); | |||
radv_free(&device->instance->alloc, wsi); | |||
} | |||
} |
@@ -0,0 +1,449 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* Based on u_format.h which is: | |||
* Copyright 2009-2010 Vmware, Inc. | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#include <assert.h>
#include <stdbool.h>
#include <vulkan/vulkan.h>
enum vk_format_layout { | |||
/** | |||
* Formats with vk_format_block::width == vk_format_block::height == 1 | |||
* that can be described as an ordinary data structure. | |||
*/ | |||
VK_FORMAT_LAYOUT_PLAIN = 0, | |||
/** | |||
* Formats with sub-sampled channels. | |||
* | |||
* This is for formats like YVYU where there is less than one sample per | |||
* pixel. | |||
*/ | |||
VK_FORMAT_LAYOUT_SUBSAMPLED = 3, | |||
/** | |||
* S3 Texture Compression formats. | |||
*/ | |||
VK_FORMAT_LAYOUT_S3TC = 4, | |||
/** | |||
* Red-Green Texture Compression formats. | |||
*/ | |||
VK_FORMAT_LAYOUT_RGTC = 5, | |||
/** | |||
* Ericsson Texture Compression | |||
*/ | |||
VK_FORMAT_LAYOUT_ETC = 6, | |||
/** | |||
* BC6/7 Texture Compression | |||
*/ | |||
VK_FORMAT_LAYOUT_BPTC = 7, | |||
/** | |||
* ASTC | |||
*/ | |||
VK_FORMAT_LAYOUT_ASTC = 8, | |||
/** | |||
* Everything else that doesn't fit in any of the above layouts. | |||
*/ | |||
VK_FORMAT_LAYOUT_OTHER = 9 | |||
}; | |||
struct vk_format_block | |||
{ | |||
/** Block width in pixels */ | |||
unsigned width; | |||
/** Block height in pixels */ | |||
unsigned height; | |||
/** Block size in bits */ | |||
unsigned bits; | |||
}; | |||
enum vk_format_type { | |||
VK_FORMAT_TYPE_VOID = 0, | |||
VK_FORMAT_TYPE_UNSIGNED = 1, | |||
VK_FORMAT_TYPE_SIGNED = 2, | |||
VK_FORMAT_TYPE_FIXED = 3, | |||
VK_FORMAT_TYPE_FLOAT = 4 | |||
}; | |||
enum vk_format_colorspace { | |||
VK_FORMAT_COLORSPACE_RGB = 0, | |||
VK_FORMAT_COLORSPACE_SRGB = 1, | |||
VK_FORMAT_COLORSPACE_YUV = 2, | |||
VK_FORMAT_COLORSPACE_ZS = 3 | |||
}; | |||
struct vk_format_channel_description { | |||
unsigned type:5; | |||
unsigned normalized:1; | |||
unsigned pure_integer:1; | |||
unsigned scaled:1; | |||
unsigned size:8; | |||
unsigned shift:16; | |||
}; | |||
struct vk_format_description | |||
{ | |||
VkFormat format; | |||
const char *name; | |||
const char *short_name; | |||
struct vk_format_block block; | |||
enum vk_format_layout layout; | |||
unsigned nr_channels:3; | |||
unsigned is_array:1; | |||
unsigned is_bitmask:1; | |||
unsigned is_mixed:1; | |||
struct vk_format_channel_description channel[4]; | |||
unsigned char swizzle[4]; | |||
enum vk_format_colorspace colorspace; | |||
}; | |||
extern const struct vk_format_description vk_format_description_table[]; | |||
const struct vk_format_description *vk_format_description(VkFormat format); | |||
/** | |||
* Return total bits needed for the pixel format per block. | |||
*/ | |||
static inline unsigned
vk_format_get_blocksizebits(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
assert(desc); | |||
if (!desc) { | |||
return 0; | |||
} | |||
return desc->block.bits; | |||
} | |||
/** | |||
* Return bytes per block (not pixel) for the given format. | |||
*/ | |||
static inline unsigned
vk_format_get_blocksize(VkFormat format) | |||
{ | |||
unsigned bits = vk_format_get_blocksizebits(format);
unsigned bytes = bits / 8;
assert(bits % 8 == 0); | |||
assert(bytes > 0); | |||
if (bytes == 0) { | |||
bytes = 1; | |||
} | |||
return bytes; | |||
} | |||
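/* Illustrative values (taken from the format table in this series):
 * VK_FORMAT_R8G8B8A8_UNORM has a 1x1 block of 32 bits, so
 * vk_format_get_blocksize() returns 4, while VK_FORMAT_BC1_RGB_UNORM_BLOCK
 * has a 4x4 block of 64 bits, i.e. 8 bytes covering 16 pixels. */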
static inline unsigned
vk_format_get_blockwidth(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
assert(desc); | |||
if (!desc) { | |||
return 1; | |||
} | |||
return desc->block.width; | |||
} | |||
static inline unsigned
vk_format_get_blockheight(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
assert(desc); | |||
if (!desc) { | |||
return 1; | |||
} | |||
return desc->block.height; | |||
} | |||
/** | |||
* Return the index of the first non-void channel | |||
* -1 if no non-void channels | |||
*/ | |||
static inline int | |||
vk_format_get_first_non_void_channel(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
int i; | |||
for (i = 0; i < 4; i++) | |||
if (desc->channel[i].type != VK_FORMAT_TYPE_VOID) | |||
break; | |||
if (i == 4) | |||
return -1; | |||
return i; | |||
} | |||
enum vk_swizzle { | |||
VK_SWIZZLE_X, | |||
VK_SWIZZLE_Y, | |||
VK_SWIZZLE_Z, | |||
VK_SWIZZLE_W, | |||
VK_SWIZZLE_0, | |||
VK_SWIZZLE_1, | |||
VK_SWIZZLE_NONE, | |||
VK_SWIZZLE_MAX, /**< Number of enums counter (must be last) */ | |||
}; | |||
static inline VkImageAspectFlags | |||
vk_format_aspects(VkFormat format) | |||
{ | |||
switch (format) { | |||
case VK_FORMAT_UNDEFINED: | |||
return 0; | |||
case VK_FORMAT_S8_UINT: | |||
return VK_IMAGE_ASPECT_STENCIL_BIT; | |||
case VK_FORMAT_D16_UNORM_S8_UINT: | |||
case VK_FORMAT_D24_UNORM_S8_UINT: | |||
case VK_FORMAT_D32_SFLOAT_S8_UINT: | |||
return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; | |||
case VK_FORMAT_D16_UNORM: | |||
case VK_FORMAT_X8_D24_UNORM_PACK32: | |||
case VK_FORMAT_D32_SFLOAT: | |||
return VK_IMAGE_ASPECT_DEPTH_BIT; | |||
default: | |||
return VK_IMAGE_ASPECT_COLOR_BIT; | |||
} | |||
} | |||
static inline enum vk_swizzle | |||
radv_swizzle_conv(int idx, const unsigned char chan[4], VkComponentSwizzle vk_swiz) | |||
{ | |||
int x; | |||
switch (vk_swiz) { | |||
case VK_COMPONENT_SWIZZLE_IDENTITY: | |||
return chan[idx]; | |||
case VK_COMPONENT_SWIZZLE_ZERO: | |||
return VK_SWIZZLE_0; | |||
case VK_COMPONENT_SWIZZLE_ONE: | |||
return VK_SWIZZLE_1; | |||
case VK_COMPONENT_SWIZZLE_R: | |||
for (x = 0; x < 4; x++) | |||
if (chan[x] == 0) | |||
return x; | |||
return VK_SWIZZLE_0; | |||
case VK_COMPONENT_SWIZZLE_G: | |||
for (x = 0; x < 4; x++) | |||
if (chan[x] == 1) | |||
return x; | |||
return VK_SWIZZLE_0; | |||
case VK_COMPONENT_SWIZZLE_B: | |||
for (x = 0; x < 4; x++) | |||
if (chan[x] == 2) | |||
return x; | |||
return VK_SWIZZLE_0; | |||
case VK_COMPONENT_SWIZZLE_A: | |||
for (x = 0; x < 4; x++) | |||
if (chan[x] == 3) | |||
return x; | |||
return VK_SWIZZLE_1; | |||
default: | |||
return chan[idx]; | |||
} | |||
} | |||
static inline void vk_format_compose_swizzles(const VkComponentMapping *mapping, | |||
const unsigned char swz[4], | |||
enum vk_swizzle dst[4]) | |||
{ | |||
dst[0] = radv_swizzle_conv(0, swz, mapping->r); | |||
dst[1] = radv_swizzle_conv(1, swz, mapping->g); | |||
dst[2] = radv_swizzle_conv(2, swz, mapping->b); | |||
dst[3] = radv_swizzle_conv(3, swz, mapping->a); | |||
} | |||
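/* Worked example (values from the format table): VK_FORMAT_B8G8R8A8_UNORM
 * stores its channels as B, G, R, A, so desc->swizzle is {Z, Y, X, W}.
 * With an identity VkComponentMapping, vk_format_compose_swizzles() yields
 * dst = {VK_SWIZZLE_Z, VK_SWIZZLE_Y, VK_SWIZZLE_X, VK_SWIZZLE_W}, and an
 * explicit VK_COMPONENT_SWIZZLE_R resolves to the same source channel. */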
static inline bool | |||
vk_format_is_compressed(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
assert(desc); | |||
if (!desc) { | |||
return false; | |||
} | |||
switch (desc->layout) { | |||
case VK_FORMAT_LAYOUT_S3TC: | |||
case VK_FORMAT_LAYOUT_RGTC: | |||
case VK_FORMAT_LAYOUT_ETC: | |||
case VK_FORMAT_LAYOUT_BPTC: | |||
case VK_FORMAT_LAYOUT_ASTC: | |||
/* XXX add other formats in the future */ | |||
return true; | |||
default: | |||
return false; | |||
} | |||
} | |||
static inline bool | |||
vk_format_has_depth(const struct vk_format_description *desc) | |||
{ | |||
return desc->colorspace == VK_FORMAT_COLORSPACE_ZS && | |||
desc->swizzle[0] != VK_SWIZZLE_NONE; | |||
} | |||
static inline bool | |||
vk_format_has_stencil(const struct vk_format_description *desc) | |||
{ | |||
return desc->colorspace == VK_FORMAT_COLORSPACE_ZS && | |||
desc->swizzle[1] != VK_SWIZZLE_NONE; | |||
} | |||
static inline bool | |||
vk_format_is_depth_or_stencil(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
assert(desc); | |||
if (!desc) { | |||
return false; | |||
} | |||
return vk_format_has_depth(desc) || | |||
vk_format_has_stencil(desc); | |||
} | |||
static inline bool | |||
vk_format_is_depth(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
assert(desc); | |||
if (!desc) { | |||
return false; | |||
} | |||
return vk_format_has_depth(desc); | |||
} | |||
static inline bool | |||
vk_format_is_color(VkFormat format) | |||
{ | |||
return !vk_format_is_depth_or_stencil(format); | |||
} | |||
static inline VkFormat | |||
vk_format_depth_only(VkFormat format) | |||
{ | |||
switch (format) { | |||
case VK_FORMAT_D16_UNORM_S8_UINT: | |||
return VK_FORMAT_D16_UNORM; | |||
case VK_FORMAT_D24_UNORM_S8_UINT: | |||
return VK_FORMAT_X8_D24_UNORM_PACK32; | |||
case VK_FORMAT_D32_SFLOAT_S8_UINT: | |||
return VK_FORMAT_D32_SFLOAT; | |||
default: | |||
return format; | |||
} | |||
} | |||
static inline bool | |||
vk_format_is_int(VkFormat format) | |||
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
int channel = vk_format_get_first_non_void_channel(format); | |||
return channel >= 0 && desc->channel[channel].pure_integer; | |||
} | |||
static inline VkFormat | |||
vk_format_stencil_only(VkFormat format) | |||
{ | |||
return VK_FORMAT_S8_UINT; | |||
} | |||
static inline unsigned
vk_format_get_component_bits(VkFormat format,
enum vk_format_colorspace colorspace,
unsigned component)
{ | |||
const struct vk_format_description *desc = vk_format_description(format); | |||
enum vk_format_colorspace desc_colorspace; | |||
assert(desc);
if (!desc) {
return 0;
}
assert(component < 4); | |||
/* Treat RGB and SRGB as equivalent. */ | |||
if (colorspace == VK_FORMAT_COLORSPACE_SRGB) { | |||
colorspace = VK_FORMAT_COLORSPACE_RGB; | |||
} | |||
if (desc->colorspace == VK_FORMAT_COLORSPACE_SRGB) { | |||
desc_colorspace = VK_FORMAT_COLORSPACE_RGB; | |||
} else { | |||
desc_colorspace = desc->colorspace; | |||
} | |||
if (desc_colorspace != colorspace) { | |||
return 0; | |||
} | |||
switch (desc->swizzle[component]) { | |||
case VK_SWIZZLE_X: | |||
return desc->channel[0].size; | |||
case VK_SWIZZLE_Y: | |||
return desc->channel[1].size; | |||
case VK_SWIZZLE_Z: | |||
return desc->channel[2].size; | |||
case VK_SWIZZLE_W: | |||
return desc->channel[3].size; | |||
default: | |||
return 0; | |||
} | |||
} | |||
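/* For example, with the table entry for VK_FORMAT_R5G6B5_UNORM_PACK16
 * (channels un5/un6/un5, swizzle zyx1), asking for component 1 (green) in
 * the RGB colorspace returns 6. */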
#ifdef __cplusplus | |||
} /* extern "C" */
#endif |
@@ -0,0 +1,188 @@ | |||
/* this is pretty much taken from the gallium one. */ | |||
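# Column layout, as consumed by vk_format_parse.py:
#   name, layout, block width, block height,
#   four channel descriptors (e.g. un8 = unsigned normalized 8-bit,
#   up16 = unsigned pure-integer 16-bit, f32 = 32-bit float),
#   swizzle string (x/y/z/w pick a stored channel, 0/1 are constants,
#   _ means none), colorspace.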
VK_FORMAT_UNDEFINED , plain, 1, 1, u8 , , , , x001, rgb | |||
VK_FORMAT_R4G4_UNORM_PACK8 , plain, 1, 1, un4 , un4 , , , xy01, rgb | |||
VK_FORMAT_R4G4B4A4_UNORM_PACK16 , plain, 1, 1, un4 , un4 , un4 , un4 , wzyx, rgb | |||
VK_FORMAT_B4G4R4A4_UNORM_PACK16 , plain, 1, 1, un4 , un4 , un4 , un4 , wxyz, rgb | |||
VK_FORMAT_R5G6B5_UNORM_PACK16 , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb | |||
VK_FORMAT_B5G6R5_UNORM_PACK16 , plain, 1, 1, un5 , un6 , un5 , , xyz1, rgb | |||
VK_FORMAT_R5G5B5A1_UNORM_PACK16 , plain, 1, 1, un1 , un5 , un5 , un5 , wzyx, rgb | |||
VK_FORMAT_B5G5R5A1_UNORM_PACK16 , plain, 1, 1, un1 , un5 , un5 , un5 , wxyz, rgb | |||
VK_FORMAT_A1R5G5B5_UNORM_PACK16 , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb | |||
VK_FORMAT_R8_UNORM , plain, 1, 1, un8 , , , , x001, rgb | |||
VK_FORMAT_R8_SNORM , plain, 1, 1, sn8 , , , , x001, rgb | |||
VK_FORMAT_R8_USCALED , plain, 1, 1, us8 , , , , x001, rgb | |||
VK_FORMAT_R8_SSCALED , plain, 1, 1, ss8 , , , , x001, rgb | |||
VK_FORMAT_R8_UINT , plain, 1, 1, up8 , , , , x001, rgb | |||
VK_FORMAT_R8_SINT , plain, 1, 1, sp8 , , , , x001, rgb | |||
VK_FORMAT_R8_SRGB , plain, 1, 1, un8 , , , , x001, srgb | |||
VK_FORMAT_R8G8_UNORM , plain, 1, 1, un8 , un8 , , , xy01, rgb | |||
VK_FORMAT_R8G8_SNORM , plain, 1, 1, sn8 , sn8 , , , xy01, rgb | |||
VK_FORMAT_R8G8_USCALED , plain, 1, 1, us8 , us8 , , , xy01, rgb | |||
VK_FORMAT_R8G8_SSCALED , plain, 1, 1, ss8 , ss8 , , , xy01, rgb | |||
VK_FORMAT_R8G8_UINT , plain, 1, 1, up8 , up8 , , , xy01, rgb | |||
VK_FORMAT_R8G8_SINT , plain, 1, 1, sp8 , sp8 , , , xy01, rgb | |||
VK_FORMAT_R8G8_SRGB , plain, 1, 1, un8 , un8 , , , xy01, srgb | |||
VK_FORMAT_R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , , xyz1, rgb | |||
VK_FORMAT_R8G8B8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , , xyz1, rgb | |||
VK_FORMAT_R8G8B8_USCALED , plain, 1, 1, us8 , us8 , us8 , , xyz1, rgb | |||
VK_FORMAT_R8G8B8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , , xyz1, rgb | |||
VK_FORMAT_R8G8B8_UINT , plain, 1, 1, up8 , up8 , up8 , , xyz1, rgb | |||
VK_FORMAT_R8G8B8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , , xyz1, rgb | |||
VK_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , xyz1, srgb | |||
VK_FORMAT_B8G8R8_UNORM , plain, 1, 1, un8 , un8 , un8 , , zyx1, rgb | |||
VK_FORMAT_B8G8R8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , , zyx1, rgb | |||
VK_FORMAT_B8G8R8_USCALED , plain, 1, 1, us8 , us8 , us8 , , zyx1, rgb | |||
VK_FORMAT_B8G8R8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , , zyx1, rgb | |||
VK_FORMAT_B8G8R8_UINT , plain, 1, 1, up8 , up8 , up8 , , zyx1, rgb | |||
VK_FORMAT_B8G8R8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , , zyx1, rgb | |||
VK_FORMAT_B8G8R8_SRGB , plain, 1, 1, un8 , un8 , un8 , , zyx1, srgb | |||
VK_FORMAT_R8G8B8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb | |||
VK_FORMAT_R8G8B8A8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb | |||
VK_FORMAT_R8G8B8A8_USCALED , plain, 1, 1, us8 , us8 , us8 , us8 , xyzw, rgb | |||
VK_FORMAT_R8G8B8A8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , ss8 , xyzw, rgb | |||
VK_FORMAT_R8G8B8A8_UINT , plain, 1, 1, up8 , up8 , up8 , up8 , xyzw, rgb | |||
VK_FORMAT_R8G8B8A8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , xyzw, rgb | |||
VK_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb | |||
VK_FORMAT_B8G8R8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb | |||
VK_FORMAT_B8G8R8A8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , zyxw, rgb | |||
VK_FORMAT_B8G8R8A8_USCALED , plain, 1, 1, us8 , us8 , us8 , us8 , zyxw, rgb | |||
VK_FORMAT_B8G8R8A8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , ss8 , zyxw, rgb | |||
VK_FORMAT_B8G8R8A8_UINT , plain, 1, 1, up8 , up8 , up8 , up8 , zyxw, rgb | |||
VK_FORMAT_B8G8R8A8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , zyxw, rgb | |||
VK_FORMAT_B8G8R8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, srgb | |||
VK_FORMAT_A8B8G8R8_UNORM_PACK32 , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb | |||
VK_FORMAT_A8B8G8R8_SNORM_PACK32 , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb | |||
VK_FORMAT_A8B8G8R8_USCALED_PACK32 , plain, 1, 1, us8 , us8 , us8 , us8 , xyzw, rgb | |||
VK_FORMAT_A8B8G8R8_SSCALED_PACK32 , plain, 1, 1, ss8 , ss8 , ss8 , ss8 , xyzw, rgb | |||
VK_FORMAT_A8B8G8R8_UINT_PACK32 , plain, 1, 1, up8 , up8 , up8 , up8 , xyzw, rgb | |||
VK_FORMAT_A8B8G8R8_SINT_PACK32 , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , xyzw, rgb | |||
VK_FORMAT_A8B8G8R8_SRGB_PACK32 , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb | |||
VK_FORMAT_A2R10G10B10_UNORM_PACK32 , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb | |||
VK_FORMAT_A2R10G10B10_SNORM_PACK32 , plain, 1, 1, sn10, sn10, sn10, sn2 , zyxw, rgb | |||
VK_FORMAT_A2R10G10B10_USCALED_PACK32 , plain, 1, 1, us10, us10, us10, us2 , zyxw, rgb | |||
VK_FORMAT_A2R10G10B10_SSCALED_PACK32 , plain, 1, 1, ss10, ss10, ss10, ss2 , zyxw, rgb | |||
VK_FORMAT_A2R10G10B10_UINT_PACK32 , plain, 1, 1, up10, up10, up10, up2 , zyxw, rgb | |||
VK_FORMAT_A2R10G10B10_SINT_PACK32 , plain, 1, 1, sp10, sp10, sp10, sp2 , zyxw, rgb | |||
VK_FORMAT_A2B10G10R10_UNORM_PACK32 , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb | |||
VK_FORMAT_A2B10G10R10_SNORM_PACK32 , plain, 1, 1, sn10, sn10, sn10, sn2 , xyzw, rgb | |||
VK_FORMAT_A2B10G10R10_USCALED_PACK32 , plain, 1, 1, us10, us10, us10, us2 , xyzw, rgb | |||
VK_FORMAT_A2B10G10R10_SSCALED_PACK32 , plain, 1, 1, ss10, ss10, ss10, ss2 , xyzw, rgb | |||
VK_FORMAT_A2B10G10R10_UINT_PACK32 , plain, 1, 1, up10, up10, up10, up2 , xyzw, rgb | |||
VK_FORMAT_A2B10G10R10_SINT_PACK32 , plain, 1, 1, sp10, sp10, sp10, sp2 , xyzw, rgb | |||
VK_FORMAT_R16_UNORM , plain, 1, 1, un16, , , , x001, rgb | |||
VK_FORMAT_R16_SNORM , plain, 1, 1, sn16, , , , x001, rgb | |||
VK_FORMAT_R16_USCALED , plain, 1, 1, us16, , , , x001, rgb | |||
VK_FORMAT_R16_SSCALED , plain, 1, 1, ss16, , , , x001, rgb | |||
VK_FORMAT_R16_UINT , plain, 1, 1, up16, , , , x001, rgb | |||
VK_FORMAT_R16_SINT , plain, 1, 1, sp16, , , , x001, rgb | |||
VK_FORMAT_R16_SFLOAT , plain, 1, 1, f16 , , , , x001, rgb | |||
VK_FORMAT_R16G16_UNORM , plain, 1, 1, un16, un16, , , xy01, rgb | |||
VK_FORMAT_R16G16_SNORM , plain, 1, 1, sn16, sn16, , , xy01, rgb | |||
VK_FORMAT_R16G16_USCALED , plain, 1, 1, us16, us16, , , xy01, rgb | |||
VK_FORMAT_R16G16_SSCALED , plain, 1, 1, ss16, ss16, , , xy01, rgb | |||
VK_FORMAT_R16G16_UINT , plain, 1, 1, up16, up16, , , xy01, rgb | |||
VK_FORMAT_R16G16_SINT , plain, 1, 1, sp16, sp16, , , xy01, rgb | |||
VK_FORMAT_R16G16_SFLOAT , plain, 1, 1, f16 , f16 , , , xy01, rgb | |||
VK_FORMAT_R16G16B16_UNORM , plain, 1, 1, un16, un16, un16, , xyz1, rgb | |||
VK_FORMAT_R16G16B16_SNORM , plain, 1, 1, sn16, sn16, sn16, , xyz1, rgb | |||
VK_FORMAT_R16G16B16_USCALED , plain, 1, 1, us16, us16, us16, , xyz1, rgb | |||
VK_FORMAT_R16G16B16_SSCALED , plain, 1, 1, ss16, ss16, ss16, , xyz1, rgb | |||
VK_FORMAT_R16G16B16_UINT , plain, 1, 1, up16, up16, up16, , xyz1, rgb | |||
VK_FORMAT_R16G16B16_SINT , plain, 1, 1, sp16, sp16, sp16, , xyz1, rgb | |||
VK_FORMAT_R16G16B16_SFLOAT , plain, 1, 1, f16 , f16 , f16 , , xyz1, rgb | |||
VK_FORMAT_R16G16B16A16_UNORM , plain, 1, 1, un16, un16, un16, un16, xyzw, rgb | |||
VK_FORMAT_R16G16B16A16_SNORM , plain, 1, 1, sn16, sn16, sn16, sn16, xyzw, rgb | |||
VK_FORMAT_R16G16B16A16_USCALED , plain, 1, 1, us16, us16, us16, us16, xyzw, rgb | |||
VK_FORMAT_R16G16B16A16_SSCALED , plain, 1, 1, ss16, ss16, ss16, ss16, xyzw, rgb | |||
VK_FORMAT_R16G16B16A16_UINT , plain, 1, 1, up16, up16, up16, up16, xyzw, rgb | |||
VK_FORMAT_R16G16B16A16_SINT , plain, 1, 1, sp16, sp16, sp16, sp16, xyzw, rgb | |||
VK_FORMAT_R16G16B16A16_SFLOAT , plain, 1, 1, f16 , f16 , f16 , f16 , xyzw, rgb | |||
VK_FORMAT_R32_UINT , plain, 1, 1, up32, , , , x001, rgb | |||
VK_FORMAT_R32_SINT , plain, 1, 1, sp32, , , , x001, rgb | |||
VK_FORMAT_R32_SFLOAT , plain, 1, 1, f32 , , , , x001, rgb | |||
VK_FORMAT_R32G32_UINT , plain, 1, 1, up32, up32, , , xy01, rgb | |||
VK_FORMAT_R32G32_SINT , plain, 1, 1, sp32, sp32, , , xy01, rgb | |||
VK_FORMAT_R32G32_SFLOAT , plain, 1, 1, f32 , f32 , , , xy01, rgb | |||
VK_FORMAT_R32G32B32_UINT , plain, 1, 1, up32, up32, up32, , xyz1, rgb | |||
VK_FORMAT_R32G32B32_SINT , plain, 1, 1, sp32, sp32, sp32, , xyz1, rgb | |||
VK_FORMAT_R32G32B32_SFLOAT , plain, 1, 1, f32 , f32 , f32 , , xyz1, rgb | |||
VK_FORMAT_R32G32B32A32_UINT , plain, 1, 1, up32, up32, up32, up32, xyzw, rgb | |||
VK_FORMAT_R32G32B32A32_SINT , plain, 1, 1, sp32, sp32, sp32, sp32, xyzw, rgb | |||
VK_FORMAT_R32G32B32A32_SFLOAT , plain, 1, 1, f32 , f32 , f32 , f32 , xyzw, rgb | |||
VK_FORMAT_R64_UINT , plain, 1, 1, up64, , , , x001, rgb | |||
VK_FORMAT_R64_SINT , plain, 1, 1, sp64, , , , x001, rgb | |||
VK_FORMAT_R64_SFLOAT , plain, 1, 1, f64 , , , , x001, rgb | |||
VK_FORMAT_R64G64_UINT , plain, 1, 1, up64, up64, , , xy01, rgb | |||
VK_FORMAT_R64G64_SINT , plain, 1, 1, sp64, sp64, , , xy01, rgb | |||
VK_FORMAT_R64G64_SFLOAT , plain, 1, 1, f64 , f64 , , , xy01, rgb | |||
VK_FORMAT_R64G64B64_UINT , plain, 1, 1, up64, up64, up64, , xyz1, rgb | |||
VK_FORMAT_R64G64B64_SINT , plain, 1, 1, sp64, sp64, sp64, , xyz1, rgb | |||
VK_FORMAT_R64G64B64_SFLOAT , plain, 1, 1, f64 , f64 , f64 , , xyz1, rgb | |||
VK_FORMAT_R64G64B64A64_UINT , plain, 1, 1, up64, up64, up64, up64, xyzw, rgb | |||
VK_FORMAT_R64G64B64A64_SINT , plain, 1, 1, sp64, sp64, sp64, sp64, xyzw, rgb | |||
VK_FORMAT_R64G64B64A64_SFLOAT , plain, 1, 1, f64 , f64 , f64 , f64 , xyzw, rgb | |||
VK_FORMAT_B10G11R11_UFLOAT_PACK32 , other, 1, 1, x32 , , , , xyz1, rgb | |||
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 , other, 1, 1, x32 , , , , xyz1, rgb | |||
VK_FORMAT_D16_UNORM , plain, 1, 1, un16, , , , x___, zs | |||
VK_FORMAT_X8_D24_UNORM_PACK32 , plain, 1, 1, un24, x8 , , , x___, zs | |||
VK_FORMAT_D32_SFLOAT , plain, 1, 1, f32 , , , , x___, zs | |||
VK_FORMAT_S8_UINT , plain, 1, 1, up8 , , , , _x__, zs | |||
VK_FORMAT_D16_UNORM_S8_UINT , plain, 1, 1, un16, up8 , , , xy__, zs | |||
VK_FORMAT_D24_UNORM_S8_UINT , plain, 1, 1, un24, up8 , , , xy__, zs | |||
VK_FORMAT_D32_SFLOAT_S8_UINT , plain, 1, 1, f32 , up8 , , , xy__, zs | |||
VK_FORMAT_BC1_RGB_UNORM_BLOCK , s3tc, 4, 4, x64 , , , , xyz1, rgb | |||
VK_FORMAT_BC1_RGB_SRGB_BLOCK , s3tc, 4, 4, x64 , , , , xyz1, srgb | |||
VK_FORMAT_BC1_RGBA_UNORM_BLOCK , s3tc, 4, 4, x64 , , , , xyzw, rgb | |||
VK_FORMAT_BC1_RGBA_SRGB_BLOCK , s3tc, 4, 4, x64 , , , , xyzw, srgb | |||
VK_FORMAT_BC2_UNORM_BLOCK , s3tc, 4, 4, x128, , , , xyzw, rgb | |||
VK_FORMAT_BC2_SRGB_BLOCK , s3tc, 4, 4, x128, , , , xyzw, srgb | |||
VK_FORMAT_BC3_UNORM_BLOCK , s3tc, 4, 4, x128, , , , xyzw, rgb | |||
VK_FORMAT_BC3_SRGB_BLOCK , s3tc, 4, 4, x128, , , , xyzw, srgb | |||
VK_FORMAT_BC4_UNORM_BLOCK , rgtc, 4, 4, x64, , , , x001, rgb | |||
VK_FORMAT_BC4_SNORM_BLOCK , rgtc, 4, 4, x64, , , , x001, rgb | |||
VK_FORMAT_BC5_UNORM_BLOCK , rgtc, 4, 4, x128, , , , xy01, rgb | |||
VK_FORMAT_BC5_SNORM_BLOCK , rgtc, 4, 4, x128, , , , xy01, rgb | |||
VK_FORMAT_BC6H_UFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1, rgb | |||
VK_FORMAT_BC6H_SFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1, rgb | |||
VK_FORMAT_BC7_UNORM_BLOCK , bptc, 4, 4, x128, , , , xyzw, rgb | |||
VK_FORMAT_BC7_SRGB_BLOCK , bptc, 4, 4, x128, , , , xyzw, srgb | |||
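# The compressed formats below are listed by name only; vk_format_parse.py
# skips rows with fewer than 10 fields, so these act as placeholders until
# they grow full descriptions.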
VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, | |||
VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, | |||
VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, | |||
VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, | |||
VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, | |||
VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, | |||
VK_FORMAT_EAC_R11_UNORM_BLOCK, | |||
VK_FORMAT_EAC_R11_SNORM_BLOCK, | |||
VK_FORMAT_EAC_R11G11_UNORM_BLOCK, | |||
VK_FORMAT_EAC_R11G11_SNORM_BLOCK, | |||
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_5x4_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_5x4_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_5x5_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_6x6_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_8x5_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_8x8_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_10x5_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_10x5_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_10x6_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_10x6_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_12x10_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_12x10_SRGB_BLOCK, | |||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK, | |||
VK_FORMAT_ASTC_12x12_SRGB_BLOCK, |
@@ -0,0 +1,384 @@ | |||
#!/usr/bin/env python | |||
''' | |||
/************************************************************************** | |||
* | |||
* Copyright 2009 VMware, Inc. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
''' | |||
VOID, UNSIGNED, SIGNED, FIXED, FLOAT = range(5) | |||
SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_0, SWIZZLE_1, SWIZZLE_NONE, = range(7) | |||
PLAIN = 'plain' | |||
SCALED = 'scaled' | |||
RGB = 'rgb' | |||
SRGB = 'srgb' | |||
YUV = 'yuv' | |||
ZS = 'zs' | |||
def is_pot(x): | |||
return (x & (x - 1)) == 0 | |||
VERY_LARGE = 99999999999999999999999 | |||
class Channel: | |||
'''Describe one channel of a pixel format.'''
def __init__(self, type, norm, pure, scaled, size, name = ''): | |||
self.type = type | |||
self.norm = norm | |||
self.pure = pure | |||
self.size = size | |||
self.scaled = scaled | |||
self.sign = type in (SIGNED, FIXED, FLOAT) | |||
self.name = name | |||
def __str__(self): | |||
s = str(self.type) | |||
if self.norm: | |||
s += 'n' | |||
if self.pure: | |||
s += 'p' | |||
if self.scaled: | |||
s += 's' | |||
s += str(self.size) | |||
return s | |||
def __eq__(self, other): | |||
return self.type == other.type and self.norm == other.norm and self.pure == other.pure and self.size == other.size and self.scaled == other.scaled | |||
def max(self): | |||
'''Maximum representable number.''' | |||
if self.type == FLOAT: | |||
return VERY_LARGE | |||
if self.type == FIXED: | |||
return (1 << (self.size/2)) - 1 | |||
if self.norm: | |||
return 1 | |||
if self.type == UNSIGNED: | |||
return (1 << self.size) - 1 | |||
if self.type == SIGNED: | |||
return (1 << (self.size - 1)) - 1 | |||
assert False | |||
def min(self): | |||
'''Minimum representable number.''' | |||
if self.type == FLOAT: | |||
return -VERY_LARGE | |||
if self.type == FIXED: | |||
return -(1 << (self.size/2)) | |||
if self.type == UNSIGNED: | |||
return 0 | |||
if self.norm: | |||
return -1 | |||
if self.type == SIGNED: | |||
return -(1 << (self.size - 1)) | |||
assert False | |||
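# Illustrative values: an 8-bit unsigned normalized channel ("un8") has
# max() == 1 and min() == 0; an 8-bit pure unsigned channel ("up8") spans
# 0..255; a pure signed 8-bit channel ("sp8") spans -128..127.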
class Format: | |||
'''Describe a pixel format.''' | |||
def __init__(self, name, layout, block_width, block_height, le_channels, le_swizzles, be_channels, be_swizzles, colorspace): | |||
self.name = name | |||
self.layout = layout | |||
self.block_width = block_width | |||
self.block_height = block_height | |||
self.le_channels = le_channels | |||
self.le_swizzles = le_swizzles | |||
self.be_channels = be_channels | |||
self.be_swizzles = be_swizzles | |||
self.colorspace = colorspace | |||
def __str__(self): | |||
return self.name | |||
def short_name(self): | |||
'''Make up a short name for a format, suitable for use as a suffix in
function names.'''
name = self.name | |||
if name.startswith('VK_FORMAT_'): | |||
name = name[len('VK_FORMAT_'):] | |||
name = name.lower() | |||
return name | |||
def block_size(self): | |||
size = 0 | |||
for channel in self.le_channels: | |||
size += channel.size | |||
return size | |||
def nr_channels(self): | |||
nr_channels = 0 | |||
for channel in self.le_channels: | |||
if channel.size: | |||
nr_channels += 1 | |||
return nr_channels | |||
def array_element(self): | |||
if self.layout != PLAIN: | |||
return None | |||
ref_channel = self.le_channels[0] | |||
if ref_channel.type == VOID: | |||
ref_channel = self.le_channels[1] | |||
for channel in self.le_channels: | |||
if channel.size and (channel.size != ref_channel.size or channel.size % 8): | |||
return None | |||
if channel.type != VOID: | |||
if channel.type != ref_channel.type: | |||
return None | |||
if channel.norm != ref_channel.norm: | |||
return None | |||
if channel.pure != ref_channel.pure: | |||
return None | |||
if channel.scaled != ref_channel.scaled: | |||
return None | |||
return ref_channel | |||
def is_array(self): | |||
return self.array_element() != None | |||
def is_mixed(self): | |||
if self.layout != PLAIN: | |||
return False | |||
ref_channel = self.le_channels[0] | |||
if ref_channel.type == VOID: | |||
ref_channel = self.le_channels[1] | |||
for channel in self.le_channels[1:]: | |||
if channel.type != VOID: | |||
if channel.type != ref_channel.type: | |||
return True | |||
if channel.norm != ref_channel.norm: | |||
return True | |||
if channel.pure != ref_channel.pure: | |||
return True | |||
if channel.scaled != ref_channel.scaled: | |||
return True | |||
return False | |||
def is_pot(self): | |||
return is_pot(self.block_size()) | |||
def is_int(self): | |||
if self.layout != PLAIN: | |||
return False | |||
for channel in self.le_channels: | |||
if channel.type not in (VOID, UNSIGNED, SIGNED): | |||
return False | |||
return True | |||
def is_float(self): | |||
if self.layout != PLAIN: | |||
return False | |||
for channel in self.le_channels: | |||
if channel.type not in (VOID, FLOAT): | |||
return False | |||
return True | |||
def is_bitmask(self): | |||
if self.layout != PLAIN: | |||
return False | |||
if self.block_size() not in (8, 16, 32): | |||
return False | |||
for channel in self.le_channels: | |||
if channel.type not in (VOID, UNSIGNED, SIGNED): | |||
return False | |||
return True | |||
def is_pure_color(self): | |||
if self.layout != PLAIN or self.colorspace == ZS: | |||
return False | |||
pures = [channel.pure | |||
for channel in self.le_channels | |||
if channel.type != VOID] | |||
for x in pures: | |||
assert x == pures[0] | |||
return pures[0] | |||
def channel_type(self): | |||
types = [channel.type | |||
for channel in self.le_channels | |||
if channel.type != VOID] | |||
for x in types: | |||
assert x == types[0] | |||
return types[0] | |||
def is_pure_signed(self): | |||
return self.is_pure_color() and self.channel_type() == SIGNED | |||
def is_pure_unsigned(self): | |||
return self.is_pure_color() and self.channel_type() == UNSIGNED | |||
def has_channel(self, id): | |||
return self.le_swizzles[id] != SWIZZLE_NONE | |||
def has_depth(self): | |||
return self.colorspace == ZS and self.has_channel(0) | |||
def has_stencil(self): | |||
return self.colorspace == ZS and self.has_channel(1) | |||
def stride(self): | |||
return self.block_size()/8 | |||
_type_parse_map = { | |||
'': VOID, | |||
'x': VOID, | |||
'u': UNSIGNED, | |||
's': SIGNED, | |||
'h': FIXED, | |||
'f': FLOAT, | |||
} | |||
_swizzle_parse_map = { | |||
'x': SWIZZLE_X, | |||
'y': SWIZZLE_Y, | |||
'z': SWIZZLE_Z, | |||
'w': SWIZZLE_W, | |||
'0': SWIZZLE_0, | |||
'1': SWIZZLE_1, | |||
'_': SWIZZLE_NONE, | |||
} | |||
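# Example: the swizzle string "zyx1" (used by the B8G8R8 formats above)
# says red comes from the third stored channel, green from the second,
# blue from the first, and alpha is the constant 1.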
def _parse_channels(fields, layout, colorspace, swizzles): | |||
if layout == PLAIN: | |||
names = ['']*4 | |||
if colorspace in (RGB, SRGB): | |||
for i in range(4): | |||
swizzle = swizzles[i] | |||
if swizzle < 4: | |||
names[swizzle] += 'rgba'[i] | |||
elif colorspace == ZS: | |||
for i in range(4): | |||
swizzle = swizzles[i] | |||
if swizzle < 4: | |||
names[swizzle] += 'zs'[i] | |||
else: | |||
assert False | |||
for i in range(4): | |||
if names[i] == '': | |||
names[i] = 'x' | |||
else: | |||
names = ['x', 'y', 'z', 'w'] | |||
channels = [] | |||
for i in range(0, 4): | |||
field = fields[i] | |||
if field: | |||
type = _type_parse_map[field[0]] | |||
if field[1] == 'n': | |||
norm = True | |||
pure = False | |||
scaled = False | |||
size = int(field[2:]) | |||
elif field[1] == 'p': | |||
pure = True | |||
norm = False | |||
scaled = False | |||
size = int(field[2:]) | |||
elif field[1] == 's': | |||
pure = False | |||
norm = False | |||
scaled = True | |||
size = int(field[2:]) | |||
else: | |||
norm = False | |||
pure = False | |||
scaled = False | |||
size = int(field[1:]) | |||
else: | |||
type = VOID | |||
norm = False | |||
pure = False | |||
scaled = False | |||
size = 0 | |||
channel = Channel(type, norm, pure, scaled, size, names[i]) | |||
channels.append(channel) | |||
return channels | |||
def parse(filename): | |||
'''Parse the format description in CSV format in terms of the | |||
Channel and Format classes above.''' | |||
stream = open(filename) | |||
formats = [] | |||
for line in stream: | |||
try: | |||
comment = line.index('#') | |||
except ValueError: | |||
pass | |||
else: | |||
line = line[:comment] | |||
line = line.strip() | |||
if not line: | |||
continue | |||
fields = [field.strip() for field in line.split(',')] | |||
if len (fields) < 10: | |||
continue | |||
if len (fields) == 10: | |||
fields += fields[4:9] | |||
assert len (fields) == 15 | |||
name = fields[0] | |||
layout = fields[1] | |||
block_width, block_height = map(int, fields[2:4]) | |||
colorspace = fields[9] | |||
le_swizzles = [_swizzle_parse_map[swizzle] for swizzle in fields[8]] | |||
le_channels = _parse_channels(fields[4:8], layout, colorspace, le_swizzles) | |||
be_swizzles = [_swizzle_parse_map[swizzle] for swizzle in fields[14]] | |||
be_channels = _parse_channels(fields[10:14], layout, colorspace, be_swizzles) | |||
le_shift = 0 | |||
for channel in le_channels: | |||
channel.shift = le_shift | |||
le_shift += channel.size | |||
be_shift = 0 | |||
for channel in be_channels[3::-1]: | |||
channel.shift = be_shift | |||
be_shift += channel.size | |||
assert le_shift == be_shift | |||
for i in range(4): | |||
assert (le_swizzles[i] != SWIZZLE_NONE) == (be_swizzles[i] != SWIZZLE_NONE) | |||
format = Format(name, layout, block_width, block_height, le_channels, le_swizzles, be_channels, be_swizzles, colorspace) | |||
formats.append(format) | |||
return formats | |||
@@ -0,0 +1,173 @@ | |||
#!/usr/bin/env python | |||
CopyRight = ''' | |||
/************************************************************************** | |||
* | |||
* Copyright 2010 VMware, Inc. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
''' | |||
import sys | |||
from vk_format_parse import * | |||
def layout_map(layout): | |||
return 'VK_FORMAT_LAYOUT_' + str(layout).upper() | |||
def colorspace_map(colorspace): | |||
return 'VK_FORMAT_COLORSPACE_' + str(colorspace).upper() | |||
colorspace_channels_map = { | |||
'rgb': ['r', 'g', 'b', 'a'], | |||
'srgb': ['sr', 'sg', 'sb', 'a'], | |||
'zs': ['z', 's'], | |||
'yuv': ['y', 'u', 'v'], | |||
} | |||
type_map = { | |||
VOID: "VK_FORMAT_TYPE_VOID", | |||
UNSIGNED: "VK_FORMAT_TYPE_UNSIGNED", | |||
SIGNED: "VK_FORMAT_TYPE_SIGNED", | |||
FIXED: "VK_FORMAT_TYPE_FIXED", | |||
FLOAT: "VK_FORMAT_TYPE_FLOAT", | |||
} | |||
def bool_map(value): | |||
if value: | |||
return "true" | |||
else: | |||
return "false" | |||
swizzle_map = { | |||
SWIZZLE_X: "VK_SWIZZLE_X", | |||
SWIZZLE_Y: "VK_SWIZZLE_Y", | |||
SWIZZLE_Z: "VK_SWIZZLE_Z", | |||
SWIZZLE_W: "VK_SWIZZLE_W", | |||
SWIZZLE_0: "VK_SWIZZLE_0", | |||
SWIZZLE_1: "VK_SWIZZLE_1", | |||
SWIZZLE_NONE: "VK_SWIZZLE_NONE", | |||
} | |||
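# Single-channel formats look the same on either endianness, so the
# big-endian #ifdef is only emitted for formats with more than one channel.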
def print_channels(format, func): | |||
if format.nr_channels() <= 1: | |||
func(format.le_channels, format.le_swizzles) | |||
else: | |||
print '#ifdef PIPE_ARCH_BIG_ENDIAN' | |||
func(format.be_channels, format.be_swizzles) | |||
print '#else' | |||
func(format.le_channels, format.le_swizzles) | |||
print '#endif' | |||
def write_format_table(formats): | |||
print '/* This file is autogenerated by u_format_table.py from u_format.csv. Do not edit directly. */' | |||
# This will print the copyright message on the top of this file | |||
print CopyRight.strip() | |||
print '#include "stdbool.h"' | |||
print '#include "vk_format.h"' | |||
def do_channel_array(channels, swizzles): | |||
print " {" | |||
for i in range(4): | |||
channel = channels[i] | |||
if i < 3: | |||
sep = "," | |||
else: | |||
sep = "" | |||
if channel.size: | |||
print " {%s, %s, %s, %s, %u, %u}%s\t/* %s = %s */" % (type_map[channel.type], bool_map(channel.norm), bool_map(channel.pure), bool_map(channel.scaled), channel.size, channel.shift, sep, "xyzw"[i], channel.name) | |||
else: | |||
print " {0, 0, 0, 0, 0}%s" % (sep,) | |||
print " }," | |||
def do_swizzle_array(channels, swizzles): | |||
print " {" | |||
for i in range(4): | |||
swizzle = swizzles[i] | |||
if i < 3: | |||
sep = "," | |||
else: | |||
sep = "" | |||
try: | |||
comment = colorspace_channels_map[format.colorspace][i] | |||
except (KeyError, IndexError): | |||
comment = 'ignored' | |||
print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) | |||
print " }," | |||
for format in formats: | |||
print 'const struct vk_format_description' | |||
print 'vk_format_%s_description = {' % (format.short_name(),) | |||
print " %s," % (format.name,) | |||
print " \"%s\"," % (format.name,) | |||
print " \"%s\"," % (format.short_name(),) | |||
print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) | |||
print " %s," % (layout_map(format.layout),) | |||
print " %u,\t/* nr_channels */" % (format.nr_channels(),) | |||
print " %s,\t/* is_array */" % (bool_map(format.is_array()),) | |||
print " %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),) | |||
print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),) | |||
print_channels(format, do_channel_array) | |||
print_channels(format, do_swizzle_array) | |||
print " %s," % (colorspace_map(format.colorspace),) | |||
print "};" | |||
print "const struct vk_format_description *" | |||
print "vk_format_description(VkFormat format)" | |||
print "{" | |||
print " if (format > VK_FORMAT_END_RANGE) {" | |||
print " return NULL;" | |||
print " }" | |||
print " switch (format) {" | |||
for format in formats: | |||
print " case %s:" % format.name | |||
print " return &vk_format_%s_description;" % (format.short_name(),) | |||
print " default:" | |||
print " return NULL;" | |||
print " }" | |||
print "}" | |||
def main(): | |||
formats = [] | |||
for arg in sys.argv[1:]: | |||
formats.extend(parse(arg)) | |||
write_format_table(formats) | |||
if __name__ == '__main__': | |||
main() |
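# Illustrative invocation (the exact build rule may differ):
#   python vk_format_table.py vk_format_table.csv > vk_format_table.c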
@@ -0,0 +1,297 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <stdio.h> | |||
#include "radv_amdgpu_bo.h" | |||
#include <amdgpu.h> | |||
#include <amdgpu_drm.h> | |||
#include <inttypes.h> | |||
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) | |||
{ | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
if (bo->ws->debug_all_bos) { | |||
pthread_mutex_lock(&bo->ws->global_bo_list_lock); | |||
LIST_DEL(&bo->global_list_item); | |||
bo->ws->num_buffers--; | |||
pthread_mutex_unlock(&bo->ws->global_bo_list_lock); | |||
} | |||
amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP); | |||
amdgpu_va_range_free(bo->va_handle); | |||
amdgpu_bo_free(bo->bo); | |||
FREE(bo); | |||
} | |||
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo) | |||
{ | |||
struct radv_amdgpu_winsys *ws = bo->ws; | |||
if (bo->ws->debug_all_bos) { | |||
pthread_mutex_lock(&ws->global_bo_list_lock); | |||
LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list); | |||
ws->num_buffers++; | |||
pthread_mutex_unlock(&ws->global_bo_list_lock); | |||
} | |||
} | |||
static struct radeon_winsys_bo * | |||
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, | |||
uint64_t size, | |||
unsigned alignment, | |||
enum radeon_bo_domain initial_domain, | |||
unsigned flags) | |||
{ | |||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); | |||
struct radv_amdgpu_winsys_bo *bo; | |||
struct amdgpu_bo_alloc_request request = {0}; | |||
amdgpu_bo_handle buf_handle; | |||
uint64_t va = 0; | |||
amdgpu_va_handle va_handle; | |||
int r; | |||
bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo); | |||
if (!bo) { | |||
return NULL; | |||
} | |||
request.alloc_size = size; | |||
request.phys_alignment = alignment; | |||
if (initial_domain & RADEON_DOMAIN_VRAM) | |||
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; | |||
if (initial_domain & RADEON_DOMAIN_GTT) | |||
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; | |||
if (flags & RADEON_FLAG_CPU_ACCESS) | |||
request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | |||
if (flags & RADEON_FLAG_NO_CPU_ACCESS) | |||
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; | |||
if (flags & RADEON_FLAG_GTT_WC) | |||
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; | |||
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); | |||
fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); | |||
fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); | |||
fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); | |||
goto error_bo_alloc; | |||
} | |||
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, | |||
size, alignment, 0, &va, &va_handle, 0); | |||
if (r) | |||
goto error_va_alloc; | |||
r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP); | |||
if (r) | |||
goto error_va_map; | |||
bo->bo = buf_handle; | |||
bo->va = va; | |||
bo->va_handle = va_handle; | |||
bo->initial_domain = initial_domain; | |||
bo->size = size; | |||
bo->is_shared = false; | |||
bo->ws = ws; | |||
radv_amdgpu_add_buffer_to_global_list(bo); | |||
return (struct radeon_winsys_bo *)bo; | |||
error_va_map: | |||
amdgpu_va_range_free(va_handle); | |||
error_va_alloc: | |||
amdgpu_bo_free(buf_handle); | |||
error_bo_alloc: | |||
FREE(bo); | |||
return NULL; | |||
} | |||
static uint64_t radv_amdgpu_winsys_bo_get_va(struct radeon_winsys_bo *_bo) | |||
{ | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
return bo->va; | |||
} | |||
static void * | |||
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo) | |||
{ | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
int ret; | |||
void *data; | |||
ret = amdgpu_bo_cpu_map(bo->bo, &data); | |||
if (ret) | |||
return NULL; | |||
return data; | |||
} | |||
static void | |||
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo) | |||
{ | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
amdgpu_bo_cpu_unmap(bo->bo); | |||
} | |||
static struct radeon_winsys_bo * | |||
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, | |||
int fd, unsigned *stride, | |||
unsigned *offset) | |||
{ | |||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); | |||
struct radv_amdgpu_winsys_bo *bo; | |||
uint64_t va; | |||
amdgpu_va_handle va_handle; | |||
enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd; | |||
struct amdgpu_bo_import_result result = {0}; | |||
struct amdgpu_bo_info info = {0}; | |||
enum radeon_bo_domain initial = 0; | |||
int r; | |||
bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo); | |||
if (!bo) | |||
return NULL; | |||
r = amdgpu_bo_import(ws->dev, type, fd, &result); | |||
if (r) | |||
goto error; | |||
r = amdgpu_bo_query_info(result.buf_handle, &info); | |||
if (r) | |||
goto error_query; | |||
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, | |||
result.alloc_size, 1 << 20, 0, &va, &va_handle, 0); | |||
if (r) | |||
goto error_query; | |||
r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP); | |||
if (r) | |||
goto error_va_map; | |||
if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM) | |||
initial |= RADEON_DOMAIN_VRAM; | |||
if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT) | |||
initial |= RADEON_DOMAIN_GTT; | |||
bo->bo = result.buf_handle; | |||
bo->va = va; | |||
bo->va_handle = va_handle; | |||
bo->initial_domain = initial; | |||
bo->size = result.alloc_size; | |||
bo->is_shared = true; | |||
return (struct radeon_winsys_bo *)bo; | |||
error_va_map: | |||
amdgpu_va_range_free(va_handle); | |||
error_query: | |||
amdgpu_bo_free(result.buf_handle); | |||
error: | |||
FREE(bo); | |||
return NULL; | |||
} | |||
static bool | |||
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, | |||
struct radeon_winsys_bo *_bo, | |||
int *fd) | |||
{ | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd; | |||
int r; | |||
unsigned handle; | |||
r = amdgpu_bo_export(bo->bo, type, &handle); | |||
if (r) | |||
return false; | |||
*fd = (int)handle; | |||
bo->is_shared = true; | |||
return true; | |||
} | |||
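/* Map an EG tile split in bytes to the encoding expected by
 * AMDGPU_TILING_SET(TILE_SPLIT): effectively log2(tile_split / 64),
 * with unknown values treated as 1024. */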
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split) | |||
{ | |||
switch (eg_tile_split) { | |||
case 64: return 0; | |||
case 128: return 1; | |||
case 256: return 2; | |||
case 512: return 3; | |||
default: | |||
case 1024: return 4; | |||
case 2048: return 5; | |||
case 4096: return 6; | |||
} | |||
} | |||
static void | |||
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo, | |||
struct radeon_bo_metadata *md) | |||
{ | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
struct amdgpu_bo_metadata metadata = {0}; | |||
uint32_t tiling_flags = 0; | |||
if (md->macrotile == RADEON_LAYOUT_TILED) | |||
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ | |||
else if (md->microtile == RADEON_LAYOUT_TILED) | |||
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */ | |||
else | |||
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */ | |||
tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config); | |||
tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw)); | |||
tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh)); | |||
if (md->tile_split) | |||
tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->tile_split)); | |||
tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea)); | |||
tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1); | |||
if (md->scanout) | |||
tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */ | |||
else | |||
tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */ | |||
metadata.tiling_info = tiling_flags; | |||
metadata.size_metadata = md->size_metadata; | |||
memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata)); | |||
amdgpu_bo_set_metadata(bo->bo, &metadata); | |||
} | |||
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) | |||
{ | |||
ws->base.buffer_create = radv_amdgpu_winsys_bo_create; | |||
ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy; | |||
ws->base.buffer_get_va = radv_amdgpu_winsys_bo_get_va; | |||
ws->base.buffer_map = radv_amdgpu_winsys_bo_map; | |||
ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap; | |||
ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd; | |||
ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd; | |||
ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata; | |||
} |
@@ -0,0 +1,50 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "radv_amdgpu_winsys.h" | |||
struct radv_amdgpu_winsys_bo { | |||
amdgpu_bo_handle bo; | |||
amdgpu_va_handle va_handle; | |||
uint64_t va; | |||
enum radeon_bo_domain initial_domain; | |||
uint64_t size; | |||
bool is_shared; | |||
struct radv_amdgpu_winsys *ws; | |||
struct list_head global_list_item; | |||
}; | |||
static inline | |||
struct radv_amdgpu_winsys_bo *radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo) | |||
{ | |||
return (struct radv_amdgpu_winsys_bo *)bo; | |||
} | |||
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws); | |||
@@ -0,0 +1,778 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <stdlib.h> | |||
#include <amdgpu.h> | |||
#include <amdgpu_drm.h> | |||
#include <assert.h> | |||
#include "amdgpu_id.h" | |||
#include "radv_radeon_winsys.h" | |||
#include "radv_amdgpu_cs.h" | |||
#include "radv_amdgpu_bo.h" | |||
#include "sid.h" | |||
struct radv_amdgpu_cs { | |||
struct radeon_winsys_cs base; | |||
struct radv_amdgpu_winsys *ws; | |||
struct amdgpu_cs_ib_info ib; | |||
struct radeon_winsys_bo *ib_buffer; | |||
uint8_t *ib_mapped; | |||
unsigned max_num_buffers; | |||
unsigned num_buffers; | |||
amdgpu_bo_handle *handles; | |||
uint8_t *priorities; | |||
struct radeon_winsys_bo **old_ib_buffers; | |||
unsigned num_old_ib_buffers; | |||
unsigned max_num_old_ib_buffers; | |||
unsigned *ib_size_ptr; | |||
bool failed; | |||
bool is_chained; | |||
int buffer_hash_table[1024]; | |||
}; | |||
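/*
 * A summary of the bookkeeping above (no new behaviour implied):
 * handles/priorities track every BO referenced by this CS, and
 * buffer_hash_table is a small direct-mapped cache from BO handle to
 * its index in those arrays, with a linear scan as the fallback.
 * When use_ib_bos is set, commands live in GPU-visible IB buffers and
 * ib_size_ptr points at the dword holding the running size of the IB
 * being filled, so it can be patched on chaining or finalization.
 */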
static inline struct radv_amdgpu_cs * | |||
radv_amdgpu_cs(struct radeon_winsys_cs *base) | |||
{ | |||
return (struct radv_amdgpu_cs*)base; | |||
} | |||
static struct radeon_winsys_fence *radv_amdgpu_create_fence(void)
{
struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
return (struct radeon_winsys_fence*)fence;
}
static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence) | |||
{ | |||
struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; | |||
free(fence); | |||
} | |||
static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws, | |||
struct radeon_winsys_fence *_fence, | |||
bool absolute, | |||
uint64_t timeout) | |||
{ | |||
struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; | |||
unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0; | |||
int r; | |||
uint32_t expired = 0; | |||
/* Now use the libdrm query. */ | |||
r = amdgpu_cs_query_fence_status(fence, | |||
timeout, | |||
flags, | |||
&expired); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n"); | |||
return false; | |||
} | |||
return expired != 0;
} | |||
static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs); | |||
if (cs->ib_buffer) | |||
cs->ws->base.buffer_destroy(cs->ib_buffer); | |||
else | |||
free(cs->base.buf); | |||
for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) | |||
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); | |||
free(cs->old_ib_buffers); | |||
free(cs->handles); | |||
free(cs->priorities); | |||
free(cs); | |||
} | |||
static bool radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
enum ring_type ring_type) | |||
{ | |||
for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i) { | |||
cs->buffer_hash_table[i] = -1; | |||
} | |||
return true; | |||
} | |||
static struct radeon_winsys_cs * | |||
radv_amdgpu_cs_create(struct radeon_winsys *ws, | |||
enum ring_type ring_type) | |||
{ | |||
struct radv_amdgpu_cs *cs; | |||
uint32_t ib_size = 20 * 1024 * 4; | |||
cs = calloc(1, sizeof(struct radv_amdgpu_cs)); | |||
if (!cs) | |||
return NULL; | |||
cs->ws = radv_amdgpu_winsys(ws); | |||
radv_amdgpu_init_cs(cs, ring_type);
if (cs->ws->use_ib_bos) { | |||
cs->ib_buffer = ws->buffer_create(ws, ib_size, 0, | |||
RADEON_DOMAIN_GTT, | |||
RADEON_FLAG_CPU_ACCESS); | |||
if (!cs->ib_buffer) { | |||
free(cs); | |||
return NULL; | |||
} | |||
cs->ib_mapped = ws->buffer_map(cs->ib_buffer); | |||
if (!cs->ib_mapped) { | |||
ws->buffer_destroy(cs->ib_buffer); | |||
free(cs); | |||
return NULL; | |||
} | |||
cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va; | |||
cs->base.buf = (uint32_t *)cs->ib_mapped; | |||
cs->base.max_dw = ib_size / 4 - 4; | |||
cs->ib_size_ptr = &cs->ib.size; | |||
cs->ib.size = 0; | |||
ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8); | |||
} else { | |||
cs->base.buf = malloc(16384); | |||
cs->base.max_dw = 4096; | |||
if (!cs->base.buf) { | |||
free(cs); | |||
return NULL; | |||
} | |||
} | |||
return &cs->base; | |||
} | |||
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); | |||
uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2); | |||
/* max that fits in the chain size field. */ | |||
ib_size = MIN2(ib_size, 0xfffff); | |||
if (cs->failed) { | |||
cs->base.cdw = 0; | |||
return; | |||
} | |||
if (!cs->ws->use_ib_bos) { | |||
uint32_t *new_buf = realloc(cs->base.buf, ib_size); | |||
if (new_buf) { | |||
cs->base.buf = new_buf; | |||
cs->base.max_dw = ib_size / 4; | |||
} else { | |||
cs->failed = true; | |||
cs->base.cdw = 0; | |||
} | |||
return; | |||
} | |||
while (!cs->base.cdw || (cs->base.cdw & 7) != 4) | |||
cs->base.buf[cs->base.cdw++] = 0xffff1000; | |||
*cs->ib_size_ptr |= cs->base.cdw + 4; | |||
if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) { | |||
cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2); | |||
cs->old_ib_buffers = realloc(cs->old_ib_buffers, | |||
cs->max_num_old_ib_buffers * sizeof(void*)); | |||
} | |||
cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer; | |||
cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, | |||
RADEON_DOMAIN_GTT, | |||
RADEON_FLAG_CPU_ACCESS); | |||
if (!cs->ib_buffer) { | |||
cs->base.cdw = 0; | |||
cs->failed = true; | |||
cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers]; | |||
} | |||
cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer); | |||
if (!cs->ib_mapped) { | |||
cs->ws->base.buffer_destroy(cs->ib_buffer); | |||
cs->base.cdw = 0; | |||
cs->failed = true; | |||
cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers]; | |||
} | |||
cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8); | |||
cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0); | |||
cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va; | |||
cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32; | |||
cs->ib_size_ptr = cs->base.buf + cs->base.cdw; | |||
cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1); | |||
cs->base.buf = (uint32_t *)cs->ib_mapped; | |||
cs->base.cdw = 0; | |||
cs->base.max_dw = ib_size / 4 - 4; | |||
} | |||
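/*
 * Sketch of the grow path above: the current IB is padded with NOPs to
 * the right alignment, an INDIRECT_BUFFER packet is written to chain
 * into a freshly allocated IB, and ib_size_ptr is pointed at the chain
 * packet's size dword so the next grow or finalize can fill in how
 * many dwords the new IB actually used.
 */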
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); | |||
if (cs->ws->use_ib_bos) { | |||
while (!cs->base.cdw || (cs->base.cdw & 7) != 0) | |||
cs->base.buf[cs->base.cdw++] = 0xffff1000; | |||
*cs->ib_size_ptr |= cs->base.cdw; | |||
cs->is_chained = false; | |||
} | |||
return !cs->failed; | |||
} | |||
static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); | |||
cs->base.cdw = 0; | |||
cs->failed = false; | |||
for (unsigned i = 0; i < cs->num_buffers; ++i) { | |||
unsigned hash = ((uintptr_t)cs->handles[i] >> 6) & | |||
(ARRAY_SIZE(cs->buffer_hash_table) - 1); | |||
cs->buffer_hash_table[hash] = -1; | |||
} | |||
cs->num_buffers = 0; | |||
if (cs->ws->use_ib_bos) { | |||
cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8); | |||
for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) | |||
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); | |||
cs->num_old_ib_buffers = 0; | |||
cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va; | |||
cs->ib_size_ptr = &cs->ib.size; | |||
cs->ib.size = 0; | |||
} | |||
} | |||
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs, | |||
amdgpu_bo_handle bo) | |||
{ | |||
unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); | |||
int index = cs->buffer_hash_table[hash]; | |||
if (index == -1) | |||
return -1; | |||
if(cs->handles[index] == bo) | |||
return index; | |||
for (unsigned i = 0; i < cs->num_buffers; ++i) { | |||
if (cs->handles[i] == bo) { | |||
cs->buffer_hash_table[hash] = i; | |||
return i; | |||
} | |||
} | |||
return -1; | |||
} | |||
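/*
 * The hash above is a heuristic: BO handles are heap pointers whose
 * low bits vary little, so shifting by 6 before masking spreads them
 * over the 1024-entry table. The shift amount is an assumption about
 * allocator alignment, not a hard requirement; a stale slot only costs
 * the linear fallback scan.
 */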
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, | |||
amdgpu_bo_handle bo, | |||
uint8_t priority) | |||
{ | |||
unsigned hash; | |||
int index = radv_amdgpu_cs_find_buffer(cs, bo); | |||
if (index != -1) { | |||
cs->priorities[index] = MAX2(cs->priorities[index], priority); | |||
return; | |||
} | |||
if (cs->num_buffers == cs->max_num_buffers) { | |||
unsigned new_count = MAX2(1, cs->max_num_buffers * 2); | |||
cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle)); | |||
cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t)); | |||
cs->max_num_buffers = new_count; | |||
} | |||
cs->handles[cs->num_buffers] = bo; | |||
cs->priorities[cs->num_buffers] = priority; | |||
hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); | |||
cs->buffer_hash_table[hash] = cs->num_buffers; | |||
++cs->num_buffers; | |||
} | |||
static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs, | |||
struct radeon_winsys_bo *_bo, | |||
uint8_t priority) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); | |||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); | |||
radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority); | |||
} | |||
static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent, | |||
struct radeon_winsys_cs *_child) | |||
{ | |||
struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent); | |||
struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child); | |||
for (unsigned i = 0; i < child->num_buffers; ++i) { | |||
radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i], | |||
child->priorities[i]); | |||
} | |||
if (parent->ws->use_ib_bos) { | |||
if (parent->base.cdw + 4 > parent->base.max_dw) | |||
radv_amdgpu_cs_grow(&parent->base, 4); | |||
parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0); | |||
parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address; | |||
parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32; | |||
parent->base.buf[parent->base.cdw++] = child->ib.size; | |||
} else { | |||
if (parent->base.cdw + child->base.cdw > parent->base.max_dw) | |||
radv_amdgpu_cs_grow(&parent->base, child->base.cdw); | |||
memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw); | |||
parent->base.cdw += child->base.cdw; | |||
} | |||
} | |||
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, | |||
struct radeon_winsys_cs **cs_array, | |||
unsigned count, | |||
struct radv_amdgpu_winsys_bo *extra_bo, | |||
amdgpu_bo_list_handle *bo_list) | |||
{ | |||
int r; | |||
if (ws->debug_all_bos) { | |||
struct radv_amdgpu_winsys_bo *bo; | |||
amdgpu_bo_handle *handles; | |||
unsigned num = 0; | |||
pthread_mutex_lock(&ws->global_bo_list_lock); | |||
handles = malloc(sizeof(handles[0]) * ws->num_buffers); | |||
if (!handles) { | |||
pthread_mutex_unlock(&ws->global_bo_list_lock); | |||
return -ENOMEM; | |||
} | |||
LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) { | |||
assert(num < ws->num_buffers); | |||
handles[num++] = bo->bo; | |||
} | |||
r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, | |||
handles, NULL, | |||
bo_list); | |||
free(handles); | |||
pthread_mutex_unlock(&ws->global_bo_list_lock); | |||
} else if (count == 1 && !extra_bo) { | |||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; | |||
r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, | |||
cs->priorities, bo_list); | |||
} else { | |||
unsigned total_buffer_count = !!extra_bo; | |||
unsigned unique_bo_count = !!extra_bo; | |||
for (unsigned i = 0; i < count; ++i) { | |||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i]; | |||
total_buffer_count += cs->num_buffers; | |||
} | |||
amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count); | |||
uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count); | |||
if (!handles || !priorities) { | |||
free(handles); | |||
free(priorities); | |||
return -ENOMEM; | |||
} | |||
if (extra_bo) { | |||
handles[0] = extra_bo->bo; | |||
priorities[0] = 8; | |||
} | |||
for (unsigned i = 0; i < count; ++i) { | |||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i]; | |||
for (unsigned j = 0; j < cs->num_buffers; ++j) { | |||
bool found = false; | |||
for (unsigned k = 0; k < unique_bo_count; ++k) { | |||
if (handles[k] == cs->handles[j]) { | |||
found = true; | |||
priorities[k] = MAX2(priorities[k], | |||
cs->priorities[j]); | |||
break; | |||
} | |||
} | |||
if (!found) { | |||
handles[unique_bo_count] = cs->handles[j]; | |||
priorities[unique_bo_count] = cs->priorities[j]; | |||
++unique_bo_count; | |||
} | |||
} | |||
} | |||
r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles, | |||
priorities, bo_list); | |||
free(handles); | |||
free(priorities); | |||
} | |||
return r; | |||
} | |||
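/*
 * Three strategies are used above to build the kernel BO list: with
 * RADV_DEBUG_ALL_BOS, every BO the winsys knows about is submitted
 * (simple, but lists get large); a single CS with no extra BO reuses
 * its handle/priority arrays directly; otherwise the CSes are merged,
 * de-duplicating handles and keeping the highest priority seen for
 * each BO.
 */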
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, | |||
struct radeon_winsys_cs **cs_array, | |||
unsigned cs_count, | |||
struct radeon_winsys_fence *_fence) | |||
{ | |||
int r; | |||
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); | |||
struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; | |||
struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); | |||
amdgpu_bo_list_handle bo_list; | |||
struct amdgpu_cs_request request = {0}; | |||
for (unsigned i = cs_count; i--;) { | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); | |||
if (cs->is_chained) { | |||
*cs->ib_size_ptr -= 4; | |||
cs->is_chained = false; | |||
} | |||
if (i + 1 < cs_count) { | |||
struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]); | |||
assert(cs->base.cdw + 4 <= cs->base.max_dw); | |||
cs->is_chained = true; | |||
*cs->ib_size_ptr += 4; | |||
cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0); | |||
cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address; | |||
cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32; | |||
cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size; | |||
} | |||
} | |||
r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); | |||
return r; | |||
} | |||
request.ip_type = AMDGPU_HW_IP_GFX; | |||
request.number_of_ibs = 1; | |||
request.ibs = &cs0->ib; | |||
request.resources = bo_list; | |||
r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); | |||
if (r) { | |||
if (r == -ENOMEM) | |||
fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); | |||
else | |||
fprintf(stderr, "amdgpu: The CS has been rejected, " | |||
"see dmesg for more information.\n"); | |||
} | |||
amdgpu_bo_list_destroy(bo_list); | |||
if (fence) { | |||
fence->context = ctx->ctx; | |||
fence->ip_type = request.ip_type; | |||
fence->ip_instance = request.ip_instance; | |||
fence->ring = request.ring; | |||
fence->fence = request.seq_no; | |||
} | |||
ctx->last_seq_no = request.seq_no; | |||
return r; | |||
} | |||
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, | |||
struct radeon_winsys_cs **cs_array, | |||
unsigned cs_count, | |||
struct radeon_winsys_fence *_fence) | |||
{ | |||
int r; | |||
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); | |||
struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; | |||
amdgpu_bo_list_handle bo_list; | |||
struct amdgpu_cs_request request; | |||
assert(cs_count); | |||
for (unsigned i = 0; i < cs_count;) { | |||
struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); | |||
struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; | |||
unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i); | |||
memset(&request, 0, sizeof(request)); | |||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); | |||
return r; | |||
} | |||
request.ip_type = AMDGPU_HW_IP_GFX; | |||
request.resources = bo_list; | |||
request.number_of_ibs = cnt; | |||
request.ibs = ibs; | |||
for (unsigned j = 0; j < cnt; ++j) { | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); | |||
ibs[j] = cs->ib; | |||
if (cs->is_chained) { | |||
*cs->ib_size_ptr -= 4; | |||
cs->is_chained = false; | |||
} | |||
} | |||
r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); | |||
if (r) { | |||
if (r == -ENOMEM) | |||
fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); | |||
else | |||
fprintf(stderr, "amdgpu: The CS has been rejected, " | |||
"see dmesg for more information.\n"); | |||
} | |||
amdgpu_bo_list_destroy(bo_list); | |||
if (r) | |||
return r; | |||
i += cnt; | |||
} | |||
if (fence) { | |||
fence->context = ctx->ctx; | |||
fence->ip_type = request.ip_type; | |||
fence->ip_instance = request.ip_instance; | |||
fence->ring = request.ring; | |||
fence->fence = request.seq_no; | |||
} | |||
ctx->last_seq_no = request.seq_no; | |||
return 0; | |||
} | |||
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, | |||
struct radeon_winsys_cs **cs_array, | |||
unsigned cs_count, | |||
struct radeon_winsys_fence *_fence) | |||
{ | |||
int r; | |||
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); | |||
struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; | |||
struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); | |||
struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws; | |||
amdgpu_bo_list_handle bo_list; | |||
struct amdgpu_cs_request request; | |||
uint32_t pad_word = 0xffff1000U; /* PKT3(PKT3_NOP, 0x3fff, 0) */
if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
pad_word = 0x80000000; /* SI only accepts type-2 NOPs for padding */
assert(cs_count); | |||
for (unsigned i = 0; i < cs_count;) { | |||
struct amdgpu_cs_ib_info ib = {0}; | |||
struct radeon_winsys_bo *bo = NULL; | |||
uint32_t *ptr; | |||
unsigned cnt = 0; | |||
unsigned size = 0; | |||
/* Pack as many CSes as fit under the 0xfffff-dword IB size limit,
 * leaving room for the 8-dword alignment padding added below. */
while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw; | |||
++cnt; | |||
} | |||
assert(cnt); | |||
bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); | |||
ptr = ws->buffer_map(bo); | |||
for (unsigned j = 0; j < cnt; ++j) { | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); | |||
memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); | |||
ptr += cs->base.cdw; | |||
} | |||
while(!size || (size & 7)) { | |||
*ptr++ = pad_word; | |||
++size; | |||
} | |||
memset(&request, 0, sizeof(request)); | |||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, | |||
(struct radv_amdgpu_winsys_bo*)bo, &bo_list); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); | |||
return r; | |||
} | |||
ib.size = size; | |||
ib.ib_mc_address = ws->buffer_get_va(bo); | |||
request.ip_type = AMDGPU_HW_IP_GFX; | |||
request.resources = bo_list; | |||
request.number_of_ibs = 1; | |||
request.ibs = &ib; | |||
r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); | |||
if (r) { | |||
if (r == -ENOMEM) | |||
fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); | |||
else | |||
fprintf(stderr, "amdgpu: The CS has been rejected, " | |||
"see dmesg for more information.\n"); | |||
} | |||
amdgpu_bo_list_destroy(bo_list); | |||
ws->buffer_destroy(bo); | |||
if (r) | |||
return r; | |||
i += cnt; | |||
} | |||
if (fence) { | |||
fence->context = ctx->ctx; | |||
fence->ip_type = request.ip_type; | |||
fence->ip_instance = request.ip_instance; | |||
fence->ring = request.ring; | |||
fence->fence = request.seq_no; | |||
} | |||
ctx->last_seq_no = request.seq_no; | |||
return 0; | |||
} | |||
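/*
 * The dispatcher below picks one of three submission paths: sysmem
 * (no IB BOs, e.g. SI) copies CS contents into a fresh GTT buffer and
 * submits it as one IB; chained patches each CS to jump to the next
 * via an INDIRECT_BUFFER packet and submits only the first; the
 * fallback submits IBs in batches of at most
 * AMDGPU_CS_MAX_IBS_PER_SUBMIT per request.
 */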
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, | |||
struct radeon_winsys_cs **cs_array, | |||
unsigned cs_count, | |||
bool can_patch, | |||
struct radeon_winsys_fence *_fence) | |||
{ | |||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]); | |||
if (!cs->ws->use_ib_bos) { | |||
return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array, | |||
cs_count, _fence); | |||
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) { /* chained submission is disabled for now by the "&& false" */
return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array, | |||
cs_count, _fence); | |||
} else { | |||
return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array, | |||
cs_count, _fence); | |||
} | |||
} | |||
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws) | |||
{ | |||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); | |||
struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx); | |||
int r; | |||
if (!ctx) | |||
return NULL; | |||
r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r); | |||
goto error_create; | |||
} | |||
ctx->ws = ws; | |||
return (struct radeon_winsys_ctx *)ctx; | |||
error_create:
FREE(ctx);
return NULL;
} | |||
static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx) | |||
{ | |||
struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx; | |||
amdgpu_cs_ctx_free(ctx->ctx); | |||
FREE(ctx); | |||
} | |||
static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx) | |||
{ | |||
struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx; | |||
if (ctx->last_seq_no) { | |||
uint32_t expired; | |||
struct amdgpu_cs_fence fence; | |||
fence.context = ctx->ctx; | |||
fence.ip_type = AMDGPU_HW_IP_GFX;
fence.ip_instance = 0; | |||
fence.ring = 0; | |||
fence.fence = ctx->last_seq_no; | |||
int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0, | |||
&expired); | |||
if (ret || !expired) | |||
return false; | |||
} | |||
return true; | |||
} | |||
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws) | |||
{ | |||
ws->base.ctx_create = radv_amdgpu_ctx_create; | |||
ws->base.ctx_destroy = radv_amdgpu_ctx_destroy; | |||
ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle; | |||
ws->base.cs_create = radv_amdgpu_cs_create; | |||
ws->base.cs_destroy = radv_amdgpu_cs_destroy; | |||
ws->base.cs_grow = radv_amdgpu_cs_grow; | |||
ws->base.cs_finalize = radv_amdgpu_cs_finalize; | |||
ws->base.cs_reset = radv_amdgpu_cs_reset; | |||
ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer; | |||
ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary; | |||
ws->base.cs_submit = radv_amdgpu_winsys_cs_submit; | |||
ws->base.create_fence = radv_amdgpu_create_fence; | |||
ws->base.destroy_fence = radv_amdgpu_destroy_fence; | |||
ws->base.fence_wait = radv_amdgpu_fence_wait; | |||
} |
@@ -0,0 +1,51 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include <string.h> | |||
#include <stdint.h> | |||
#include <assert.h> | |||
#include "r600d_common.h" | |||
#include <amdgpu.h> | |||
#include "radv_radeon_winsys.h" | |||
#include "radv_amdgpu_winsys.h" | |||
struct radv_amdgpu_ctx { | |||
struct radv_amdgpu_winsys *ws; | |||
amdgpu_context_handle ctx; | |||
uint64_t last_seq_no; | |||
}; | |||
static inline struct radv_amdgpu_ctx * | |||
radv_amdgpu_ctx(struct radeon_winsys_ctx *base) | |||
{ | |||
return (struct radv_amdgpu_ctx *)base; | |||
} | |||
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws); |
@@ -0,0 +1,523 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include <errno.h> | |||
#include "radv_private.h" | |||
#include "addrlib/addrinterface.h" | |||
#include "util/bitset.h" | |||
#include "radv_amdgpu_winsys.h" | |||
#include "radv_amdgpu_surface.h" | |||
#include "sid.h" | |||
#ifndef NO_ENTRIES | |||
#define NO_ENTRIES 32 | |||
#endif | |||
#ifndef NO_MACRO_ENTRIES | |||
#define NO_MACRO_ENTRIES 16 | |||
#endif | |||
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND | |||
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A | |||
#endif | |||
static int radv_amdgpu_surface_sanity(const struct radeon_surf *surf) | |||
{ | |||
unsigned type = RADEON_SURF_GET(surf->flags, TYPE); | |||
if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX)) | |||
return -EINVAL; | |||
/* All dimensions must be at least 1. */
if (!surf->npix_x || !surf->npix_y || !surf->npix_z || | |||
!surf->array_size) | |||
return -EINVAL; | |||
if (!surf->blk_w || !surf->blk_h || !surf->blk_d) | |||
return -EINVAL; | |||
switch (surf->nsamples) { | |||
case 1: | |||
case 2: | |||
case 4: | |||
case 8: | |||
break; | |||
default: | |||
return -EINVAL; | |||
} | |||
switch (type) { | |||
case RADEON_SURF_TYPE_1D: | |||
if (surf->npix_y > 1) | |||
return -EINVAL; | |||
/* fall through */ | |||
case RADEON_SURF_TYPE_2D: | |||
case RADEON_SURF_TYPE_CUBEMAP: | |||
if (surf->npix_z > 1 || surf->array_size > 1) | |||
return -EINVAL; | |||
break; | |||
case RADEON_SURF_TYPE_3D: | |||
if (surf->array_size > 1) | |||
return -EINVAL; | |||
break; | |||
case RADEON_SURF_TYPE_1D_ARRAY: | |||
if (surf->npix_y > 1) | |||
return -EINVAL; | |||
/* fall through */ | |||
case RADEON_SURF_TYPE_2D_ARRAY: | |||
if (surf->npix_z > 1) | |||
return -EINVAL; | |||
break; | |||
default: | |||
return -EINVAL; | |||
} | |||
return 0; | |||
} | |||
static void *ADDR_API radv_allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput) | |||
{ | |||
return malloc(pInput->sizeInBytes); | |||
} | |||
static ADDR_E_RETURNCODE ADDR_API radv_freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput) | |||
{ | |||
free(pInput->pVirtAddr); | |||
return ADDR_OK; | |||
} | |||
ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id, | |||
enum chip_class chip_class) | |||
{ | |||
ADDR_CREATE_INPUT addrCreateInput = {0}; | |||
ADDR_CREATE_OUTPUT addrCreateOutput = {0}; | |||
ADDR_REGISTER_VALUE regValue = {0}; | |||
ADDR_CREATE_FLAGS createFlags = {{0}}; | |||
ADDR_E_RETURNCODE addrRet; | |||
addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); | |||
addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT); | |||
regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3; | |||
regValue.gbAddrConfig = amdinfo->gb_addr_cfg; | |||
regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2; | |||
regValue.backendDisables = amdinfo->backend_disable[0]; | |||
regValue.pTileConfig = amdinfo->gb_tile_mode; | |||
regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode); | |||
if (chip_class == SI) { | |||
regValue.pMacroTileConfig = NULL; | |||
regValue.noOfMacroEntries = 0; | |||
} else { | |||
regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode; | |||
regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode); | |||
} | |||
createFlags.value = 0; | |||
createFlags.useTileIndex = 1; | |||
createFlags.degradeBaseLevel = 1; | |||
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; | |||
addrCreateInput.chipFamily = family; | |||
addrCreateInput.chipRevision = rev_id; | |||
addrCreateInput.createFlags = createFlags; | |||
addrCreateInput.callbacks.allocSysMem = radv_allocSysMem; | |||
addrCreateInput.callbacks.freeSysMem = radv_freeSysMem; | |||
addrCreateInput.callbacks.debugPrint = 0; | |||
addrCreateInput.regValue = regValue; | |||
addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput); | |||
if (addrRet != ADDR_OK) | |||
return NULL; | |||
return addrCreateOutput.hLib; | |||
} | |||
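/*
 * The handle returned above is an instance of AMD's addrlib, seeded
 * with the tiling-related register values queried from the kernel;
 * the surface layout math below is delegated to it rather than
 * re-implemented.
 */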
static int radv_compute_level(ADDR_HANDLE addrlib, | |||
struct radeon_surf *surf, bool is_stencil, | |||
unsigned level, unsigned type, bool compressed, | |||
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, | |||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, | |||
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, | |||
ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut) | |||
{ | |||
struct radeon_surf_level *surf_level; | |||
ADDR_E_RETURNCODE ret; | |||
AddrSurfInfoIn->mipLevel = level; | |||
AddrSurfInfoIn->width = u_minify(surf->npix_x, level); | |||
AddrSurfInfoIn->height = u_minify(surf->npix_y, level); | |||
if (type == RADEON_SURF_TYPE_3D) | |||
AddrSurfInfoIn->numSlices = u_minify(surf->npix_z, level); | |||
else if (type == RADEON_SURF_TYPE_CUBEMAP) | |||
AddrSurfInfoIn->numSlices = 6; | |||
else | |||
AddrSurfInfoIn->numSlices = surf->array_size; | |||
if (level > 0) { | |||
/* Set the base level pitch. This is needed for calculation | |||
* of non-zero levels. */ | |||
if (is_stencil) | |||
AddrSurfInfoIn->basePitch = surf->stencil_level[0].nblk_x; | |||
else | |||
AddrSurfInfoIn->basePitch = surf->level[0].nblk_x; | |||
/* Convert blocks to pixels for compressed formats. */ | |||
if (compressed) | |||
AddrSurfInfoIn->basePitch *= surf->blk_w; | |||
} | |||
ret = AddrComputeSurfaceInfo(addrlib, | |||
AddrSurfInfoIn, | |||
AddrSurfInfoOut); | |||
if (ret != ADDR_OK) { | |||
return ret; | |||
} | |||
surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level]; | |||
surf_level->offset = align64(surf->bo_size, AddrSurfInfoOut->baseAlign); | |||
surf_level->slice_size = AddrSurfInfoOut->sliceSize; | |||
surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe); | |||
surf_level->npix_x = u_minify(surf->npix_x, level); | |||
surf_level->npix_y = u_minify(surf->npix_y, level); | |||
surf_level->npix_z = u_minify(surf->npix_z, level); | |||
surf_level->nblk_x = AddrSurfInfoOut->pitch; | |||
surf_level->nblk_y = AddrSurfInfoOut->height; | |||
if (type == RADEON_SURF_TYPE_3D) | |||
surf_level->nblk_z = AddrSurfInfoOut->depth; | |||
else | |||
surf_level->nblk_z = 1; | |||
switch (AddrSurfInfoOut->tileMode) { | |||
case ADDR_TM_LINEAR_ALIGNED: | |||
surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED; | |||
break; | |||
case ADDR_TM_1D_TILED_THIN1: | |||
surf_level->mode = RADEON_SURF_MODE_1D; | |||
break; | |||
case ADDR_TM_2D_TILED_THIN1: | |||
surf_level->mode = RADEON_SURF_MODE_2D; | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
if (is_stencil) | |||
surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex; | |||
else | |||
surf->tiling_index[level] = AddrSurfInfoOut->tileIndex; | |||
surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize; | |||
/* Clear DCC fields at the beginning. */ | |||
surf_level->dcc_offset = 0; | |||
surf_level->dcc_enabled = false; | |||
/* The previous level's flag tells us if we can use DCC for this level. */ | |||
if (AddrSurfInfoIn->flags.dccCompatible && | |||
(level == 0 || AddrDccOut->subLvlCompressible)) { | |||
AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; | |||
AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; | |||
AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; | |||
AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; | |||
AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; | |||
ret = AddrComputeDccInfo(addrlib, | |||
AddrDccIn, | |||
AddrDccOut); | |||
if (ret == ADDR_OK) { | |||
surf_level->dcc_offset = surf->dcc_size; | |||
surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize; | |||
surf_level->dcc_enabled = true; | |||
surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize; | |||
surf->dcc_alignment = MAX(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign); | |||
} | |||
} | |||
return 0; | |||
} | |||
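/*
 * radv_compute_level is called once per mip level (and again per level
 * for stencil): each call places the level at the next suitably
 * aligned offset, growing surf->bo_size, and, where DCC is usable,
 * accumulates the per-level DCC metadata into surf->dcc_size in the
 * same fashion.
 */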
static void radv_set_micro_tile_mode(struct radeon_surf *surf, | |||
struct radeon_info *info) | |||
{ | |||
uint32_t tile_mode = info->si_tile_mode_array[surf->tiling_index[0]]; | |||
if (info->chip_class >= CIK) | |||
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); | |||
else | |||
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); | |||
} | |||
static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, | |||
struct radeon_surf *surf) | |||
{ | |||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); | |||
unsigned level, mode, type; | |||
bool compressed; | |||
ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; | |||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; | |||
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; | |||
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; | |||
ADDR_TILEINFO AddrTileInfoIn = {0}; | |||
ADDR_TILEINFO AddrTileInfoOut = {0}; | |||
int r; | |||
r = radv_amdgpu_surface_sanity(surf); | |||
if (r) | |||
return r; | |||
AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); | |||
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); | |||
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); | |||
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); | |||
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; | |||
type = RADEON_SURF_GET(surf->flags, TYPE); | |||
mode = RADEON_SURF_GET(surf->flags, MODE); | |||
compressed = surf->blk_w == 4 && surf->blk_h == 4; | |||
/* MSAA and FMASK require 2D tiling. */ | |||
if (surf->nsamples > 1 || | |||
(surf->flags & RADEON_SURF_FMASK)) | |||
mode = RADEON_SURF_MODE_2D; | |||
/* DB doesn't support linear layouts. */ | |||
if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && | |||
mode < RADEON_SURF_MODE_1D) | |||
mode = RADEON_SURF_MODE_1D; | |||
/* Set the requested tiling mode. */ | |||
switch (mode) { | |||
case RADEON_SURF_MODE_LINEAR_ALIGNED: | |||
AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED; | |||
break; | |||
case RADEON_SURF_MODE_1D: | |||
AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1; | |||
break; | |||
case RADEON_SURF_MODE_2D: | |||
AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1; | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
/* The format must be set correctly for the allocation of compressed | |||
* textures to work. In other cases, setting the bpp is sufficient. */ | |||
if (compressed) { | |||
switch (surf->bpe) { | |||
case 8: | |||
AddrSurfInfoIn.format = ADDR_FMT_BC1; | |||
break; | |||
case 16: | |||
AddrSurfInfoIn.format = ADDR_FMT_BC3; | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
} | |||
else { | |||
AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; | |||
} | |||
AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples; | |||
AddrSurfInfoIn.tileIndex = -1; | |||
/* Set the micro tile type. */ | |||
if (surf->flags & RADEON_SURF_SCANOUT) | |||
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE; | |||
else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) | |||
AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER; | |||
else | |||
AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE; | |||
AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); | |||
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; | |||
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP; | |||
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; | |||
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; | |||
AddrSurfInfoIn.flags.degrade4Space = 1; | |||
/* DCC notes: | |||
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp | |||
* with samples >= 4. | |||
* - Mipmapped array textures have low performance (discovered by a closed | |||
* driver team). | |||
*/ | |||
AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && | |||
!(surf->flags & RADEON_SURF_DISABLE_DCC) && | |||
!compressed && AddrDccIn.numSamples <= 1 && | |||
((surf->array_size == 1 && surf->npix_z == 1) || | |||
surf->last_level == 0); | |||
AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; | |||
AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth; | |||
/* noStencil = 0 can result in a depth part that is incompatible with | |||
* mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in | |||
* this case, we may end up setting stencil_adjusted). | |||
* | |||
* TODO: update addrlib to a newer version, remove this, and | |||
* use flags.matchStencilTileCfg = 1 as an alternative fix. | |||
*/ | |||
if (surf->last_level > 0) | |||
AddrSurfInfoIn.flags.noStencil = 1; | |||
/* Set preferred macrotile parameters. This is usually required | |||
* for shared resources. This is for 2D tiling only. */ | |||
if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && | |||
surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) { | |||
/* If any of these parameters are incorrect, the calculation | |||
* will fail. */ | |||
AddrTileInfoIn.banks = surf->num_banks; | |||
AddrTileInfoIn.bankWidth = surf->bankw; | |||
AddrTileInfoIn.bankHeight = surf->bankh; | |||
AddrTileInfoIn.macroAspectRatio = surf->mtilea; | |||
AddrTileInfoIn.tileSplitBytes = surf->tile_split; | |||
AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */ | |||
AddrSurfInfoIn.flags.degrade4Space = 0; | |||
AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; | |||
/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set | |||
* the tile index, because we are expected to know it if | |||
* we know the other parameters. | |||
* | |||
* This is something that can easily be fixed in Addrlib. | |||
* For now, just figure it out here. | |||
* Note that only 2D_TILE_THIN1 is handled here. | |||
*/ | |||
assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); | |||
assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); | |||
if (ws->info.chip_class == SI) { | |||
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { | |||
if (surf->bpe == 2) | |||
AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ | |||
else | |||
AddrSurfInfoIn.tileIndex = 12; /* 32bpp */ | |||
} else { | |||
if (surf->bpe == 1) | |||
AddrSurfInfoIn.tileIndex = 14; /* 8bpp */ | |||
else if (surf->bpe == 2) | |||
AddrSurfInfoIn.tileIndex = 15; /* 16bpp */ | |||
else if (surf->bpe == 4) | |||
AddrSurfInfoIn.tileIndex = 16; /* 32bpp */ | |||
else | |||
AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ | |||
} | |||
} else { | |||
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) | |||
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ | |||
else | |||
AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ | |||
} | |||
} | |||
surf->bo_size = 0; | |||
surf->dcc_size = 0; | |||
surf->dcc_alignment = 1; | |||
/* Calculate texture layout information. */ | |||
for (level = 0; level <= surf->last_level; level++) { | |||
r = radv_compute_level(ws->addrlib, surf, false, level, type, compressed, | |||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); | |||
if (r) | |||
return r; | |||
if (level == 0) { | |||
surf->bo_alignment = AddrSurfInfoOut.baseAlign; | |||
surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1; | |||
radv_set_micro_tile_mode(surf, &ws->info); | |||
/* For 2D modes only. */ | |||
if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { | |||
surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth; | |||
surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight; | |||
surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio; | |||
surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes; | |||
surf->num_banks = AddrSurfInfoOut.pTileInfo->banks; | |||
surf->macro_tile_index = AddrSurfInfoOut.macroModeIndex; | |||
} else { | |||
surf->macro_tile_index = 0; | |||
} | |||
} | |||
} | |||
/* Calculate texture layout information for stencil. */ | |||
if (surf->flags & RADEON_SURF_SBUFFER) { | |||
AddrSurfInfoIn.bpp = 8; | |||
AddrSurfInfoIn.flags.depth = 0; | |||
AddrSurfInfoIn.flags.stencil = 1; | |||
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ | |||
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split; | |||
for (level = 0; level <= surf->last_level; level++) { | |||
r = radv_compute_level(ws->addrlib, surf, true, level, type, compressed, | |||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); | |||
if (r) | |||
return r; | |||
/* DB uses the depth pitch for both stencil and depth. */ | |||
if (surf->stencil_level[level].nblk_x != surf->level[level].nblk_x) | |||
surf->stencil_adjusted = true; | |||
if (level == 0) { | |||
/* For 2D modes only. */ | |||
if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { | |||
surf->stencil_tile_split = | |||
AddrSurfInfoOut.pTileInfo->tileSplitBytes; | |||
} | |||
} | |||
} | |||
} | |||
/* Recalculate the whole DCC miptree size including disabled levels. | |||
* This is what addrlib does, but calling addrlib would be a lot more | |||
* complicated. | |||
*/ | |||
#if 0 | |||
if (surf->dcc_size && surf->last_level > 0) { | |||
surf->dcc_size = align64(surf->bo_size >> 8, | |||
ws->info.pipe_interleave_bytes * | |||
ws->info.num_tile_pipes); | |||
} | |||
#endif | |||
return 0; | |||
} | |||
static int radv_amdgpu_winsys_surface_best(struct radeon_winsys *rws, | |||
struct radeon_surf *surf) | |||
{ | |||
return 0; | |||
} | |||
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws) | |||
{ | |||
ws->base.surface_init = radv_amdgpu_winsys_surface_init; | |||
ws->base.surface_best = radv_amdgpu_winsys_surface_best; | |||
} |
@@ -0,0 +1,29 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include <amdgpu.h> | |||
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws); | |||
ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id, enum chip_class chip_class); |
@@ -0,0 +1,359 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "radv_amdgpu_winsys.h" | |||
#include "radv_amdgpu_winsys_public.h" | |||
#include "radv_amdgpu_surface.h" | |||
#include "amdgpu_id.h" | |||
#include "xf86drm.h" | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <amdgpu_drm.h> | |||
#include <assert.h> | |||
#include "radv_amdgpu_cs.h" | |||
#include "radv_amdgpu_bo.h" | |||
#include "radv_amdgpu_surface.h" | |||
#define CIK_TILE_MODE_COLOR_2D 14 | |||
#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f) | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16 | |||
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17 | |||
static unsigned radv_cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) | |||
{ | |||
unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D]; | |||
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) { | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P2: | |||
return 2; | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32: | |||
return 4; | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32: | |||
return 8; | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16: | |||
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: | |||
return 16; | |||
default: | |||
fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); | |||
assert(!"this should never occur"); | |||
return 2; | |||
} | |||
} | |||
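/*
 * The switch above decodes the PIPE_CONFIG field of the 2D color tile
 * mode into a pipe count; the ADDR_SURF_P* encodings follow the CIK
 * addressing headers, and an unexpected value degrades to a 2-pipe
 * assumption after warning.
 */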
static const char * | |||
get_chip_name(enum radeon_family family) | |||
{ | |||
switch (family) { | |||
case CHIP_TAHITI: return "AMD RADV TAHITI"; | |||
case CHIP_PITCAIRN: return "AMD RADV PITCAIRN"; | |||
case CHIP_VERDE: return "AMD RADV CAPE VERDE"; | |||
case CHIP_OLAND: return "AMD RADV OLAND"; | |||
case CHIP_HAINAN: return "AMD RADV HAINAN"; | |||
case CHIP_BONAIRE: return "AMD RADV BONAIRE"; | |||
case CHIP_KAVERI: return "AMD RADV KAVERI"; | |||
case CHIP_KABINI: return "AMD RADV KABINI"; | |||
case CHIP_HAWAII: return "AMD RADV HAWAII"; | |||
case CHIP_MULLINS: return "AMD RADV MULLINS"; | |||
case CHIP_TONGA: return "AMD RADV TONGA"; | |||
case CHIP_ICELAND: return "AMD RADV ICELAND"; | |||
case CHIP_CARRIZO: return "AMD RADV CARRIZO"; | |||
case CHIP_FIJI: return "AMD RADV FIJI"; | |||
case CHIP_POLARIS10: return "AMD RADV POLARIS10"; | |||
case CHIP_POLARIS11: return "AMD RADV POLARIS11"; | |||
case CHIP_STONEY: return "AMD RADV STONEY"; | |||
default: return "AMD RADV unknown"; | |||
} | |||
} | |||
static bool | |||
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd) | |||
{ | |||
struct amdgpu_buffer_size_alignments alignment_info = {}; | |||
struct amdgpu_heap_info vram, gtt; | |||
struct drm_amdgpu_info_hw_ip dma = {}; | |||
drmDevicePtr devinfo; | |||
int r; | |||
int i, j; | |||
/* Get PCI info. */ | |||
r = drmGetDevice(fd, &devinfo); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: drmGetDevice failed.\n"); | |||
goto fail; | |||
} | |||
ws->info.pci_domain = devinfo->businfo.pci->domain; | |||
ws->info.pci_bus = devinfo->businfo.pci->bus; | |||
ws->info.pci_dev = devinfo->businfo.pci->dev; | |||
ws->info.pci_func = devinfo->businfo.pci->func; | |||
drmFreeDevice(&devinfo); | |||
/* Query hardware and driver information. */ | |||
r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n"); | |||
goto fail; | |||
} | |||
r = amdgpu_query_buffer_size_alignment(ws->dev, &alignment_info); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n"); | |||
goto fail; | |||
} | |||
r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n"); | |||
goto fail; | |||
} | |||
r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, >t); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n"); | |||
goto fail; | |||
} | |||
r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_DMA, 0, &dma); | |||
if (r) { | |||
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n"); | |||
goto fail; | |||
} | |||
ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */ | |||
ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config; | |||
switch (ws->info.pci_id) { | |||
#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; break; | |||
#include "pci_ids/radeonsi_pci_ids.h" | |||
#undef CHIPSET | |||
default: | |||
fprintf(stderr, "amdgpu: Invalid PCI ID.\n"); | |||
goto fail; | |||
} | |||
if (ws->info.family >= CHIP_TONGA) | |||
ws->info.chip_class = VI; | |||
else if (ws->info.family >= CHIP_BONAIRE) | |||
ws->info.chip_class = CIK; | |||
else if (ws->info.family >= CHIP_TAHITI) | |||
ws->info.chip_class = SI; | |||
else { | |||
fprintf(stderr, "amdgpu: Unknown family.\n"); | |||
goto fail; | |||
} | |||
/* family and rev_id are for addrlib */ | |||
switch (ws->info.family) { | |||
case CHIP_TAHITI: | |||
ws->family = FAMILY_SI; | |||
ws->rev_id = SI_TAHITI_P_A0; | |||
break; | |||
case CHIP_PITCAIRN: | |||
ws->family = FAMILY_SI; | |||
ws->rev_id = SI_PITCAIRN_PM_A0; | |||
break; | |||
case CHIP_VERDE: | |||
ws->family = FAMILY_SI; | |||
ws->rev_id = SI_CAPEVERDE_M_A0; | |||
break; | |||
case CHIP_OLAND: | |||
ws->family = FAMILY_SI; | |||
ws->rev_id = SI_OLAND_M_A0; | |||
break; | |||
case CHIP_HAINAN: | |||
ws->family = FAMILY_SI; | |||
ws->rev_id = SI_HAINAN_V_A0; | |||
break; | |||
case CHIP_BONAIRE: | |||
ws->family = FAMILY_CI; | |||
ws->rev_id = CI_BONAIRE_M_A0; | |||
break; | |||
case CHIP_KAVERI: | |||
ws->family = FAMILY_KV; | |||
ws->rev_id = KV_SPECTRE_A0; | |||
break; | |||
case CHIP_KABINI: | |||
ws->family = FAMILY_KV; | |||
ws->rev_id = KB_KALINDI_A0; | |||
break; | |||
case CHIP_HAWAII: | |||
ws->family = FAMILY_CI; | |||
ws->rev_id = CI_HAWAII_P_A0; | |||
break; | |||
case CHIP_MULLINS: | |||
ws->family = FAMILY_KV; | |||
ws->rev_id = ML_GODAVARI_A0; | |||
break; | |||
case CHIP_TONGA: | |||
ws->family = FAMILY_VI; | |||
ws->rev_id = VI_TONGA_P_A0; | |||
break; | |||
case CHIP_ICELAND: | |||
ws->family = FAMILY_VI; | |||
ws->rev_id = VI_ICELAND_M_A0; | |||
break; | |||
case CHIP_CARRIZO: | |||
ws->family = FAMILY_CZ; | |||
ws->rev_id = CARRIZO_A0; | |||
break; | |||
case CHIP_STONEY: | |||
ws->family = FAMILY_CZ; | |||
ws->rev_id = STONEY_A0; | |||
break; | |||
case CHIP_FIJI: | |||
ws->family = FAMILY_VI; | |||
ws->rev_id = VI_FIJI_P_A0; | |||
break; | |||
case CHIP_POLARIS10: | |||
ws->family = FAMILY_VI; | |||
ws->rev_id = VI_POLARIS10_P_A0; | |||
break; | |||
case CHIP_POLARIS11: | |||
ws->family = FAMILY_VI; | |||
ws->rev_id = VI_POLARIS11_M_A0; | |||
break; | |||
default: | |||
fprintf(stderr, "amdgpu: Unknown family.\n"); | |||
goto fail; | |||
} | |||
ws->addrlib = radv_amdgpu_addr_create(&ws->amdinfo, ws->family, ws->rev_id, ws->info.chip_class); | |||
if (!ws->addrlib) { | |||
fprintf(stderr, "amdgpu: Cannot create addrlib.\n"); | |||
goto fail; | |||
} | |||
/* Set hardware information. */ | |||
ws->info.name = get_chip_name(ws->info.family); | |||
ws->info.gart_size = gtt.heap_size; | |||
ws->info.vram_size = vram.heap_size; | |||
/* convert the shader clock from KHz to MHz */ | |||
ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000; | |||
ws->info.max_se = ws->amdinfo.num_shader_engines; | |||
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine; | |||
ws->info.has_uvd = 0; | |||
ws->info.vce_fw_version = 0; | |||
ws->info.has_userptr = TRUE; | |||
ws->info.num_render_backends = ws->amdinfo.rb_pipes; | |||
ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq; | |||
ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo); | |||
ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7); | |||
ws->info.has_virtual_memory = TRUE; | |||
ws->info.has_sdma = dma.available_rings != 0; | |||
/* Get the number of good compute units. */ | |||
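	/* cu_bitmap[se][sh] has one bit set per enabled CU in that shader array. */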
ws->info.num_good_compute_units = 0; | |||
for (i = 0; i < ws->info.max_se; i++) | |||
for (j = 0; j < ws->info.max_sh_per_se; j++) | |||
ws->info.num_good_compute_units += | |||
util_bitcount(ws->amdinfo.cu_bitmap[i][j]); | |||
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode, | |||
sizeof(ws->amdinfo.gb_tile_mode)); | |||
ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask; | |||
memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode, | |||
sizeof(ws->amdinfo.gb_macro_tile_mode)); | |||
ws->info.gart_page_size = alignment_info.size_remote; | |||
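	/* On SI the CP requires type-2 NOPs to pad the gfx IB; later chips
	 * also accept type-3 NOPs. */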
if (ws->info.chip_class == SI) | |||
ws->info.gfx_ib_pad_with_type2 = TRUE; | |||
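	/* IB chaining (growing a CS by linking buffer objects) is only used on
	 * CIK and newer. */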
ws->use_ib_bos = ws->family >= FAMILY_CI; | |||
return true; | |||
fail: | |||
return false; | |||
} | |||
static void radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, | |||
struct radeon_info *info) | |||
{ | |||
*info = ((struct radv_amdgpu_winsys *)rws)->info; | |||
} | |||
static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws) | |||
{ | |||
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws; | |||
AddrDestroy(ws->addrlib); | |||
amdgpu_device_deinitialize(ws->dev); | |||
	free(rws);
} | |||
struct radeon_winsys * | |||
radv_amdgpu_winsys_create(int fd) | |||
{ | |||
	uint32_t drm_major, drm_minor;
	int r;
	amdgpu_device_handle dev;
	struct radv_amdgpu_winsys *ws;
	r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
	if (r)
		return NULL;
	ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
	if (!ws)
		goto fail;
ws->dev = dev; | |||
ws->info.drm_major = drm_major; | |||
ws->info.drm_minor = drm_minor; | |||
if (!do_winsys_init(ws, fd)) | |||
goto fail; | |||
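	/* With RADV_DEBUG_ALL_BOS set, every allocation is tracked on the
	 * global BO list, which helps when debugging GPU hangs. */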
ws->debug_all_bos = getenv("RADV_DEBUG_ALL_BOS") ? true : false; | |||
LIST_INITHEAD(&ws->global_bo_list); | |||
pthread_mutex_init(&ws->global_bo_list_lock, NULL); | |||
ws->base.query_info = radv_amdgpu_winsys_query_info; | |||
ws->base.destroy = radv_amdgpu_winsys_destroy; | |||
radv_amdgpu_bo_init_functions(ws); | |||
radv_amdgpu_cs_init_functions(ws); | |||
radv_amdgpu_surface_init_functions(ws); | |||
return &ws->base; | |||
fail:
	free(ws);
	amdgpu_device_deinitialize(dev);
	return NULL;
} |
@@ -0,0 +1,57 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "radv_radeon_winsys.h" | |||
#include "addrlib/addrinterface.h" | |||
#include <amdgpu.h> | |||
#include "util/list.h" | |||
struct radv_amdgpu_winsys { | |||
struct radeon_winsys base; | |||
amdgpu_device_handle dev; | |||
struct radeon_info info; | |||
struct amdgpu_gpu_info amdinfo; | |||
ADDR_HANDLE addrlib; | |||
uint32_t rev_id; | |||
unsigned family; | |||
bool debug_all_bos; | |||
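	/* When debug_all_bos is set, every BO is kept on this list (under the
	 * lock) so submissions can reference all allocations. */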
pthread_mutex_t global_bo_list_lock; | |||
struct list_head global_bo_list; | |||
unsigned num_buffers; | |||
bool use_ib_bos; | |||
}; | |||
static inline struct radv_amdgpu_winsys * | |||
radv_amdgpu_winsys(struct radeon_winsys *base) | |||
{ | |||
return (struct radv_amdgpu_winsys*)base; | |||
} |
@@ -0,0 +1,30 @@ | |||
/* | |||
* Copyright © 2016 Red Hat. | |||
* Copyright © 2016 Bas Nieuwenhuizen | |||
* | |||
* based on amdgpu winsys. | |||
* Copyright © 2011 Marek Olšák <maraeo@gmail.com> | |||
* Copyright © 2015 Advanced Micro Devices, Inc. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#pragma once | |||
struct radeon_winsys *radv_amdgpu_winsys_create(int fd); |
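For orientation, a minimal usage sketch of this entry point follows; it is not part of the patch. The standalone main(), the "/dev/dri/renderD128" render-node path, and the direct open()/close() calls are illustrative assumptions; query_info and destroy are the vtable hooks populated in radv_amdgpu_winsys_create().

/* Hypothetical usage sketch -- illustrative only, not part of the patch. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_winsys_public.h"

int main(void)
{
	/* The render-node path is an assumption for illustration. */
	int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return 1;
	struct radeon_winsys *ws = radv_amdgpu_winsys_create(fd);
	if (!ws) {
		close(fd);
		return 1;
	}
	/* Query the chip info filled in by do_winsys_init(). */
	struct radeon_info info;
	ws->query_info(ws, &info);
	printf("%s: %u SEs, %u CUs\n", info.name, info.max_se,
	       info.num_good_compute_units);
	ws->destroy(ws);
	close(fd);
	return 0;
}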