The AC_TM_AUTO_WAITCNT_BEFORE_BARRIER option allows us to remove additional s_waitcnt instructions, because s_barrier internally does s_waitcnt 0. However, there is apparently a problem with LDS accesses that causes rendering issues with FFXV and DXVK. Disable this optimization for now; RadeonSI still enables it.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107460
CC: 18.2 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

tags/18.3-branchpoint
@@ -149,7 +149,8 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, | |||
char features[256]; | |||
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; | |||
LLVMTargetRef target = ac_get_llvm_target(triple); | |||
bool barrier_does_waitcnt = family != CHIP_VEGA20; | |||
bool barrier_does_waitcnt = (tm_options & AC_TM_AUTO_WAITCNT_BEFORE_BARRIER) && | |||
family != CHIP_VEGA20; | |||
snprintf(features, sizeof(features), | |||
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s", |
@@ -65,6 +65,7 @@ enum ac_target_machine_options { | |||
AC_TM_CHECK_IR = (1 << 5), | |||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6), | |||
AC_TM_CREATE_LOW_OPT = (1 << 7), | |||
AC_TM_AUTO_WAITCNT_BEFORE_BARRIER = (1 << 8), | |||
}; | |||
enum ac_float_mode { |
@@ -115,6 +115,7 @@ static void si_init_compiler(struct si_screen *sscreen, | |||
sscreen->info.chip_class <= VI; | |||
enum ac_target_machine_options tm_options = | |||
AC_TM_AUTO_WAITCNT_BEFORE_BARRIER | | |||
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | | |||
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) | | |||
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) | |