The introduction of 16-bit types with VK_KHR_16bit_storage implies that push constant offsets can be multiples of 2 bytes. Some assertions are updated so that offsets only need to be a multiple of the size of the base type, although in some cases we cannot assume even that, as doubles aren't aligned to 8 bytes in some cases. For 16-bit types, the push constant offset takes into account the internal offset within the 32-bit uniform bucket, adding 2 bytes when we access elements that are not 32-bit aligned. In all 32-bit aligned cases this additional offset is just 0. v2: Assert offsets to be aligned to the dest type size. (Jason Ekstrand) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> tags/18.1-branchpoint
@@ -753,8 +753,6 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, | |||
} | |||
if (op == nir_intrinsic_load_push_constant) { | |||
vtn_assert(access_offset % 4 == 0); | |||
nir_intrinsic_set_base(instr, access_offset); | |||
nir_intrinsic_set_range(instr, access_size); | |||
} |
@@ -3882,16 +3882,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr | |||
break; | |||
case nir_intrinsic_load_uniform: { | |||
/* Offsets are in bytes but they should always be multiples of 4 */ | |||
assert(instr->const_index[0] % 4 == 0); | |||
/* Offsets are in bytes but they should always be aligned to | |||
* the type size | |||
*/ | |||
assert(instr->const_index[0] % 4 == 0 || | |||
instr->const_index[0] % type_sz(dest.type) == 0); | |||
fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type); | |||
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); | |||
if (const_offset) { | |||
/* Offsets are in bytes but they should always be multiples of 4 */ | |||
assert(const_offset->u32[0] % 4 == 0); | |||
src.offset = const_offset->u32[0]; | |||
assert(const_offset->u32[0] % type_sz(dest.type) == 0); | |||
/* For 16-bit types we add const_index[0] modulo 4 to the | |||
* offset in order to access elements that are not 32-bit aligned | |||
*/ | |||
src.offset = const_offset->u32[0] + instr->const_index[0] % 4; | |||
for (unsigned j = 0; j < instr->num_components; j++) { | |||
bld.MOV(offset(dest, bld, j), offset(src, bld, j)); |
@@ -41,8 +41,6 @@ anv_nir_lower_push_constants(nir_shader *shader) | |||
if (intrin->intrinsic != nir_intrinsic_load_push_constant) | |||
continue; | |||
assert(intrin->const_index[0] % 4 == 0); | |||
/* We just turn them into uniform loads */ | |||
intrin->intrinsic = nir_intrinsic_load_uniform; | |||
} |