This helper, used to load 16-bit components from 32-bit reads, now allows skipping components via the new parameter first_component. The semantics are now to skip components until first_component is reached, and then read the number of components passed to the function. All previous uses of the helper are updated to pass 0 as first_component. This will allow reading 16-bit components when the first one is not 32-bit aligned, enabling more usages of untyped_reads with 16-bit types. v2: (Jason Ekstrand) Change parameter order to first_component, num_components. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> tags/18.1-branchpoint
@@ -194,7 +194,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, | |||
fs_reg dw = offset(vec4_result, bld, (const_offset & 0xf) / 4); | |||
switch (type_sz(dst.type)) { | |||
case 2: | |||
shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 1); | |||
shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 0, 1); | |||
bld.MOV(dst, subscript(dw, dst.type, (const_offset / 2) & 1)); | |||
break; | |||
case 4: |
@@ -505,6 +505,7 @@ fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld, | |||
void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder &bld, | |||
const fs_reg &dst, | |||
const fs_reg &src, | |||
uint32_t first_component, | |||
uint32_t components); | |||
void shuffle_16bit_data_for_32bit_write(const brw::fs_builder &bld, |
@@ -2316,7 +2316,7 @@ do_untyped_vector_read(const fs_builder &bld, | |||
shuffle_32bit_load_result_to_16bit_data(bld, | |||
retype(dest, BRW_REGISTER_TYPE_W), | |||
retype(read_result, BRW_REGISTER_TYPE_D), | |||
num_components); | |||
0, num_components); | |||
} else { | |||
assert(num_components == 1); | |||
/* scalar 16-bit are read using one byte_scattered_read message */ | |||
@@ -4912,6 +4912,7 @@ void | |||
shuffle_32bit_load_result_to_16bit_data(const fs_builder &bld, | |||
const fs_reg &dst, | |||
const fs_reg &src, | |||
uint32_t first_component, | |||
uint32_t components) | |||
{ | |||
assert(type_sz(src.type) == 4); | |||
@@ -4926,7 +4927,8 @@ shuffle_32bit_load_result_to_16bit_data(const fs_builder &bld, | |||
for (unsigned i = 0; i < components; i++) { | |||
const fs_reg component_i = | |||
subscript(offset(src, bld, i / 2), dst.type, i % 2); | |||
subscript(offset(src, bld, (first_component + i) / 2), dst.type, | |||
(first_component + i) % 2); | |||
bld.MOV(offset(tmp, bld, i % 2), component_i); | |||