Allow, for example, converting from 4 x float32 to 4 x unorm8 and vice versa. Uses code and ideas from Brian Paul. (tags/mesa-7.9-rc1)
* | * | ||||
* Although the result values can be scaled to an arbitrary bit width specified | * Although the result values can be scaled to an arbitrary bit width specified | ||||
* by dst_width, the actual result type will have the same width. | * by dst_width, the actual result type will have the same width. | ||||
* | |||||
* Ex: src = { float, float, float, float } | |||||
* return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. | |||||
*/ | */ | ||||
LLVMValueRef | LLVMValueRef | ||||
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, | lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, | ||||
/** | /** | ||||
* Inverse of lp_build_clamped_float_to_unsigned_norm above. | * Inverse of lp_build_clamped_float_to_unsigned_norm above. | ||||
* Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] | |||||
* return {float, float, float, float} with values in range [0, 1]. | |||||
*/ | */ | ||||
LLVMValueRef | LLVMValueRef | ||||
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, | lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, | ||||
unsigned num_tmps; | unsigned num_tmps; | ||||
unsigned i; | unsigned i; | ||||
/* Register width must remain constant */ | |||||
assert(src_type.width * src_type.length == dst_type.width * dst_type.length); | |||||
/* We must not lose or gain channels. Only precision */ | /* We must not lose or gain channels. Only precision */ | ||||
assert(src_type.length * num_srcs == dst_type.length * num_dsts); | assert(src_type.length * num_srcs == dst_type.length * num_dsts); | ||||
assert(src_type.length <= LP_MAX_VECTOR_LENGTH); | assert(src_type.length <= LP_MAX_VECTOR_LENGTH); | ||||
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); | assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); | ||||
assert(num_srcs <= LP_MAX_VECTOR_LENGTH); | |||||
assert(num_dsts <= LP_MAX_VECTOR_LENGTH); | |||||
tmp_type = src_type; | tmp_type = src_type; | ||||
for(i = 0; i < num_srcs; ++i) | for(i = 0; i < num_srcs; ++i) | ||||
assert(!tmp_type.floating || tmp_type.width == dst_type.width); | assert(!tmp_type.floating || tmp_type.width == dst_type.width); | ||||
if(tmp_type.width > dst_type.width) { | |||||
assert(num_dsts == 1); | |||||
tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); | |||||
tmp_type.width = dst_type.width; | |||||
tmp_type.length = dst_type.length; | |||||
num_tmps = 1; | |||||
} | |||||
if(tmp_type.width < dst_type.width) { | |||||
assert(num_tmps == 1); | |||||
lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); | |||||
tmp_type.width = dst_type.width; | |||||
tmp_type.length = dst_type.length; | |||||
num_tmps = num_dsts; | |||||
} | |||||
lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts); | |||||
assert(tmp_type.width == dst_type.width); | |||||
assert(tmp_type.length == dst_type.length); | |||||
assert(num_tmps == num_dsts); | |||||
tmp_type.width = dst_type.width; | |||||
tmp_type.length = dst_type.length; | |||||
num_tmps = num_dsts; | |||||
/* | /* | ||||
* Scale to the widest range | * Scale to the widest range |
return tmp[0]; | return tmp[0]; | ||||
} | } | ||||
/** | |||||
* Truncate or expand the bit width of the elements of one or more vectors. | |||||
* | |||||
* builder   LLVM IR builder that receives the generated instructions. | |||||
* src_type  type of every vector in src. | |||||
* dst_type  type of every vector written to dst. | |||||
* src       array of num_srcs source vectors. | |||||
* dst       array that receives num_dsts result vectors. | |||||
* | |||||
* Preconditions (checked with assert only): | |||||
*  - floating point types must keep their width; | |||||
*  - the total channel count is preserved, i.e. | |||||
*    src_type.length * num_srcs == dst_type.length * num_dsts; | |||||
*  - only 1:N, M:1 or 1:1 conversions, never M:N. | |||||
* | |||||
* Narrowing: when the register width (width * length) is unchanged the | |||||
* sources are combined with lp_build_pack; otherwise src[0] is truncated | |||||
* element by element with LLVMBuildTrunc and the lengths must match. | |||||
* | |||||
* Widening: when the register width is unchanged src[0] is split with | |||||
* lp_build_unpack; otherwise src[0] is extended element by element, sign | |||||
* extending only when both types are signed and zero extending otherwise. | |||||
* | |||||
* Equal widths: src[0] is passed through unchanged. | |||||
* | |||||
* Results are staged in a local array and copied to dst at the very end. | |||||
* NOTE(review): this appears intended to let callers pass the same array | |||||
* for src and dst -- confirm against callers. | |||||
*/ | |||||
void | |||||
lp_build_resize(LLVMBuilderRef builder, | |||||
struct lp_type src_type, | |||||
struct lp_type dst_type, | |||||
const LLVMValueRef *src, unsigned num_srcs, | |||||
LLVMValueRef *dst, unsigned num_dsts) | |||||
{ | |||||
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; | |||||
unsigned i; | |||||
assert(!src_type.floating || src_type.width == dst_type.width); | |||||
/* We must not lose or gain channels. Only precision */ | |||||
assert(src_type.length * num_srcs == dst_type.length * num_dsts); | |||||
/* We don't support M:N conversion, only 1:N, M:1, or 1:1 */ | |||||
assert(num_srcs == 1 || num_dsts == 1); | |||||
assert(src_type.length <= LP_MAX_VECTOR_LENGTH); | |||||
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); | |||||
assert(num_srcs <= LP_MAX_VECTOR_LENGTH); | |||||
assert(num_dsts <= LP_MAX_VECTOR_LENGTH); | |||||
if (src_type.width > dst_type.width) { | |||||
/* | |||||
* Truncate bit width. | |||||
*/ | |||||
assert(num_dsts == 1); | |||||
if (src_type.width * src_type.length == dst_type.width * dst_type.length) { | |||||
/* | |||||
* Register width remains constant -- use vector packing intrinsics | |||||
*/ | |||||
tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); | |||||
} | |||||
else { | |||||
/* | |||||
* Do it element-wise. | |||||
*/ | |||||
assert(src_type.length == dst_type.length); | |||||
tmp[0] = lp_build_undef(dst_type); | |||||
for (i = 0; i < dst_type.length; ++i) { | |||||
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); | |||||
LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); | |||||
val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), ""); | |||||
tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); | |||||
} | |||||
} | |||||
} | |||||
else if (src_type.width < dst_type.width) { | |||||
/* | |||||
* Expand bit width. | |||||
*/ | |||||
assert(num_srcs == 1); | |||||
if (src_type.width * src_type.length == dst_type.width * dst_type.length) { | |||||
/* | |||||
* Register width remains constant -- use vector unpack intrinsics | |||||
*/ | |||||
lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts); | |||||
} | |||||
else { | |||||
/* | |||||
* Do it element-wise. | |||||
*/ | |||||
assert(src_type.length == dst_type.length); | |||||
tmp[0] = lp_build_undef(dst_type); | |||||
for (i = 0; i < dst_type.length; ++i) { | |||||
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); | |||||
LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); | |||||
if (src_type.sign && dst_type.sign) { | |||||
val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), ""); | |||||
} else { | |||||
val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), ""); | |||||
} | |||||
tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); | |||||
} | |||||
} | |||||
} | |||||
else { | |||||
/* | |||||
* No-op: the element widths already match, so the single source | |||||
* vector is handed through untouched. | |||||
*/ | |||||
assert(num_srcs == 1); | |||||
assert(num_dsts == 1); | |||||
tmp[0] = src[0]; | |||||
} | |||||
for(i = 0; i < num_dsts; ++i) | |||||
dst[i] = tmp[i]; | |||||
} | |||||
const LLVMValueRef *src, unsigned num_srcs); | const LLVMValueRef *src, unsigned num_srcs); | ||||
void | |||||
lp_build_resize(LLVMBuilderRef builder, | |||||
struct lp_type src_type, | |||||
struct lp_type dst_type, | |||||
const LLVMValueRef *src, unsigned num_srcs, | |||||
LLVMValueRef *dst, unsigned num_dsts); | |||||
#endif /* !LP_BLD_PACK_H */ | #endif /* !LP_BLD_PACK_H */ |
unsigned i, j; | unsigned i, j; | ||||
void *code; | void *code; | ||||
/* Skip only the pairs that preserve neither the register width nor the channel count. A pair that keeps either one is convertible and must be tested; rejecting on either mismatch would make the length > / < branches below unreachable. */ | |||||
if (src_type.width * src_type.length != dst_type.width * dst_type.length && | |||||
src_type.length != dst_type.length) { | |||||
return TRUE; | |||||
} | |||||
if(verbose >= 1) | if(verbose >= 1) | ||||
dump_conv_types(stdout, src_type, dst_type); | dump_conv_types(stdout, src_type, dst_type); | ||||
if(src_type.length > dst_type.length) { | |||||
if (src_type.length > dst_type.length) { | |||||
num_srcs = 1; | num_srcs = 1; | ||||
num_dsts = src_type.length/dst_type.length; | num_dsts = src_type.length/dst_type.length; | ||||
} | } | ||||
else { | |||||
else if (src_type.length < dst_type.length) { | |||||
num_dsts = 1; | num_dsts = 1; | ||||
num_srcs = dst_type.length/src_type.length; | num_srcs = dst_type.length/src_type.length; | ||||
} | } | ||||
assert(src_type.width * src_type.length == dst_type.width * dst_type.length); | |||||
else { | |||||
num_dsts = 1; | |||||
num_srcs = 1; | |||||
} | |||||
/* We must not lose or gain channels. Only precision */ | /* We must not lose or gain channels. Only precision */ | ||||
assert(src_type.length * num_srcs == dst_type.length * num_dsts); | assert(src_type.length * num_srcs == dst_type.length * num_dsts); | ||||
{ FALSE, FALSE, TRUE, FALSE, 8, 16 }, | { FALSE, FALSE, TRUE, FALSE, 8, 16 }, | ||||
{ FALSE, FALSE, FALSE, TRUE, 8, 16 }, | { FALSE, FALSE, FALSE, TRUE, 8, 16 }, | ||||
{ FALSE, FALSE, FALSE, FALSE, 8, 16 }, | { FALSE, FALSE, FALSE, FALSE, 8, 16 }, | ||||
{ FALSE, FALSE, TRUE, TRUE, 8, 4 }, | |||||
{ FALSE, FALSE, TRUE, FALSE, 8, 4 }, | |||||
{ FALSE, FALSE, FALSE, TRUE, 8, 4 }, | |||||
{ FALSE, FALSE, FALSE, FALSE, 8, 4 }, | |||||
}; | }; | ||||