
gallivm: Fix lerping of (un)signed normalized numbers.

Several issues actually:

- Fix a regression in unsigned normalized lerping, in the rescaling
  from [0, 255] to [0, 256]

- Ensure we use signed shifts where appropriate (instead of
  unsigned shifts)

- Refactor the code slightly -- move all the logic inside
  lp_build_lerp_simple().

This change, plus an adjustment in the tolerance of signed normalized
results in piglit fbo-blending-formats, fixes bug 57903

Reviewed-by: Brian Paul <brianp@vmware.com>
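As an aside on the second bullet, here is a minimal standalone C illustration (not part of the patch) of why the shift flavour matters once signed normalized values enter the picture: a logical shift of a negative product yields a large positive value, while an arithmetic shift keeps the sign. The patch relies on lp_build_shr_imm, which is assumed here to pick the shift flavour from the type's sign, instead of hard-coding LLVMBuildLShr.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   int16_t product = -16129;            /* e.g. -127 * 127, a snorm8 product */

   /* Arithmetic shift keeps the sign (two's-complement targets). */
   int ashr = product >> 7;

   /* Logical shift treats the bit pattern as 49407 and loses the sign. */
   int lshr = (uint16_t)product >> 7;

   printf("arithmetic: %d, logical: %d\n", ashr, lshr);   /* -127 vs 385 */
   return 0;
}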
José Fonseca, 13 years ago
commit 5e99cd9159
1 changed file with 43 additions and 49 deletions

src/gallium/auxiliary/gallivm/lp_bld_arit.c  +43 −49

@@ -685,7 +685,7 @@ lp_build_sub(struct lp_build_context *bld,
/**
* Normalized multiplication.
*
- * There are several approaches here (using 8-bit normalized multiplication as
+ * There are several approaches for (using 8-bit normalized multiplication as
* an example):
*
* - alpha plus one
@@ -694,7 +694,7 @@ lp_build_sub(struct lp_build_context *bld,
*
* a*b/255 ~= (a*(b + 1)) >> 256
*
- * which is the fastest method that satisfies the following OpenGL criteria
+ * which is the fastest method that satisfies the following OpenGL criteria of
*
* 0*0 = 0 and 255*255 = 255
*
@@ -710,7 +710,7 @@ lp_build_sub(struct lp_build_context *bld,
*
* note that just by itself it doesn't satisfies the OpenGL criteria, as
* 255*255 = 254, so the special case b = 255 must be accounted or roundoff
- * must be used
+ * must be used.
*
* - geometric series plus rounding
*
@@ -719,7 +719,9 @@ lp_build_sub(struct lp_build_context *bld,
*
* t/255 ~= (t + (t >> 8) + 0x80) >> 8
*
- * achieving the exact results
+ * achieving the exact results.
*
*
*
* @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
* ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
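The claims in the comment above are easy to sanity-check. The following standalone harness (not part of the patch, written only for illustration) brute-forces all 8-bit products and reports how far each of the three approaches strays from exact rounded division by 255.

#include <stdio.h>

static unsigned udiff(unsigned x, unsigned y)
{
   return x > y ? x - y : y - x;
}

int main(void)
{
   unsigned err_alpha1 = 0, err_series = 0, err_round = 0;
   unsigned a, b;

   for (a = 0; a <= 255; a++) {
      for (b = 0; b <= 255; b++) {
         unsigned t = a * b;
         unsigned ref = (t + 127) / 255;                 /* round(t / 255), no ties occur */

         unsigned alpha1 = (a * (b + 1)) >> 8;           /* alpha plus one                 */
         unsigned series = (t + (t >> 8)) >> 8;          /* geometric series, truncating   */
         unsigned round_ = (t + (t >> 8) + 0x80) >> 8;   /* geometric series plus rounding */

         if (udiff(alpha1, ref) > err_alpha1) err_alpha1 = udiff(alpha1, ref);
         if (udiff(series, ref) > err_series) err_series = udiff(series, ref);
         if (udiff(round_, ref) > err_round)  err_round  = udiff(round_, ref);
      }
   }

   printf("max |error| vs round(a*b / 255): alpha+1 = %u, series = %u, series+rounding = %u\n",
          err_alpha1, err_series, err_round);
   return 0;
}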
@@ -733,8 +735,7 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
{
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context bld;
- unsigned bits;
- LLVMValueRef shift;
+ unsigned n;
LLVMValueRef half;
LLVMValueRef ab;

@@ -744,29 +745,28 @@ lp_build_mul_norm(struct gallivm_state *gallivm,

lp_build_context_init(&bld, gallivm, wide_type);

- bits = wide_type.width / 2;
+ n = wide_type.width / 2;
if (wide_type.sign) {
- --bits;
+ --n;
}

- shift = lp_build_const_int_vec(gallivm, wide_type, bits);
+ /*
+ * TODO: for 16bits normalized SSE2 vectors we could consider using PMULHUW
+ * http://ssp.impulsetrain.com/2011/07/03/multiplying-normalized-16-bit-numbers-with-sse2/
+ */

- #if 0
- /* a*b/255 ~= (a*(b + 1)) >> 256 */
- /* XXX: This would not work for signed types */
- assert(!wide_type.sign);
- b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallium, wide_type, 1), "");
- ab = LLVMBuildMul(builder, a, b, "");
+ /*
+ * a*b / (2**n - 1) ~= (a*b + (a*b >> n) + half) >> n
+ */

- #else
- /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
ab = LLVMBuildMul(builder, a, b, "");
- ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, shift, ""), "");
+ ab = LLVMBuildAdd(builder, ab, lp_build_shr_imm(&bld, ab, n), "");

- /* Add rounding term */
- half = lp_build_const_int_vec(gallivm, wide_type, 1 << (bits - 1));
+ /*
+ * half = sgn(ab) * 0.5 * (2 ** n) = sgn(ab) * (1 << (n - 1))
+ */
+
+ half = lp_build_const_int_vec(gallivm, wide_type, 1 << (n - 1));
if (wide_type.sign) {
LLVMValueRef minus_half = LLVMBuildNeg(builder, half, "");
LLVMValueRef sign = lp_build_shr_imm(&bld, half, wide_type.width - 1);
@@ -774,9 +774,8 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
}
ab = LLVMBuildAdd(builder, ab, half, "");

- #endif
- ab = LLVMBuildLShr(builder, ab, shift, "");
+ /* Final division */
+ ab = lp_build_shr_imm(&bld, ab, n);

return ab;
}
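For reference, a scalar sketch of the arithmetic the rewritten lp_build_mul_norm emits for the unsigned case (n = width/2, e.g. n = 8 for unorm8 widened to 16 bits). The helper name is made up for illustration; the signed path, which uses n = width/2 - 1, arithmetic shifts, and a sign-dependent rounding term, is only described in the comment.

#include <stdio.h>

/*
 * Scalar model of the unsigned path built above:
 *
 *    a*b / (2**n - 1)  ~=  (a*b + (a*b >> n) + (1 << (n - 1))) >> n
 */
static unsigned mul_norm_unsigned(unsigned a, unsigned b, unsigned n)
{
   unsigned ab = a * b;
   ab += ab >> n;            /* geometric series term   */
   ab += 1u << (n - 1);      /* rounding term ("half")  */
   return ab >> n;           /* final division by 2**n  */
}

int main(void)
{
   printf("%u\n", mul_norm_unsigned(255, 255, 8));   /* 255 */
   printf("%u\n", mul_norm_unsigned(255, 128, 8));   /* 128 */
   printf("%u\n", mul_norm_unsigned( 64, 128, 8));   /*  32 */
   return 0;
}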
@@ -988,14 +987,28 @@ lp_build_lerp_simple(struct lp_build_context *bld,

delta = lp_build_sub(bld, v1, v0);

- res = lp_build_mul(bld, x, delta);

if (normalized) {
- if (bld->type.sign) {
- res = lp_build_shr_imm(bld, res, half_width - 1);
- } else {
+ if (!bld->type.sign) {
+ /*
+ * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
+ * most-significant-bit to the lowest-significant-bit, so that
+ * later we can just divide by 2**n instead of 2**n - 1.
+ */
+ x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+
+ /* (x * delta) >> n */
+ res = lp_build_mul(bld, x, delta);
res = lp_build_shr_imm(bld, res, half_width);
+ } else {
+ /*
+ * The rescaling trick above doesn't work for signed numbers, so
+ * use the 2**n - 1 divison approximation in lp_build_mul_norm
+ * instead.
+ */
+ res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
}
+ } else {
+ res = lp_build_mul(bld, x, delta);
}

res = lp_build_add(bld, v0, res);
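And a scalar sketch of the unsigned branch of the new lp_build_lerp_simple logic (again illustrative, with half_width = 8 for unorm8 widened to 16 bits), showing why the [0, 255] to [0, 256] rescale lets a plain shift by 8 stand in for division by 255. Per the comment in the patch, the signed branch instead goes through lp_build_mul_norm.

#include <stdio.h>

static unsigned lerp_unorm8(unsigned x, unsigned v0, unsigned v1)
{
   int delta = (int)v1 - (int)v0;

   /* Scale x from [0, 255] to [0, 256] by adding its MSB to its LSB. */
   x += x >> 7;

   /* (x * delta) >> 8, then add v0. */
   return v0 + (unsigned)(((int)x * delta) >> 8);
}

int main(void)
{
   printf("%u\n", lerp_unorm8(  0, 10, 250));   /*  10 */
   printf("%u\n", lerp_unorm8(255, 10, 250));   /* 250, not the 249 you get without the rescale */
   printf("%u\n", lerp_unorm8(128, 10, 250));   /* 130 */
   return 0;
}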
@@ -1022,7 +1035,6 @@ lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef v0,
LLVMValueRef v1)
{
- LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
LLVMValueRef res;

@@ -1034,8 +1046,6 @@ lp_build_lerp(struct lp_build_context *bld,
struct lp_type wide_type;
struct lp_build_context wide_bld;
LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh;
- unsigned bits;
- LLVMValueRef shift;

assert(type.length >= 2);

@@ -1054,22 +1064,6 @@ lp_build_lerp(struct lp_build_context *bld,
lp_build_unpack2(bld->gallivm, type, wide_type, v0, &v0l, &v0h);
lp_build_unpack2(bld->gallivm, type, wide_type, v1, &v1l, &v1h);

- /*
- * Scale x from [0, 255] to [0, 256]
- */
-
- bits = type.width - 1;
- if (type.sign) {
- --bits;
- }
-
- shift = lp_build_const_int_vec(bld->gallivm, wide_type, bits - 1);
-
- xl = lp_build_add(&wide_bld, xl,
- LLVMBuildAShr(builder, xl, shift, ""));
- xh = lp_build_add(&wide_bld, xh,
- LLVMBuildAShr(builder, xh, shift, ""));

/*
* Lerp both halves.
*/
