Use one multiplication instead of two. Also fix floating-point random number generation and verification. TODO: Do the same for AoS blending. (tags/snb-magic)
@@ -320,9 +320,6 @@ lp_build_blend_aos(LLVMBuilderRef builder, | |||
if(!blend->rt[rt].blend_enable) | |||
return src; | |||
/* It makes no sense to blend unless values are normalized */ | |||
assert(type.norm); | |||
/* Setup build context */ | |||
memset(&bld, 0, sizeof bld); | |||
lp_build_context_init(&bld.base, builder, type); |
@@ -1,6 +1,6 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2009 VMware, Inc. | |||
* Copyright 2009-2010 VMware, Inc. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
@@ -195,6 +195,13 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, | |||
} | |||
static boolean | |||
lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) | |||
{ | |||
return dst_factor == (src_factor ^ 0x10); | |||
} | |||
/** | |||
* Generate blend code in SOA mode. | |||
* \param rt render target index (to index the blend / colormask state) | |||
@@ -243,8 +250,41 @@ lp_build_blend_soa(LLVMBuilderRef builder, | |||
unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; | |||
boolean func_commutative = lp_build_blend_func_commutative(func); | |||
/* It makes no sense to blend unless values are normalized */ | |||
assert(type.norm); | |||
if (func == PIPE_BLEND_ADD && | |||
lp_build_blend_factor_complementary(src_factor, dst_factor) && 0) { | |||
/* | |||
* Special case linear interpolation, (i.e., complementary factors). | |||
*/ | |||
LLVMValueRef weight; | |||
if (src_factor < dst_factor) { | |||
weight = lp_build_blend_soa_factor(&bld, src_factor, i); | |||
res[i] = lp_build_lerp(&bld.base, weight, dst[i], src[i]); | |||
} else { | |||
weight = lp_build_blend_soa_factor(&bld, dst_factor, i); | |||
res[i] = lp_build_lerp(&bld.base, weight, src[i], dst[i]); | |||
} | |||
continue; | |||
} | |||
if ((func == PIPE_BLEND_ADD || | |||
func == PIPE_BLEND_SUBTRACT || | |||
func == PIPE_BLEND_REVERSE_SUBTRACT) && | |||
src_factor == dst_factor && | |||
type.floating) { | |||
/* | |||
* Special common factor. | |||
* | |||
* XXX: Only for floating points for now, since saturation will | |||
* cause different results. | |||
*/ | |||
LLVMValueRef factor; | |||
factor = lp_build_blend_soa_factor(&bld, src_factor, i); | |||
res[i] = lp_build_blend_func(&bld.base, func, src[i], dst[i]); | |||
res[i] = lp_build_mul(&bld.base, res[i], factor); | |||
continue; | |||
} | |||
/* | |||
* Compute src/dst factors. |
@@ -243,19 +243,6 @@ add_blend_test(LLVMModuleRef module, | |||
} | |||
/**
 * Add and limit result to ceiling of 1.0.
 *
 * R receives (A) + (B), replaced by 1.0f when the sum exceeds 1.0f.
 * R must be a side-effect-free lvalue: it is evaluated more than once.
 * A and B are each evaluated exactly once.
 */
#define ADD_SAT(R, A, B) \
   do { \
      (R) = (A) + (B); \
      if ((R) > 1.0f) { \
         (R) = 1.0f; \
      } \
   } while (0)
/**
 * Subtract and limit result to floor of 0.0.
 *
 * R receives (A) - (B), replaced by 0.0f when the difference is negative.
 * R must be a side-effect-free lvalue: it is evaluated more than once.
 * A and B are each evaluated exactly once.
 */
#define SUB_SAT(R, A, B) \
   do { \
      (R) = (A) - (B); \
      if ((R) < 0.0f) { \
         (R) = 0.0f; \
      } \
   } while (0)
static void | |||
compute_blend_ref_term(unsigned rgb_factor, | |||
unsigned alpha_factor, | |||
@@ -423,19 +410,19 @@ compute_blend_ref(const struct pipe_blend_state *blend, | |||
*/ | |||
switch (blend->rt[0].rgb_func) { | |||
case PIPE_BLEND_ADD: | |||
ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ | |||
ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ | |||
ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ | |||
res[0] = src_term[0] + dst_term[0]; /* R */ | |||
res[1] = src_term[1] + dst_term[1]; /* G */ | |||
res[2] = src_term[2] + dst_term[2]; /* B */ | |||
break; | |||
case PIPE_BLEND_SUBTRACT: | |||
SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ | |||
SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ | |||
SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ | |||
res[0] = src_term[0] - dst_term[0]; /* R */ | |||
res[1] = src_term[1] - dst_term[1]; /* G */ | |||
res[2] = src_term[2] - dst_term[2]; /* B */ | |||
break; | |||
case PIPE_BLEND_REVERSE_SUBTRACT: | |||
SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ | |||
SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ | |||
SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ | |||
res[0] = dst_term[0] - src_term[0]; /* R */ | |||
res[1] = dst_term[1] - src_term[1]; /* G */ | |||
res[2] = dst_term[2] - src_term[2]; /* B */ | |||
break; | |||
case PIPE_BLEND_MIN: | |||
res[0] = MIN2(src_term[0], dst_term[0]); /* R */ | |||
@@ -456,13 +443,13 @@ compute_blend_ref(const struct pipe_blend_state *blend, | |||
*/ | |||
switch (blend->rt[0].alpha_func) { | |||
case PIPE_BLEND_ADD: | |||
ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ | |||
res[3] = src_term[3] + dst_term[3]; /* A */ | |||
break; | |||
case PIPE_BLEND_SUBTRACT: | |||
SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ | |||
res[3] = src_term[3] - dst_term[3]; /* A */ | |||
break; | |||
case PIPE_BLEND_REVERSE_SUBTRACT: | |||
SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ | |||
res[3] = dst_term[3] - src_term[3]; /* A */ | |||
break; | |||
case PIPE_BLEND_MIN: | |||
res[3] = MIN2(src_term[3], dst_term[3]); /* A */ | |||
@@ -676,6 +663,8 @@ test_one(unsigned verbose, | |||
fprintf(stderr, " Ref%c: ", channel); | |||
dump_vec(stderr, type, ref + j*stride); | |||
fprintf(stderr, "\n"); | |||
fprintf(stderr, "\n"); | |||
} | |||
} | |||
} | |||
@@ -773,7 +762,7 @@ blend_funcs[] = { | |||
/*
 * Table of struct lp_type initializers; every row lists its fields in the
 * order given by the column comment on the first line of the table.
 *
 * NOTE(review): the two f32 rows differ only in the sign and norm columns
 * and look like the before/after versions of a single entry in this diff
 * view -- confirm which one belongs in the file.
 */
const struct lp_type blend_types[] = { | |||
/* float, fixed, sign, norm, width, len */ | |||
{ TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4, sign = FALSE, norm = TRUE */ | |||
{ TRUE, FALSE, TRUE, FALSE, 32, 4 }, /* f32 x 4, sign = TRUE, norm = FALSE */ | |||
{ FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ | |||
}; | |||
@@ -205,16 +205,19 @@ random_elem(struct lp_type type, void *dst, unsigned index) | |||
assert(index < type.length); | |||
value = (double)rand()/(double)RAND_MAX; | |||
if(!type.norm) { | |||
unsigned long long mask; | |||
if (type.floating) | |||
mask = ~(unsigned long long)0; | |||
else if (type.fixed) | |||
mask = ((unsigned long long)1 << (type.width / 2)) - 1; | |||
else if (type.sign) | |||
mask = ((unsigned long long)1 << (type.width - 1)) - 1; | |||
else | |||
mask = ((unsigned long long)1 << type.width) - 1; | |||
value += (double)(mask & rand()); | |||
if (type.floating) { | |||
value *= 2.0; | |||
} | |||
else { | |||
unsigned long long mask; | |||
if (type.fixed) | |||
mask = ((unsigned long long)1 << (type.width / 2)) - 1; | |||
else if (type.sign) | |||
mask = ((unsigned long long)1 << (type.width - 1)) - 1; | |||
else | |||
mask = ((unsigned long long)1 << type.width) - 1; | |||
value += (double)(mask & rand()); | |||
} | |||
} | |||
if(!type.sign) | |||
if(rand() & 1) | |||
@@ -261,12 +264,18 @@ boolean | |||
compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) | |||
{ | |||
unsigned i; | |||
eps *= type.floating ? 8.0 : 2.0; | |||
for (i = 0; i < type.length; ++i) { | |||
double res_elem = read_elem(type, res, i); | |||
double ref_elem = read_elem(type, ref, i); | |||
double delta = fabs(res_elem - ref_elem); | |||
if(delta >= 2.0*eps) | |||
double delta = res_elem - ref_elem; | |||
if (ref_elem < -1.0 || ref_elem > 1.0) { | |||
delta /= ref_elem; | |||
} | |||
delta = fabs(delta); | |||
if (delta >= eps) { | |||
return FALSE; | |||
} | |||
} | |||
return TRUE; |