|
|
@@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc, |
|
|
|
if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { |
|
|
|
type.floating = TRUE; |
|
|
|
assert(z_swizzle == 0); |
|
|
|
assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); |
|
|
|
assert(format_desc->channel[z_swizzle].size == 32); |
|
|
|
} |
|
|
|
else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { |
|
|
|
assert(format_desc->block.bits <= 32); |
|
|
@@ -347,15 +347,18 @@ static boolean |
|
|
|
get_z_shift_and_mask(const struct util_format_description *format_desc, |
|
|
|
unsigned *shift, unsigned *width, unsigned *mask) |
|
|
|
{ |
|
|
|
const unsigned total_bits = format_desc->block.bits; |
|
|
|
unsigned total_bits; |
|
|
|
unsigned z_swizzle; |
|
|
|
unsigned chan; |
|
|
|
unsigned padding_left, padding_right; |
|
|
|
|
|
|
|
|
|
|
|
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
|
|
|
assert(format_desc->block.width == 1); |
|
|
|
assert(format_desc->block.height == 1); |
|
|
|
|
|
|
|
/* The 64-bit d/s format is special: 32 bits have already been extracted. */ |
|
|
|
total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits; |
|
|
|
|
|
|
|
z_swizzle = format_desc->swizzle[0]; |
|
|
|
|
|
|
|
if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) |
|
|
@@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, |
|
|
|
if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) |
|
|
|
return FALSE; |
|
|
|
|
|
|
|
/* just special case 64bit d/s format */ |
|
|
|
if (format_desc->block.bits > 32) { |
|
|
|
assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); |
|
|
|
*shift = 0; |
|
|
|
*mask = 0xff; |
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
|
|
|
|
*shift = 0; |
|
|
|
for (chan = 0; chan < s_swizzle; chan++) |
|
|
|
*shift += format_desc->channel[chan].size; |
|
|
@@ -517,24 +528,31 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, |
|
|
|
* \param loop_counter the current loop iteration |
|
|
|
* \param depth_ptr pointer to the depth/stencil values of this 4x4 block |
|
|
|
* \param depth_stride stride of the depth/stencil buffer |
|
|
|
* \param z_fb contains z values loaded from fb (may include padding) |
|
|
|
* \param s_fb contains s values loaded from fb (may include padding) |
|
|
|
*/ |
|
|
|
LLVMValueRef |
|
|
|
void |
|
|
|
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, |
|
|
|
struct lp_type z_src_type, |
|
|
|
const struct util_format_description *format_desc, |
|
|
|
LLVMValueRef depth_ptr, |
|
|
|
LLVMValueRef depth_stride, |
|
|
|
LLVMValueRef *z_fb, |
|
|
|
LLVMValueRef *s_fb, |
|
|
|
LLVMValueRef loop_counter) |
|
|
|
{ |
|
|
|
LLVMBuilderRef builder = gallivm->builder; |
|
|
|
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; |
|
|
|
LLVMValueRef zs_dst, zs_dst1, zs_dst2; |
|
|
|
LLVMValueRef zs_dst1, zs_dst2; |
|
|
|
LLVMValueRef zs_dst_ptr; |
|
|
|
LLVMValueRef depth_offset1, depth_offset2; |
|
|
|
unsigned depth_bits = format_desc->block.bits/8; |
|
|
|
LLVMTypeRef load_ptr_type; |
|
|
|
unsigned depth_bytes = format_desc->block.bits / 8; |
|
|
|
struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); |
|
|
|
struct lp_type zs_load_type = zs_type; |
|
|
|
|
|
|
|
zs_load_type.length = zs_load_type.length / 2; |
|
|
|
load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); |
|
|
|
|
|
|
|
if (z_src_type.length == 4) { |
|
|
|
unsigned i; |
|
|
@@ -545,7 +563,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, |
|
|
|
LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, |
|
|
|
depth_stride, ""); |
|
|
|
depth_offset1 = LLVMBuildMul(builder, looplsb, |
|
|
|
lp_build_const_int32(gallivm, depth_bits * 2), ""); |
|
|
|
lp_build_const_int32(gallivm, depth_bytes * 2), ""); |
|
|
|
depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); |
|
|
|
|
|
|
|
/* just concatenate the loaded 2x2 values into 4-wide vector */ |
|
|
@@ -564,7 +582,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, |
|
|
|
* 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. |
|
|
|
*/ |
|
|
|
for (i = 0; i < 8; i++) { |
|
|
|
|
|
|
|
shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); |
|
|
|
} |
|
|
|
} |
|
|
@@ -573,63 +590,103 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, |
|
|
|
|
|
|
|
/* Load current z/stencil values from z/stencil buffer */ |
|
|
|
zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); |
|
|
|
zs_dst_ptr = LLVMBuildBitCast(builder, |
|
|
|
zs_dst_ptr, |
|
|
|
LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); |
|
|
|
zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); |
|
|
|
zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); |
|
|
|
zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); |
|
|
|
zs_dst_ptr = LLVMBuildBitCast(builder, |
|
|
|
zs_dst_ptr, |
|
|
|
LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); |
|
|
|
zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); |
|
|
|
zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); |
|
|
|
|
|
|
|
zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, |
|
|
|
LLVMConstVector(shuffles, zs_type.length), ""); |
|
|
|
*z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, |
|
|
|
LLVMConstVector(shuffles, zs_type.length), ""); |
|
|
|
*s_fb = *z_fb; |
|
|
|
|
|
|
|
if (format_desc->block.bits < z_src_type.width) { |
|
|
|
/* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ |
|
|
|
zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), ""); |
|
|
|
*z_fb = LLVMBuildZExt(builder, *z_fb, |
|
|
|
lp_build_int_vec_type(gallivm, z_src_type), ""); |
|
|
|
} |
|
|
|
|
|
|
|
lp_build_name(zs_dst, "zs_dst"); |
|
|
|
else if (format_desc->block.bits > 32) { |
|
|
|
/* Rely on LLVM to nicely handle the too-wide vector we have here. */ |
|
|
|
unsigned i; |
|
|
|
struct lp_type typex2 = zs_type; |
|
|
|
struct lp_type s_type = zs_type; |
|
|
|
LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; |
|
|
|
LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; |
|
|
|
LLVMValueRef tmp; |
|
|
|
|
|
|
|
typex2.width = typex2.width / 2; |
|
|
|
typex2.length = typex2.length * 2; |
|
|
|
s_type.width = s_type.width / 2; |
|
|
|
s_type.floating = 0; |
|
|
|
|
|
|
|
tmp = LLVMBuildBitCast(builder, *z_fb, |
|
|
|
lp_build_vec_type(gallivm, typex2), ""); |
|
|
|
|
|
|
|
for (i = 0; i < zs_type.length; i++) { |
|
|
|
shuffles1[i] = lp_build_const_int32(gallivm, i * 2); |
|
|
|
shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); |
|
|
|
} |
|
|
|
*z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, |
|
|
|
LLVMConstVector(shuffles1, zs_type.length), ""); |
|
|
|
*s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, |
|
|
|
LLVMConstVector(shuffles2, zs_type.length), ""); |
|
|
|
*s_fb = LLVMBuildBitCast(builder, *s_fb, |
|
|
|
lp_build_vec_type(gallivm, s_type), ""); |
|
|
|
lp_build_name(*s_fb, "s_dst"); |
|
|
|
} |
|
|
|
|
|
|
|
return zs_dst; |
|
|
|
lp_build_name(*z_fb, "z_dst"); |
|
|
|
lp_build_name(*s_fb, "s_dst"); |
|
|
|
lp_build_name(*z_fb, "z_dst"); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Store depth/stencil values. |
|
|
|
* Incoming values are swizzled (typically n 2x2 quads), stored linear. |
|
|
|
* If there's a mask it will do reload/select/store otherwise just store. |
|
|
|
* If there's a mask, it will do select/store; otherwise it will just store. |
|
|
|
* |
|
|
|
* \param type the data type of the fragment depth/stencil values |
|
|
|
* \param format_desc description of the depth/stencil surface |
|
|
|
* \param mask the alive/dead pixel mask for the quad (vector) |
|
|
|
* \param z_fb z values read from fb (with padding) |
|
|
|
* \param s_fb s values read from fb (with padding) |
|
|
|
* \param loop_counter the current loop iteration |
|
|
|
* \param depth_ptr pointer to the depth/stencil values of this 4x4 block |
|
|
|
* \param depth_stride stride of the depth/stencil buffer |
|
|
|
* \param zs_value the depth/stencil values to store |
|
|
|
* \param z_value the depth values to store (with padding) |
|
|
|
* \param s_value the stencil values to store (with padding) |
|
|
|
*/ |
|
|
|
void |
|
|
|
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, |
|
|
|
struct lp_type z_src_type, |
|
|
|
const struct util_format_description *format_desc, |
|
|
|
struct lp_build_mask_context *mask, |
|
|
|
LLVMValueRef z_fb, |
|
|
|
LLVMValueRef s_fb, |
|
|
|
LLVMValueRef loop_counter, |
|
|
|
LLVMValueRef depth_ptr, |
|
|
|
LLVMValueRef depth_stride, |
|
|
|
LLVMValueRef zs_value) |
|
|
|
LLVMValueRef z_value, |
|
|
|
LLVMValueRef s_value) |
|
|
|
{ |
|
|
|
struct lp_build_context z_bld; |
|
|
|
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; |
|
|
|
LLVMBuilderRef builder = gallivm->builder; |
|
|
|
LLVMValueRef mask_value = NULL; |
|
|
|
LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2; |
|
|
|
LLVMValueRef zs_dst1, zs_dst2; |
|
|
|
LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; |
|
|
|
LLVMValueRef depth_offset1, depth_offset2; |
|
|
|
unsigned depth_bits = format_desc->block.bits/8; |
|
|
|
LLVMTypeRef load_ptr_type; |
|
|
|
unsigned depth_bytes = format_desc->block.bits / 8; |
|
|
|
struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); |
|
|
|
struct lp_type zs_load_type = zs_type; |
|
|
|
|
|
|
|
zs_load_type.length = zs_load_type.length / 2; |
|
|
|
load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); |
|
|
|
|
|
|
|
if (zs_type.width > 32) |
|
|
|
zs_type.width = 32; |
|
|
|
|
|
|
|
lp_build_context_init(&z_bld, gallivm, zs_type); |
|
|
|
|
|
|
@@ -638,7 +695,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, |
|
|
|
* outside the fs loop to avoid all the swizzle stuff. |
|
|
|
*/ |
|
|
|
if (z_src_type.length == 4) { |
|
|
|
unsigned i; |
|
|
|
LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, |
|
|
|
lp_build_const_int32(gallivm, 1), ""); |
|
|
|
LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, |
|
|
@@ -646,13 +702,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, |
|
|
|
LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, |
|
|
|
depth_stride, ""); |
|
|
|
depth_offset1 = LLVMBuildMul(builder, looplsb, |
|
|
|
lp_build_const_int32(gallivm, depth_bits * 2), ""); |
|
|
|
lp_build_const_int32(gallivm, depth_bytes * 2), ""); |
|
|
|
depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); |
|
|
|
|
|
|
|
/* just concatenate the loaded 2x2 values into 4-wide vector */ |
|
|
|
for (i = 0; i < 4; i++) { |
|
|
|
shuffles[i] = lp_build_const_int32(gallivm, i); |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
unsigned i; |
|
|
@@ -669,55 +720,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); |
|
|
|
|
|
|
|
zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); |
|
|
|
zs_dst_ptr1 = LLVMBuildBitCast(builder, |
|
|
|
zs_dst_ptr1, |
|
|
|
LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); |
|
|
|
zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); |
|
|
|
zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); |
|
|
|
zs_dst_ptr2 = LLVMBuildBitCast(builder, |
|
|
|
zs_dst_ptr2, |
|
|
|
LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); |
|
|
|
zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); |
|
|
|
|
|
|
|
if (mask) { |
|
|
|
zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, ""); |
|
|
|
zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, ""); |
|
|
|
zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, |
|
|
|
LLVMConstVector(shuffles, zs_type.length), |
|
|
|
"zsbufval"); |
|
|
|
if (format_desc->block.bits > 32) { |
|
|
|
s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); |
|
|
|
} |
|
|
|
|
|
|
|
if (mask) { |
|
|
|
mask_value = lp_build_mask_value(mask); |
|
|
|
z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); |
|
|
|
if (format_desc->block.bits > 32) { |
|
|
|
s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); |
|
|
|
s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (zs_type.width < z_src_type.width) { |
|
|
|
/* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */ |
|
|
|
zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, ""); |
|
|
|
if (mask) |
|
|
|
mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, ""); |
|
|
|
/* Truncate ZS values (e.g., when writing to Z16_UNORM) */ |
|
|
|
z_value = LLVMBuildTrunc(builder, z_value, z_bld.vec_type, ""); |
|
|
|
} |
|
|
|
|
|
|
|
if (mask) { |
|
|
|
zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst); |
|
|
|
} |
|
|
|
|
|
|
|
if (z_src_type.length == 4) { |
|
|
|
zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2); |
|
|
|
zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2); |
|
|
|
if (format_desc->block.bits <= 32) { |
|
|
|
if (z_src_type.length == 4) { |
|
|
|
zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); |
|
|
|
zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); |
|
|
|
} |
|
|
|
else { |
|
|
|
assert(z_src_type.length == 8); |
|
|
|
zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, |
|
|
|
LLVMConstVector(&shuffles[0], |
|
|
|
zs_load_type.length), ""); |
|
|
|
zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value, |
|
|
|
LLVMConstVector(&shuffles[4], |
|
|
|
zs_load_type.length), ""); |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
assert(z_src_type.length == 8); |
|
|
|
zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value, |
|
|
|
LLVMConstVector(&shuffles[0], |
|
|
|
zs_load_type.length), |
|
|
|
""); |
|
|
|
zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value, |
|
|
|
LLVMConstVector(&shuffles[4], |
|
|
|
zs_load_type.length), |
|
|
|
""); |
|
|
|
|
|
|
|
if (z_src_type.length == 4) { |
|
|
|
zs_dst1 = lp_build_interleave2(gallivm, zs_type, |
|
|
|
z_value, s_value, 0); |
|
|
|
zs_dst2 = lp_build_interleave2(gallivm, zs_type, |
|
|
|
z_value, s_value, 1); |
|
|
|
} |
|
|
|
else { |
|
|
|
unsigned i; |
|
|
|
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; |
|
|
|
assert(z_src_type.length == 8); |
|
|
|
for (i = 0; i < 8; i++) { |
|
|
|
shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); |
|
|
|
shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + |
|
|
|
z_src_type.length); |
|
|
|
} |
|
|
|
zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, |
|
|
|
LLVMConstVector(&shuffles[0], |
|
|
|
z_src_type.length), ""); |
|
|
|
zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, |
|
|
|
LLVMConstVector(&shuffles[8], |
|
|
|
z_src_type.length), ""); |
|
|
|
} |
|
|
|
zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, |
|
|
|
lp_build_vec_type(gallivm, zs_load_type), ""); |
|
|
|
zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, |
|
|
|
lp_build_vec_type(gallivm, zs_load_type), ""); |
|
|
|
} |
|
|
|
|
|
|
|
LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); |
|
|
|
LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); |
|
|
|
} |
|
|
@@ -745,13 +816,14 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
struct lp_build_mask_context *mask, |
|
|
|
LLVMValueRef stencil_refs[2], |
|
|
|
LLVMValueRef z_src, |
|
|
|
LLVMValueRef zs_dst, |
|
|
|
LLVMValueRef z_fb, |
|
|
|
LLVMValueRef s_fb, |
|
|
|
LLVMValueRef face, |
|
|
|
LLVMValueRef *zs_value, |
|
|
|
LLVMValueRef *z_value, |
|
|
|
LLVMValueRef *s_value, |
|
|
|
boolean do_branch) |
|
|
|
{ |
|
|
|
LLVMBuilderRef builder = gallivm->builder; |
|
|
|
struct lp_type zs_type; |
|
|
|
struct lp_type z_type; |
|
|
|
struct lp_build_context z_bld; |
|
|
|
struct lp_build_context s_bld; |
|
|
@@ -763,7 +835,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
LLVMValueRef z_pass = NULL, s_pass_mask = NULL; |
|
|
|
LLVMValueRef orig_mask = lp_build_mask_value(mask); |
|
|
|
LLVMValueRef front_facing = NULL; |
|
|
|
|
|
|
|
boolean have_z, have_s; |
|
|
|
|
|
|
|
/* |
|
|
|
* Depths are expected to be between 0 and 1, even if they are stored in |
|
|
@@ -780,12 +852,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
} |
|
|
|
|
|
|
|
/* Pick the type matching the depth-stencil format. */ |
|
|
|
zs_type = lp_depth_type(format_desc, z_src_type.length); |
|
|
|
z_type = lp_depth_type(format_desc, z_src_type.length); |
|
|
|
|
|
|
|
/* Pick the intermediate type for depth operations. */ |
|
|
|
z_type = zs_type; |
|
|
|
/* FIXME: Cope with a depth test type with higher bit width. */ |
|
|
|
assert(zs_type.width <= z_src_type.width); |
|
|
|
z_type.width = z_src_type.width; |
|
|
|
assert(z_type.length == z_src_type.length); |
|
|
|
|
|
|
@@ -819,13 +888,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
|
|
|
|
if (depth->enabled) { |
|
|
|
assert(z_swizzle < 4); |
|
|
|
assert(format_desc->block.bits <= z_type.width); |
|
|
|
if (z_type.floating) { |
|
|
|
assert(z_swizzle == 0); |
|
|
|
assert(format_desc->channel[z_swizzle].type == |
|
|
|
UTIL_FORMAT_TYPE_FLOAT); |
|
|
|
assert(format_desc->channel[z_swizzle].size == |
|
|
|
format_desc->block.bits); |
|
|
|
assert(format_desc->channel[z_swizzle].size == 32); |
|
|
|
} |
|
|
|
else { |
|
|
|
assert(format_desc->channel[z_swizzle].type == |
|
|
@@ -849,7 +916,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
{ |
|
|
|
unsigned s_shift, s_mask; |
|
|
|
|
|
|
|
if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { |
|
|
|
z_dst = z_fb; |
|
|
|
stencil_vals = s_fb; |
|
|
|
|
|
|
|
have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); |
|
|
|
have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); |
|
|
|
|
|
|
|
if (have_z) { |
|
|
|
if (z_mask != 0xffffffff) { |
|
|
|
z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); |
|
|
|
} |
|
|
@@ -859,26 +932,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
*/ |
|
|
|
if (z_shift) { |
|
|
|
LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); |
|
|
|
z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); |
|
|
|
z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); |
|
|
|
} else if (z_bitmask) { |
|
|
|
/* TODO: Instead of loading a mask from memory and ANDing, it's |
|
|
|
* probably faster to just shake the bits with two shifts. */ |
|
|
|
z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); |
|
|
|
z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); |
|
|
|
} else { |
|
|
|
z_dst = zs_dst; |
|
|
|
lp_build_name(z_dst, "z_dst"); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { |
|
|
|
if (have_s) { |
|
|
|
if (s_shift) { |
|
|
|
LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); |
|
|
|
stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); |
|
|
|
stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); |
|
|
|
stencil_shift = shift; /* used below */ |
|
|
|
} |
|
|
|
else { |
|
|
|
stencil_vals = zs_dst; |
|
|
|
} |
|
|
|
|
|
|
|
if (s_mask != 0xffffffff) { |
|
|
|
LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); |
|
|
@@ -997,7 +1064,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
LLVMValueRef z_fail_mask, z_pass_mask; |
|
|
|
|
|
|
|
/* apply Z-fail operator */ |
|
|
|
z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); |
|
|
|
z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass); |
|
|
|
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, |
|
|
|
stencil_refs, stencil_vals, |
|
|
|
z_fail_mask, front_facing); |
|
|
@@ -1019,8 +1086,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
s_pass_mask, front_facing); |
|
|
|
} |
|
|
|
|
|
|
|
/* Put Z and ztencil bits in the right place */ |
|
|
|
if (z_dst && z_shift) { |
|
|
|
/* Put Z and stencil bits in the right place */ |
|
|
|
if (have_z && z_shift) { |
|
|
|
LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); |
|
|
|
z_dst = LLVMBuildShl(builder, z_dst, shift, ""); |
|
|
|
} |
|
|
@@ -1028,18 +1095,24 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
|
|
|
stencil_vals = LLVMBuildShl(builder, stencil_vals, |
|
|
|
stencil_shift, ""); |
|
|
|
|
|
|
|
/* Finally, merge/store the z/stencil values */ |
|
|
|
/* Finally, merge the z/stencil values */ |
|
|
|
if ((depth->enabled && depth->writemask) || |
|
|
|
(stencil[0].enabled && stencil[0].writemask)) { |
|
|
|
|
|
|
|
if (z_dst && stencil_vals) |
|
|
|
zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, ""); |
|
|
|
else if (z_dst) |
|
|
|
zs_dst = z_dst; |
|
|
|
else |
|
|
|
zs_dst = stencil_vals; |
|
|
|
|
|
|
|
*zs_value = zs_dst; |
|
|
|
(stencil[0].enabled && (stencil[0].writemask || |
|
|
|
(stencil[1].enabled && stencil[1].writemask)))) { |
|
|
|
|
|
|
|
if (format_desc->block.bits <= 32) { |
|
|
|
if (have_z && have_s) |
|
|
|
*z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); |
|
|
|
else if (have_z) |
|
|
|
*z_value = z_dst; |
|
|
|
else |
|
|
|
*z_value = stencil_vals; |
|
|
|
*s_value = *z_value; |
|
|
|
} |
|
|
|
else { |
|
|
|
*z_value = z_dst; |
|
|
|
*s_value = stencil_vals; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (s_pass_mask) |