ソースを参照

gallivm: Compute lod as integer whenever possible.

Gives more accurate and faster results for PIPE_TEX_MIPFILTER_NEAREST, and
requires fewer FP <-> SI conversions overall.
tags/snb-magic
José Fonseca 15年前
コミット
87dd859b34

+ 113
- 57
src/gallium/auxiliary/gallivm/lp_bld_sample.c ファイルの表示

@@ -167,6 +167,73 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
}


/**
 * Emit code that computes rho, the largest size-scaled coordinate derivative.
 *
 * For each texture coordinate that is used (always s; t when the target has
 * more than one dimension; r when it has more than two), the larger of the
 * absolute X and Y derivatives is multiplied by the texture size along that
 * coordinate.  rho is the maximum of those products.
 *
 * \param ddx     partial derivatives of (s, t, r, q) with respect to X
 * \param ddy     partial derivatives of (s, t, r, q) with respect to Y
 * \param width   scalar int texture width
 * \param height  scalar int texture height
 * \param depth   scalar int texture depth
 * \return the rho value, built with bld->float_bld
 *
 * XXX: rho is a scalar, so only element 0 of each derivative vector passed
 * by the shader is looked at; the remaining elements are ignored.
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             const LLVMValueRef ddx[4],
             const LLVMValueRef ddy[4],
             LLVMValueRef width,
             LLVMValueRef height,
             LLVMValueRef depth)
{
   /* Value names given to the extracted derivatives, indexed by axis. */
   static const char *const ddx_name[3] = { "dsdx", "dtdx", "drdx" };
   static const char *const ddy_name[3] = { "dsdy", "dtdy", "drdy" };

   struct lp_build_context *float_bld = &bld->float_bld;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lane0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
   /* s is always processed; t and r only for 2D and 3D targets. */
   const unsigned num_axes = dims > 2 ? 3 : (dims > 1 ? 2 : 1);
   LLVMValueRef size[3];
   LLVMValueRef abs_ddx[3];
   LLVMValueRef abs_ddy[3];
   LLVMValueRef rho = NULL;
   unsigned axis;

   size[0] = width;
   size[1] = height;
   size[2] = depth;

   /* First pass: pull out element 0 of every derivative and take |x|. */
   for (axis = 0; axis < num_axes; axis++) {
      LLVMValueRef deriv;

      deriv = LLVMBuildExtractElement(bld->builder, ddx[axis], lane0,
                                      ddx_name[axis]);
      abs_ddx[axis] = lp_build_abs(float_bld, deriv);

      deriv = LLVMBuildExtractElement(bld->builder, ddy[axis], lane0,
                                      ddy_name[axis]);
      abs_ddy[axis] = lp_build_abs(float_bld, deriv);
   }

   /* Second pass: rho = max over axes of max(|d/dx|, |d/dy|) * size.
    * XXX this could be vectorized somewhat
    */
   for (axis = 0; axis < num_axes; axis++) {
      LLVMValueRef scaled =
         LLVMBuildFMul(bld->builder,
                       lp_build_max(float_bld, abs_ddx[axis], abs_ddy[axis]),
                       lp_build_int_to_float(float_bld, size[axis]), "");

      if (axis == 0) {
         rho = scaled;
      }
      else {
         rho = lp_build_max(float_bld, rho, scaled);
      }
   }

   return rho;
}


/**
* Generate code to compute texture level of detail (lambda).
* \param ddx partial derivatives of (s, t, r, q) with respect to X
@@ -180,7 +247,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
* XXX: The resulting lod is scalar, so ignore all but the first element of
* derivatives, lod_bias, etc that are passed by the shader.
*/
LLVMValueRef
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
@@ -189,9 +256,18 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth)
LLVMValueRef depth,
unsigned mip_filter,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart)

{
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef lod;

*out_lod_ipart = bld->int_bld.zero;
*out_lod_fpart = bld->float_bld.zero;

if (bld->static_state->min_max_lod_equal) {
/* User is forcing sampling from a particular mipmap level.
* This is hit during mipmap generation.
@@ -199,68 +275,40 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef min_lod =
bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);

return min_lod;
lod = min_lod;
}
else {
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef sampler_lod_bias =
bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
LLVMValueRef lod;

if (explicit_lod) {
lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
index0, "");
}
else {
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef dsdx, dsdy;
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;

dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
dsdx = lp_build_abs(float_bld, dsdx);
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
dsdy = lp_build_abs(float_bld, dsdy);
if (dims > 1) {
dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
dtdx = lp_build_abs(float_bld, dtdx);
dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
dtdy = lp_build_abs(float_bld, dtdy);
if (dims > 2) {
drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
drdx = lp_build_abs(float_bld, drdx);
drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
drdy = lp_build_abs(float_bld, drdy);
}
}
rho = lp_build_rho(bld, ddx, ddy, width, height, depth);

/* Compute rho = max of all partial derivatives scaled by texture size.
* XXX this could be vectorized somewhat
*/
rho = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dsdx, dsdy),
lp_build_int_to_float(float_bld, width), "");
if (dims > 1) {
LLVMValueRef max;
max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dtdx, dtdy),
lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
if (dims > 2) {
max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, drdx, drdy),
lp_build_int_to_float(float_bld, depth), "");
rho = lp_build_max(float_bld, rho, max);
}
/* compute lod = log2(rho) */
if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
!lod_bias &&
!bld->static_state->lod_bias_non_zero &&
!bld->static_state->apply_max_lod &&
!bld->static_state->apply_min_lod) {
*out_lod_ipart = lp_build_ilog2(float_bld, rho);
*out_lod_fpart = bld->float_bld.zero;
return;
}

/* compute lod = log2(rho) */
#if 0
lod = lp_build_log2(float_bld, rho);
#else
lod = lp_build_fast_log2(float_bld, rho);
#endif
if (0) {
lod = lp_build_log2(float_bld, rho);
}
else {
lod = lp_build_fast_log2(float_bld, rho);
}

/* add shader lod bias */
if (lod_bias) {
@@ -288,9 +336,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,

lod = lp_build_max(float_bld, lod, min_lod);
}
}

return lod;
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
LLVMValueRef ipart = lp_build_ifloor(float_bld, lod);
lp_build_name(ipart, "lod_ipart");
*out_lod_ipart = ipart;
ipart = LLVMBuildSIToFP(bld->builder, ipart, float_bld->vec_type, "");
*out_lod_fpart = LLVMBuildFSub(bld->builder, lod, ipart, "lod_fpart");
}
else {
*out_lod_ipart = lp_build_iround(float_bld, lod);
}

return;
}


@@ -304,10 +363,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
unsigned unit,
LLVMValueRef lod,
LLVMValueRef lod_ipart,
LLVMValueRef *level_out)
{
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;

@@ -317,7 +375,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
bld->builder, unit);

/* convert float lod to integer */
level = lp_build_iround(float_bld, lod);
level = lod_ipart;

/* clamp level to legal range of levels */
*level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -332,12 +390,10 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
LLVMValueRef lod,
LLVMValueRef lod_ipart,
LLVMValueRef *level0_out,
LLVMValueRef *level1_out,
LLVMValueRef *weight_out)
LLVMValueRef *level1_out)
{
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;

@@ -345,7 +401,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
bld->builder, unit);

/* convert float lod to integer */
lp_build_ifloor_fract(float_bld, lod, &level, weight_out);
level = lod_ipart;

/* compute level 0 and clamp to legal range of levels */
*level0_out = lp_build_clamp(int_bld, level,

+ 7
- 5
src/gallium/auxiliary/gallivm/lp_bld_sample.h ファイルの表示

@@ -274,7 +274,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);


LLVMValueRef
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
@@ -283,7 +283,10 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth);
LLVMValueRef depth,
unsigned mip_filter,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart);

void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -294,10 +297,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
LLVMValueRef lod,
LLVMValueRef lod_ipart,
LLVMValueRef *level0_out,
LLVMValueRef *level1_out,
LLVMValueRef *weight_out);
LLVMValueRef *level1_out);

LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,

+ 21
- 19
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c ファイルの表示

@@ -882,13 +882,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
LLVMValueRef data_array,
LLVMValueRef texel_out[4])
{
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef lod = NULL, lod_fpart = NULL;
LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -936,7 +936,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
ddy = face_ddy;
}


/*
* Compute the level of detail (float).
*/
@@ -945,9 +944,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
lod = lp_build_lod_selector(bld, unit, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth);
lp_build_lod_selector(bld, unit, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth,
mip_filter,
&lod_ipart, &lod_fpart);
} else {
lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
}

/*
@@ -966,30 +969,29 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
assert(lod_ipart);
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
}
break;
case PIPE_TEX_MIPFILTER_NEAREST:
assert(lod);
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
assert(lod_ipart);
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
break;
case PIPE_TEX_MIPFILTER_LINEAR:
{
LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
LLVMValueRef i255 = lp_build_const_int32(255);
LLVMTypeRef i32_type = LLVMIntType(32);
LLVMTypeRef i16_type = LLVMIntType(16);

assert(lod);
assert(lod_fpart);

lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);

lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
&lod_fpart);
lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "");
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

@@ -1049,9 +1051,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
lp_build_flow_scope_declare(flow_ctx, &packed_lo);
lp_build_flow_scope_declare(flow_ctx, &packed_hi);

/* minify = lod > 0.0 */
minify = LLVMBuildFCmp(builder, LLVMRealUGE,
lod, float_bld->zero, "");
/* minify = lod >= 0.0 */
minify = LLVMBuildICmp(builder, LLVMIntSGE,
lod_ipart, int_bld->zero, "");

lp_build_if(&if_ctx, flow_ctx, builder, minify);
{

+ 17
- 14
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c ファイルの表示

@@ -884,12 +884,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef data_array,
LLVMValueRef *colors_out)
{
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef lod = NULL, lod_fpart = NULL;
LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -935,9 +935,13 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
lod = lp_build_lod_selector(bld, unit, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth);
lp_build_lod_selector(bld, unit, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth,
mip_filter,
&lod_ipart, &lod_fpart);
} else {
lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
}

/*
@@ -950,22 +954,21 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
assert(lod_ipart);
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
}
}
else {
assert(lod);
assert(lod_ipart);
if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
&lod_fpart);
lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);
lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
}
}
@@ -1019,9 +1022,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);

/* minify = lod > 0.0 */
minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
lod, float_bld->zero, "");
/* minify = lod >= 0.0 */
minify = LLVMBuildICmp(bld->builder, LLVMIntSGE,
lod_ipart, int_bld->zero, "");

lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
{

読み込み中…
キャンセル
保存