瀏覽代碼

gallivm: Vectorize the rho computation.

tags/snb-magic
José Fonseca 15 年之前
父節點
當前提交
321ec1a224

+ 56
- 36
src/gallium/auxiliary/gallivm/lp_bld_sample.c 查看文件

@@ -171,9 +171,6 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
* Generate code to compute coordinate gradient (rho).
* \param ddx partial derivatives of (s, t, r, q) with respect to X
* \param ddy partial derivatives of (s, t, r, q) with respect to Y
* \param width scalar int texture width
* \param height scalar int texture height
* \param depth scalar int texture depth
*
* XXX: The resulting rho is scalar, so we ignore all but the first element of
* derivatives that are passed by the shader.
@@ -181,52 +178,75 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth)
const LLVMValueRef ddy[4])
{
struct lp_build_context *float_size_bld = &bld->float_size_bld;
struct lp_build_context *float_bld = &bld->float_bld;
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
LLVMValueRef dsdx, dsdy;
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
LLVMValueRef rho_x, rho_y;
LLVMValueRef rho_vec;
LLVMValueRef float_size;
LLVMValueRef rho;

dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
dsdx = lp_build_abs(float_bld, dsdx);
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
dsdy = lp_build_abs(float_bld, dsdy);
if (dims > 1) {

if (dims <= 1) {
rho_x = dsdx;
rho_y = dsdy;
}
else {
rho_x = float_size_bld->undef;
rho_y = float_size_bld->undef;

rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, "");
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, "");

dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
dtdx = lp_build_abs(float_bld, dtdx);
dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
dtdy = lp_build_abs(float_bld, dtdy);
if (dims > 2) {

rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, "");
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, "");

if (dims >= 3) {
drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
drdx = lp_build_abs(float_bld, drdx);
drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
drdy = lp_build_abs(float_bld, drdy);

rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, "");
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, "");
}
}

/* Compute rho = max of all partial derivatives scaled by texture size.
* XXX this could be vectorized somewhat
*/
rho = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dsdx, dsdy),
lp_build_int_to_float(float_bld, width), "");
if (dims > 1) {
LLVMValueRef max;
max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dtdx, dtdy),
lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
if (dims > 2) {
max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, drdx, drdy),
lp_build_int_to_float(float_bld, depth), "");
rho = lp_build_max(float_bld, rho, max);
rho_x = lp_build_abs(float_size_bld, rho_x);
rho_y = lp_build_abs(float_size_bld, rho_y);

rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);

float_size = lp_build_int_to_float(float_size_bld, bld->uint_size);

rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

if (dims <= 1) {
rho = rho_vec;
}
else {
if (dims >= 2) {
LLVMValueRef rho_s, rho_t, rho_r;

rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");

rho = lp_build_max(float_bld, rho_s, rho_t);

if (dims >= 3) {
rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
rho = lp_build_max(float_bld, rho, rho_r);
}
}
}

@@ -289,7 +309,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
else {
LLVMValueRef rho;

rho = lp_build_rho(bld, ddx, ddy, width, height, depth);
rho = lp_build_rho(bld, ddx, ddy);

/* compute lod = log2(rho) */
if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||

+ 12
- 1
src/gallium/auxiliary/gallivm/lp_bld_sample.h 查看文件

@@ -202,9 +202,20 @@ struct lp_build_sample_context
struct lp_type int_coord_type;
struct lp_build_context int_coord_bld;

/** Unsigned integer texture size */
struct lp_type uint_size_type;
struct lp_build_context uint_size_bld;

/** Unsigned integer texture size */
struct lp_type float_size_type;
struct lp_build_context float_size_bld;

/** Output texels type and build context */
struct lp_type texel_type;
struct lp_build_context texel_bld;

/** Unsigned vector with texture width, height, depth */
LLVMValueRef uint_size;
};


@@ -241,7 +252,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
}


static INLINE int
static INLINE unsigned
texture_dims(enum pipe_texture_target tex)
{
switch (tex) {

+ 24
- 0
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 查看文件

@@ -1141,7 +1141,9 @@ lp_build_sample_soa(LLVMBuilderRef builder,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef texel_out[4])
{
unsigned dims = texture_dims(static_state->target);
struct lp_build_sample_context bld;
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef width, width_vec;
LLVMValueRef height, height_vec;
LLVMValueRef depth, depth_vec;
@@ -1171,6 +1173,9 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.coord_type = type;
bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
bld.float_size_type = lp_type_float(32);
bld.float_size_type.length = dims > 1 ? 4 : 1;
bld.uint_size_type = lp_uint_type(bld.float_size_type);
bld.texel_type = type;

float_vec_type = lp_type_float_vec(32);
@@ -1181,6 +1186,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
lp_build_context_init(&bld.uint_size_bld, builder, bld.uint_size_type);
lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type);
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);

/* Get the dynamic state */
@@ -1196,6 +1203,23 @@ lp_build_sample_soa(LLVMBuilderRef builder,
t = coords[1];
r = coords[2];

/* width, height, depth as single uint vector */
if (dims <= 1) {
bld.uint_size = width;
}
else {
bld.uint_size = LLVMBuildInsertElement(builder, bld.uint_size_bld.undef,
width, LLVMConstInt(i32t, 0, 0), "");
if (dims >= 2) {
bld.uint_size = LLVMBuildInsertElement(builder, bld.uint_size,
height, LLVMConstInt(i32t, 1, 0), "");
if (dims >= 3) {
bld.uint_size = LLVMBuildInsertElement(builder, bld.uint_size,
depth, LLVMConstInt(i32t, 2, 0), "");
}
}
}

/* width, height, depth as uint vectors */
width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);

Loading…
取消
儲存