Browse Source

gallivm,llvmpipe: handle TXF (texelFetch) instruction, including offsets

This also adds some code to handle per-quad lods for more than 4-wide fetches,
because otherwise I'd have to integrate the texelFetch function into
the splitting stuff... (but it is not used yet outside texelFetch).
Passes piglit fs-texelFetch-2D; fails fs-texelFetchOffset-2D due to what I
believe is a test error (results are undefined for out-of-bounds fetches; we
return whatever is at offset 0, whereas the test expects [0,0,0,1]).
Texel offsets are only handled by texelFetch for now, though the interface
can handle it for everything.

Reviewed-by: José Fonseca <jfonseca@vmware.com>
tags/gles3-fmt-v1
Roland Scheidegger 12 years ago
parent
commit
0b6554ba6f

+ 6
- 3
src/gallium/auxiliary/draw/draw_llvm_sample.c View File

@@ -171,9 +171,10 @@ static void
draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
struct gallivm_state *gallivm,
struct lp_type type,
boolean is_fetch,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
@@ -187,8 +188,10 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
type,
is_fetch,
unit,
num_coords, coords,
coords,
offsets,
derivs,
lod_bias, explicit_lod,
texel);
@@ -213,7 +216,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
lp_build_size_query_soa(gallivm,
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
type,
type,
unit,
explicit_lod,
sizes_out);

+ 235
- 33
src/gallium/auxiliary/gallivm/lp_bld_sample.c View File

@@ -186,8 +186,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
const struct lp_derivatives *derivs)
{
struct gallivm_state *gallivm = bld->gallivm;
struct lp_build_context *int_size_bld = &bld->int_size_bld;
struct lp_build_context *float_size_bld = &bld->float_size_bld;
struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
@@ -316,7 +316,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
}
}
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
perquadf_bld->type, rho);
perquadf_bld->type, rho, 0);
}
else {
if (dims <= 1) {
@@ -517,7 +517,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
else {
if (explicit_lod) {
lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
perquadf_bld->type, explicit_lod);
perquadf_bld->type, explicit_lod, 0);
}
else {
LLVMValueRef rho;
@@ -562,7 +562,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
/* add shader lod bias */
if (lod_bias) {
lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
perquadf_bld->type, lod_bias);
perquadf_bld->type, lod_bias, 0);
lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
}
}
@@ -725,7 +725,6 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,

/**
* Return pointer to a single mipmap level.
* \param data_array array of pointers to mipmap levels
* \param level integer mipmap level
*/
LLVMValueRef
@@ -743,6 +742,55 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
return data_ptr;
}

/**
 * Build a vector of offsets to the selected mip level(s), one per pixel.
 *
 * The layout of \p level depends on bld->num_lods:
 *  - 1: \p level is used directly as the index into bld->mip_offsets and the
 *    loaded value is broadcast to every lane.
 *  - coord length / 4: one level per group of four lanes; each loaded offset
 *    is inserted at the first lane of its group and then spread with
 *    lp_build_swizzle_scalar_aos (channel 0).
 *  - coord length: one level per lane.
 *
 * \param bld    sample build context (supplies mip_offsets and num_lods)
 * \param level  integer mipmap level (scalar or vector, see above)
 * \return vector of offsets built with the int_coord_bld context
 */
LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
                         LLVMValueRef level)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const unsigned length = bld->coord_bld.type.length;
   LLVMValueRef gep_idx[2];
   LLVMValueRef result;
   unsigned dst_step;
   unsigned lod;

   gep_idx[0] = lp_build_const_int32(gallivm, 0);

   /* Single lod: one load, broadcast to all lanes. */
   if (bld->num_lods == 1) {
      LLVMValueRef ptr, value;

      gep_idx[1] = level;
      ptr = LLVMBuildGEP(builder, bld->mip_offsets, gep_idx, 2, "");
      value = LLVMBuildLoad(builder, ptr, "");
      return lp_build_broadcast_scalar(&bld->int_coord_bld, value);
   }

   if (bld->num_lods == length / 4) {
      /* one lod per group of four lanes: write lane 0 of each group */
      dst_step = 4;
   }
   else {
      assert (bld->num_lods == length);
      /* one lod per lane */
      dst_step = 1;
   }

   result = bld->int_coord_bld.undef;
   for (lod = 0; lod < bld->num_lods; lod++) {
      LLVMValueRef src_idx = lp_build_const_int32(gallivm, lod);
      LLVMValueRef dst_idx = lp_build_const_int32(gallivm, dst_step * lod);
      LLVMValueRef ptr, value;

      gep_idx[1] = LLVMBuildExtractElement(builder, level, src_idx, "");
      ptr = LLVMBuildGEP(builder, bld->mip_offsets, gep_idx, 2, "");
      value = LLVMBuildLoad(builder, ptr, "");
      result = LLVMBuildInsertElement(builder, result, value, dst_idx, "");
   }

   if (dst_step == 4) {
      /* NOTE(review): presumably replicates lane 0 of each group of four
       * across that group - confirm against lp_build_swizzle_scalar_aos. */
      result = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, result, 0);
   }

   return result;
}


/**
* Codegen equivalent for u_minify().
@@ -780,12 +828,44 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
LLVMValueRef stride_array, LLVMValueRef level)
{
LLVMBuilderRef builder = bld->gallivm->builder;
LLVMValueRef indexes[2], stride;
LLVMValueRef indexes[2], stride, stride1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
indexes[1] = level;
stride = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
stride = LLVMBuildLoad(builder, stride, "");
stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
if (bld->num_lods == 1) {
indexes[1] = level;
stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
}
else if (bld->num_lods == bld->coord_bld.type.length / 4) {
LLVMValueRef stride1;
unsigned i;

stride = bld->int_coord_bld.undef;
for (i = 0; i < bld->num_lods; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
}
stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
}
else {
LLVMValueRef stride1;
unsigned i;

assert (bld->num_lods == bld->coord_bld.type.length);

stride = bld->int_coord_bld.undef;
for (i = 0; i < bld->coord_bld.type.length; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
}
}
return stride;
}

@@ -805,12 +885,102 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
const unsigned dims = bld->dims;
LLVMValueRef ilevel_vec;

ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);

/*
* Compute width, height, depth at mipmap level 'ilevel'
*/
*out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
if (bld->num_lods == 1) {
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
*out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
}
else {
LLVMValueRef int_size_vec;
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
unsigned num_quads = bld->coord_bld.type.length / 4;
unsigned i;

if (bld->num_lods == num_quads) {
/*
* XXX: this should be #ifndef SANE_INSTRUCTION_SET.
* intel "forgot" the variable shift count instruction until avx2.
* A harmless 8x32 shift gets translated into 32 instructions
* (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
* unable to recognize if there are really just 2 different shift
* count values. So do the shift 4-wide before expansion.
*/
struct lp_build_context bld4;
struct lp_type type4;

type4 = bld->int_coord_bld.type;
type4.length = 4;

lp_build_context_init(&bld4, bld->gallivm, type4);

if (bld->dims == 1) {
assert(bld->int_size_in_bld.type.length == 1);
int_size_vec = lp_build_broadcast_scalar(&bld4,
bld->int_size);
}
else {
assert(bld->int_size_in_bld.type.length == 4);
int_size_vec = bld->int_size;
}

for (i = 0; i < num_quads; i++) {
LLVMValueRef ileveli;
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

ileveli = lp_build_extract_broadcast(bld->gallivm,
bld->perquadi_bld.type,
bld4.type,
ilevel,
indexi);
tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli);
}
/*
* out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
* [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
*/
*out_size = lp_build_concat(bld->gallivm,
tmp,
bld4.type,
num_quads);
}
else {
/* FIXME: this is terrible and results in _huge_ vector
* (for the dims > 1 case).
* Should refactor this (together with extract_image_sizes) and do
* something more useful. Could for instance if we have width,height
* with 4-wide vector pack all elements into a 8xi16 vector
* (on which we can still do useful math) instead of using a 16xi32
* vector.
* FIXME: some callers can't handle this yet.
* For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
* For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
*/
assert(bld->num_lods == bld->coord_bld.type.length);
if (bld->dims == 1) {
assert(bld->int_size_bld.type.length == 1);
int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
bld->int_size);
/* vector shift with variable shift count alert... */
*out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel);
}
else {
LLVMValueRef ilevel1;
for (i = 0; i < bld->num_lods; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
bld->int_size_in_bld.type, ilevel, indexi);
tmp[i] = bld->int_size;
tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
}
int_size_vec = lp_build_concat(bld->gallivm,
tmp,
bld->int_size_in_bld.type,
bld->num_lods);
}
}
}

if (dims >= 2) {
*row_stride_vec = lp_build_get_level_stride_vec(bld,
@@ -836,7 +1006,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
*/
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
struct lp_type size_type,
struct lp_build_context *size_bld,
struct lp_type coord_type,
LLVMValueRef size,
LLVMValueRef *out_width,
@@ -845,24 +1015,56 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
{
const unsigned dims = bld->dims;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
struct lp_type size_type = size_bld->type;

if (bld->num_lods == 1) {
*out_width = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
size,
LLVMConstInt(i32t, 0, 0));
if (dims >= 2) {
*out_height = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
size,
LLVMConstInt(i32t, 1, 0));
if (dims == 3) {
*out_depth = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
size,
LLVMConstInt(i32t, 2, 0));
}
}
}
else {
unsigned num_quads = bld->coord_bld.type.length / 4;

*out_width = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
size,
LLVMConstInt(i32t, 0, 0));
if (dims >= 2) {
*out_height = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
size,
LLVMConstInt(i32t, 1, 0));
if (dims == 3) {
*out_depth = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
size,
LLVMConstInt(i32t, 2, 0));
if (dims == 1) {
*out_width = size;
}
else if (bld->num_lods == num_quads) {
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
if (dims >= 2) {
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
if (dims == 3) {
*out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
}
}
}
else {
assert(bld->num_lods == bld->coord_type.length);
*out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
coord_type, size, 0);
if (dims >= 2) {
*out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
coord_type, size, 1);
if (dims == 3) {
*out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
coord_type, size, 2);
}
}
}
}
}
@@ -886,7 +1088,7 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
LLVMValueRef depth;

lp_build_extract_image_sizes(bld,
bld->float_size_type,
&bld->float_size_bld,
bld->coord_type,
flt_size,
&width,

+ 21
- 4
src/gallium/auxiliary/gallivm/lp_bld_sample.h View File

@@ -210,6 +210,9 @@ struct lp_build_sample_context
/** SIMD vector width */
unsigned vector_width;

/** number of lod values (valid are 1, length/4, length) */
unsigned num_lods;

/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;
@@ -230,10 +233,18 @@ struct lp_build_sample_context
struct lp_build_context int_coord_bld;

/** Unsigned integer texture size */
struct lp_type int_size_in_type;
struct lp_build_context int_size_in_bld;

/** Float incoming texture size */
struct lp_type float_size_in_type;
struct lp_build_context float_size_in_bld;

/** Unsigned integer texture size (might be per quad) */
struct lp_type int_size_type;
struct lp_build_context int_size_bld;

/** Unsigned integer texture size */
/** Float texture size (might be per quad) */
struct lp_type float_size_type;
struct lp_build_context float_size_bld;

@@ -298,6 +309,7 @@ texture_dims(enum pipe_texture_target tex)
{
switch (tex) {
case PIPE_TEXTURE_1D:
case PIPE_BUFFER:
return 1;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
@@ -355,6 +367,11 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef level);


LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
LLVMValueRef level);


void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
LLVMValueRef ilevel,
@@ -365,7 +382,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,

void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
struct lp_type size_type,
struct lp_build_context *size_bld,
struct lp_type coord_type,
LLVMValueRef size,
LLVMValueRef *out_width,
@@ -418,9 +435,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
const struct lp_sampler_static_state *static_state,
struct lp_sampler_dynamic_state *dynamic_state,
struct lp_type fp_type,
boolean is_fetch,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias,
LLVMValueRef explicit_lod,
@@ -448,7 +466,6 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
void
lp_build_sample_nop(struct gallivm_state *gallivm,
struct lp_type type,
unsigned num_coords,
const LLVMValueRef *coords,
LLVMValueRef texel_out[4]);


+ 4
- 4
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c View File

@@ -539,7 +539,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

lp_build_extract_image_sizes(bld,
bld->int_size_type,
&bld->int_size_bld,
bld->int_coord_type,
int_size,
&width_vec,
@@ -661,7 +661,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

lp_build_extract_image_sizes(bld,
bld->float_size_type,
&bld->float_size_bld,
bld->coord_type,
flt_size,
&width_vec,
@@ -994,7 +994,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

lp_build_extract_image_sizes(bld,
bld->int_size_type,
&bld->int_size_bld,
bld->int_coord_type,
int_size,
&width_vec,
@@ -1175,7 +1175,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

lp_build_extract_image_sizes(bld,
bld->float_size_type,
&bld->float_size_bld,
bld->coord_type,
flt_size,
&width_vec,

+ 153
- 14
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c View File

@@ -610,7 +610,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef x, y, z;

lp_build_extract_image_sizes(bld,
bld->int_size_type,
&bld->int_size_bld,
bld->int_coord_type,
size,
&width_vec, &height_vec, &depth_vec);
@@ -618,7 +618,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

lp_build_extract_image_sizes(bld,
bld->float_size_type,
&bld->float_size_bld,
bld->coord_type,
flt_size,
&flt_width_vec, &flt_height_vec, &flt_depth_vec);
@@ -695,7 +695,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
int chan;

lp_build_extract_image_sizes(bld,
bld->int_size_type,
&bld->int_size_bld,
bld->int_coord_type,
size,
&width_vec, &height_vec, &depth_vec);
@@ -703,7 +703,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

lp_build_extract_image_sizes(bld,
bld->float_size_type,
&bld->float_size_bld,
bld->coord_type,
flt_size,
&flt_width_vec, &flt_height_vec, &flt_depth_vec);
@@ -1157,6 +1157,120 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}


/**
* Texel fetch function.
* In contrast to general sampling there is no filtering, no coord minification,
* lod (if any) is always explicit uint, coords are uints (in terms of texel units)
* directly to be applied to the selected mip level (after adding texel offsets).
* This function handles texel fetch for all targets where texel fetch is supported
* (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
*
* Side effect: overwrites bld->num_lods and, when an explicit lod is used on a
* non-buffer target, also re-initializes bld's int/float size types and
* build contexts.
*
* \param bld           sample build context
* \param unit          texture unit, forwarded to lp_build_nearest_mip_level
* \param coords        texel coordinates; entries [0], [1] and [2] are all read
* \param explicit_lod  lod to fetch from, or NULL (ignored for PIPE_BUFFER)
* \param offsets       per-dimension texel offsets; an entry may be NULL
* \param colors_out    receives the fetched texel channels
*/
static void
lp_build_fetch_texel(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef *coords,
LLVMValueRef explicit_lod,
const LLVMValueRef *offsets,
LLVMValueRef *colors_out)
{
struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
unsigned dims = bld->dims, chan;
LLVMValueRef size, ilevel;
LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
LLVMValueRef width, height, depth, i, j;
LLVMValueRef offset, out_of_bounds, out1;

/* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */
if (explicit_lod && bld->static_state->target != PIPE_BUFFER) {
/* could also avoid this if there are no mipmaps */
/* XXX temporary hack until ordinary sampling handles per-quad lod the same */
/* One lod per group of four lanes; the size types/contexts are rebuilt to match. */
bld->num_lods = bld->coord_type.length / 4;
bld->float_size_type = bld->float_size_in_type;
bld->float_size_type.length = bld->num_lods > 1 ? bld->coord_type.length :
bld->float_size_in_type.length;
bld->int_size_type = lp_int_type(bld->float_size_type);
lp_build_context_init(&bld->int_size_bld, bld->gallivm, bld->int_size_type);
lp_build_context_init(&bld->float_size_bld, bld->gallivm, bld->float_size_type);

/* Pack channel 0 of the lod vector into the per-quad integer type. */
ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
perquadi_bld->type, explicit_lod, 0);
lp_build_nearest_mip_level(bld, unit, ilevel, &ilevel);
}
else {
/* No lod (or a buffer): a single lod, level index constant 0. */
bld->num_lods = 1;
ilevel = lp_build_const_int32(bld->gallivm, 0);
}
/* Size and strides of the selected level(s), then split size into w/h/d. */
lp_build_mipmap_level_sizes(bld, ilevel,
&size,
&row_stride_vec, &img_stride_vec);
lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
size, &width, &height, &depth);

/* This is a lot like border sampling */
/*
 * For each used dimension: apply the optional texel offset, then OR into
 * out_of_bounds the lanes whose coordinate is < 0 or >= the level size.
 */
if (offsets[0]) {
/* XXX coords are really unsigned, offsets are signed */
x = lp_build_add(int_coord_bld, x, offsets[0]);
}
out_of_bounds = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

if (dims >= 2) {
if (offsets[1]) {
y = lp_build_add(int_coord_bld, y, offsets[1]);
}
out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

if (dims >= 3) {
if (offsets[2]) {
z = lp_build_add(int_coord_bld, z, offsets[2]);
}
out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
}
}

/* Offset of the texel within the level, plus the i/j outputs later passed to the fetch. */
lp_build_sample_offset(int_coord_bld,
bld->format_desc,
x, y, z, row_stride_vec, img_stride_vec,
&offset, &i, &j);

/* Buffers skip the mip-level offset. */
if (bld->static_state->target != PIPE_BUFFER) {
offset = lp_build_add(int_coord_bld, offset,
lp_build_get_mip_offsets(bld, ilevel));
}

/*
 * NOTE(review): presumably clears the offset for out-of-bounds lanes so they
 * read whatever is at offset 0 - confirm lp_build_andnot operand order.
 */
offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);

lp_build_fetch_rgba_soa(bld->gallivm,
bld->format_desc,
bld->texel_type,
bld->base_ptr, offset,
i, j,
colors_out);

/* Deliberately disabled: would replace out-of-bounds results with zero. */
if (0) {
/*
* Not needed except for ARB_robust_buffer_access_behavior.
* Could use min/max above instead of out-of-bounds comparisons
* (in fact cast to unsigned and min only is sufficient)
* if we don't care about the result returned for out-of-bounds.
*/
for (chan = 0; chan < 4; chan++) {
colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
bld->texel_bld.zero, colors_out[chan]);
}
}
}


/**
* Do shadow test/comparison.
* \param p the texcoord Z (aka R, aka P) component
@@ -1209,7 +1323,6 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
void
lp_build_sample_nop(struct gallivm_state *gallivm,
struct lp_type type,
unsigned num_coords,
const LLVMValueRef *coords,
LLVMValueRef texel_out[4])
{
@@ -1227,6 +1340,7 @@ lp_build_sample_nop(struct gallivm_state *gallivm,
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
* \param type vector float type to use for coords, etc.
* \param is_fetch if this is a texel fetch instruction.
* \param derivs partial derivatives of (s,t,r,q) with respect to x and y
*/
void
@@ -1234,9 +1348,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
const struct lp_sampler_static_state *static_state,
struct lp_sampler_dynamic_state *dynamic_state,
struct lp_type type,
boolean is_fetch,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
@@ -1272,20 +1387,28 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld.int_type = lp_type_int(32);
bld.coord_type = type;
bld.int_coord_type = lp_int_type(type);
bld.float_size_type = lp_type_float(32);
bld.float_size_type.length = dims > 1 ? 4 : 1;
bld.int_size_type = lp_int_type(bld.float_size_type);
bld.float_size_in_type = lp_type_float(32);
bld.float_size_in_type.length = dims > 1 ? 4 : 1;
bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
bld.texel_type = type;
bld.perquadf_type = type;
/* we want native vector size to be able to use our intrinsics */
bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
bld.perquadi_type = lp_int_type(bld.perquadf_type);

bld.num_lods = 1;
bld.float_size_type = bld.float_size_in_type;
bld.float_size_type.length = bld.num_lods > 1 ? type.length :
bld.float_size_in_type.length;
bld.int_size_type = lp_int_type(bld.float_size_type);

lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
lp_build_context_init(&bld.float_vec_bld, gallivm, type);
lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
@@ -1311,7 +1434,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld.int_size = tex_width;
}
else {
bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef,
bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
tex_width, LLVMConstInt(i32t, 0, 0), "");
if (dims >= 2) {
bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
@@ -1327,7 +1450,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
/* For debug: no-op texture sampling */
lp_build_sample_nop(gallivm,
bld.texel_type,
num_coords,
coords,
texel_out);
}
@@ -1352,6 +1474,18 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
static_state->wrap_t);
}

if (is_fetch) {
lp_build_fetch_texel(&bld, unit, coords,
explicit_lod, offsets,
texel_out);

if (static_state->target != PIPE_BUFFER) {
apply_sampler_swizzle(&bld, texel_out);
}

return;
}

lp_build_sample_common(&bld, unit,
&s, &t, &r,
derivs, lod_bias, explicit_lod,
@@ -1450,20 +1584,25 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld4.int_type = lp_type_int(32);
bld4.coord_type = type4;
bld4.int_coord_type = lp_int_type(type4);
bld4.float_size_type = lp_type_float(32);
bld4.float_size_type.length = dims > 1 ? 4 : 1;
bld4.int_size_type = lp_int_type(bld4.float_size_type);
bld4.float_size_in_type = lp_type_float(32);
bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
bld4.float_size_type = bld4.float_size_in_type;
bld4.int_size_type = bld4.int_size_in_type;
bld4.texel_type = type4;
bld4.perquadf_type = type4;
/* we want native vector size to be able to use our intrinsics */
bld4.perquadf_type.length = 1;
bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
bld4.num_lods = 1;

lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);

+ 5
- 4
src/gallium/auxiliary/gallivm/lp_bld_swizzle.c View File

@@ -554,15 +554,16 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,


/**
* Pack first element of aos values,
* Pack n-th element of aos values,
* pad out to destination size.
* i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
* i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
*/
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
struct lp_type src_type,
struct lp_type dst_type,
const LLVMValueRef src)
const LLVMValueRef src,
unsigned channel)
{
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef undef = LLVMGetUndef(i32t);
@@ -574,7 +575,7 @@ lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
assert(num_src <= num_dst);

for (i = 0; i < num_src; i++) {
shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
}
for (i = num_src; i < num_dst; i++) {
shuffles[i] = undef;

+ 2
- 1
src/gallium/auxiliary/gallivm/lp_bld_swizzle.h View File

@@ -117,7 +117,8 @@ LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
struct lp_type src_type,
struct lp_type dst_type,
const LLVMValueRef src);
const LLVMValueRef src,
unsigned channel);


LLVMValueRef

+ 60
- 0
src/gallium/auxiliary/gallivm/lp_bld_tgsi.c View File

@@ -334,6 +334,66 @@ lp_build_emit_fetch(

}


LLVMValueRef
lp_build_emit_fetch_texoffset(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
unsigned tex_off_op,
const unsigned chan_index)
{
const struct tgsi_texture_offset *off = &inst->TexOffsets[tex_off_op];
struct tgsi_full_src_register reg;
unsigned swizzle;
LLVMValueRef res;
enum tgsi_opcode_type stype = TGSI_TYPE_SIGNED;

/* convert offset "register" to ordinary register so can use normal emit funcs */
memset(&reg, 0, sizeof(reg));
reg.Register.File = off->File;
reg.Register.Index = off->Index;
reg.Register.SwizzleX = off->SwizzleX;
reg.Register.SwizzleY = off->SwizzleY;
reg.Register.SwizzleZ = off->SwizzleZ;

if (chan_index == LP_CHAN_ALL) {
swizzle = ~0;
} else {
swizzle = tgsi_util_get_src_register_swizzle(&reg.Register, chan_index);
if (swizzle > 2) {
assert(0 && "invalid swizzle in emit_fetch_texoffset()");
return bld_base->base.undef;
}
}

assert(off->Index <= bld_base->info->file_max[off->File]);

if (bld_base->emit_fetch_funcs[off->File]) {
res = bld_base->emit_fetch_funcs[off->File](bld_base, &reg, stype,
swizzle);
} else {
assert(0 && "invalid src register in emit_fetch_texoffset()");
return bld_base->base.undef;
}

/*
* Swizzle the argument
*/

if (swizzle == ~0) {
res = bld_base->emit_swizzle(bld_base, res,
off->SwizzleX,
off->SwizzleY,
off->SwizzleZ,
/* there's no 4th channel */
off->SwizzleX);
}

return res;

}


boolean
lp_build_tgsi_llvm(
struct lp_build_tgsi_context * bld_base,

+ 10
- 1
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h View File

@@ -172,9 +172,10 @@ struct lp_build_sampler_soa
(*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler,
struct gallivm_state *gallivm,
struct lp_type type,
boolean is_fetch,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
@@ -555,6 +556,14 @@ lp_build_emit_fetch(
unsigned src_op,
const unsigned chan_index);


LLVMValueRef
lp_build_emit_fetch_texoffset(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
unsigned tex_off_op,
const unsigned chan_index);

boolean
lp_build_tgsi_llvm(
struct lp_build_tgsi_context * bld_base,

+ 112
- 3
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c View File

@@ -1146,7 +1146,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
unsigned unit;
LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
LLVMValueRef coords[4];
LLVMValueRef offsets[3] = { NULL };
struct lp_derivatives derivs;
unsigned num_coords;
unsigned dims;
@@ -1225,7 +1226,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
}
for (i = num_coords; i < 3; i++) {
for (i = num_coords; i < 4; i++) {
coords[i] = bld->bld_base.base.undef;
}

@@ -1285,15 +1286,111 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
unit = inst->Src[1].Register.Index;
}

/* some advanced gather instructions (txgo) would require 4 offsets */
if (inst->Texture.NumOffsets == 1) {
unsigned dim;
for (dim = 0; dim < dims; dim++) {
offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
}
}

bld->sampler->emit_fetch_texel(bld->sampler,
bld->bld_base.base.gallivm,
bld->bld_base.base.type,
unit, num_coords, coords,
FALSE,
unit, coords,
offsets,
&derivs,
lod_bias, explicit_lod,
texel);
}

/**
 * Emit code for a texel fetch (TXF) instruction.
 *
 * Coordinates are read from channels 0..num_coords-1 of source operand 0 and
 * the explicit lod (for every target except TGSI_TEXTURE_BUFFER) from
 * channel 3. The unit comes from the register index of source operand 1.
 * If the instruction carries exactly one texture offset, one offset value
 * per texture dimension is fetched as well. The work is then handed to the
 * sampler generator's emit_fetch_texel hook with is_fetch = TRUE.
 *
 * \param bld    SoA TGSI build context
 * \param inst   the TXF instruction
 * \param texel  receives the four result channels; filled with an undefined
 *               integer vector when no sampler generator is present
 */
static void
emit_txf( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          LLVMValueRef *texel)
{
   LLVMValueRef undef_ivec = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef lod = NULL;
   LLVMValueRef texcoords[3];
   LLVMValueRef texoffsets[3] = { NULL };
   struct lp_derivatives derivs;
   unsigned coord_count;
   unsigned dim_count;
   unsigned n;

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (n = 0; n < 4; n++) {
         texel[n] = undef_ivec;
      }
      return;
   }

   /* Derivatives are not fetched for a texel fetch; pass undefined values. */
   derivs.ddx_ddy[0] = undef_ivec;
   derivs.ddx_ddy[1] = undef_ivec;

   /* coord_count: coordinates to read; dim_count: dimensions taking an offset. */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      coord_count = 1;
      dim_count = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      coord_count = 2;
      dim_count = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      coord_count = 2;
      dim_count = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      coord_count = 3;
      dim_count = 2;
      break;
   case TGSI_TEXTURE_3D:
      coord_count = 3;
      dim_count = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers ? */
   if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
      lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   }

   /* Fetch the real coordinates, pad the remaining slots with undef. */
   for (n = 0; n < 3; n++) {
      if (n < coord_count) {
         texcoords[n] = lp_build_emit_fetch( &bld->bld_base, inst, 0, n );
      }
      else {
         texcoords[n] = undef_ivec;
      }
   }

   if (inst->Texture.NumOffsets == 1) {
      for (n = 0; n < dim_count; n++) {
         texoffsets[n] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, n );
      }
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  TRUE,
                                  inst->Src[1].Register.Index,
                                  texcoords,
                                  texoffsets,
                                  &derivs,
                                  NULL, lod,
                                  texel);
}

static void
emit_txq( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
@@ -1755,6 +1852,17 @@ txq_emit(
emit_txq(bld, emit_data->inst, emit_data->output);
}

/**
 * Opcode action callback for TGSI_OPCODE_TXF: forwards the instruction and
 * output array from emit_data to emit_txf() on the SoA context obtained
 * from bld_base.
 */
static void
txf_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_txf(lp_soa_context(bld_base), emit_data->inst, emit_data->output);
}

static void
cal_emit(
const struct lp_build_tgsi_action * action,
@@ -2126,6 +2234,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;

lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);


+ 1
- 0
src/gallium/auxiliary/tgsi/tgsi_info.c View File

@@ -293,6 +293,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_USHR:
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_TXF:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_MOD:
case TGSI_OPCODE_I2F:

+ 3
- 1
src/gallium/drivers/llvmpipe/lp_screen.c View File

@@ -175,9 +175,11 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 0;
case PIPE_CAP_SCALED_RESOLVE:
return 0;
/* this is a lie; could support arbitrarily large offsets */
case PIPE_CAP_MIN_TEXEL_OFFSET:
return -8;
case PIPE_CAP_MAX_TEXEL_OFFSET:
return 0;
return 7;
case PIPE_CAP_CONDITIONAL_RENDER:
return 1;
case PIPE_CAP_TEXTURE_BARRIER:

+ 6
- 3
src/gallium/drivers/llvmpipe/lp_tex_sample.c View File

@@ -176,9 +176,10 @@ static void
lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
struct gallivm_state *gallivm,
struct lp_type type,
boolean is_fetch,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
@@ -189,7 +190,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
assert(unit < PIPE_MAX_SAMPLERS);
if (LP_PERF & PERF_NO_TEX) {
lp_build_sample_nop(gallivm, type, num_coords, coords, texel);
lp_build_sample_nop(gallivm, type, coords, texel);
return;
}

@@ -197,8 +198,10 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
type,
is_fetch,
unit,
num_coords, coords,
coords,
offsets,
derivs,
lod_bias, explicit_lod,
texel);

Loading…
Cancel
Save