Browse Source

ac: replace llvm.SI.tbuffer.store with llvm.amdgcn.buffer.store if ADD_TID=0

ADD_TID doesn't work. Needs more investigation.

v2: remove leftover dead code

Reviewed-by: Dave Airlie <airlied@redhat.com> (v1)
tags/17.1-branchpoint
Marek Olšák 9 years ago
parent
commit
97e21cfa25

+ 57
- 1
src/amd/common/ac_llvm_build.c View File

@@ -551,8 +551,64 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
LLVMValueRef soffset,
unsigned inst_offset,
bool glc,
bool slc)
bool slc,
bool writeonly_memory,
bool has_add_tid)
{
/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
/* Split 3-channel stores, because LLVM doesn't support 3-channel
* intrinsics. */
if (num_channels == 3) {
LLVMValueRef v[3], v01;

for (int i = 0; i < 3; i++) {
v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
LLVMConstInt(ctx->i32, i, 0), "");
}
v01 = ac_build_gather_values(ctx, v, 2);

ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
soffset, inst_offset, glc, slc,
writeonly_memory, has_add_tid);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
soffset, inst_offset + 8,
glc, slc,
writeonly_memory, has_add_tid);
return;
}

unsigned func = CLAMP(num_channels, 1, 3) - 1;
static const char *types[] = {"f32", "v2f32", "v4f32"};
char name[256];
LLVMValueRef offset = soffset;

if (inst_offset)
offset = LLVMBuildAdd(ctx->builder, offset,
LLVMConstInt(ctx->i32, inst_offset, 0), "");
if (voffset)
offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");

LLVMValueRef args[] = {
bitcast_to_float(ctx, vdata),
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
LLVMConstInt(ctx->i32, 0, 0),
offset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0),
};

snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
types[func]);

ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
args, ARRAY_SIZE(args),
writeonly_memory ?
AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
AC_FUNC_ATTR_WRITEONLY);
return;
}

static unsigned dfmt[] = {
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_DATA_FORMAT_32_32,

+ 3
- 1
src/amd/common/ac_llvm_build.h View File

@@ -130,7 +130,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
LLVMValueRef soffset,
unsigned inst_offset,
bool glc,
bool slc);
bool slc,
bool writeonly_memory,
bool has_add_tid);
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,

+ 2
- 2
src/amd/common/ac_nir_to_llvm.c View File

@@ -3159,7 +3159,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
out_val, 1,
voffset, ctx->gs2vs_offset, 0,
1, 1);
1, 1, true, true);
}
idx += slot_inc;
}
@@ -4675,7 +4675,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
out_val, 1,
NULL, ctx->es2gs_offset,
(4 * param_index + j + start) * 4,
1, 1);
1, 1, true, true);
}
}
ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;

+ 11
- 11
src/gallium/drivers/radeonsi/si_shader.c View File

@@ -1049,7 +1049,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
buf_addr, base,
4 * chan_index, 1, 0);
4 * chan_index, 1, 0, true, false);
}
}

@@ -1057,7 +1057,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
base, 0, 1, 0);
base, 0, 1, 0, true, false);
}
}

@@ -2087,7 +2087,7 @@ static void emit_streamout_output(struct si_shader_context *ctx,
vdata, num_comps,
so_write_offsets[buf_idx],
LLVMConstInt(ctx->i32, 0, 0),
stream_out->dst_offset * 4, 1, 1);
stream_out->dst_offset * 4, 1, 1, true, false);
}

/**
@@ -2412,7 +2412,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
lds_ptr);

ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
buffer_offset, 0, 1, 0);
buffer_offset, 0, 1, 0, true, false);
}
}

@@ -2527,18 +2527,18 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
ac_build_buffer_store_dword(&ctx->ac, buffer,
lp_build_const_int32(gallivm, 0x80000000),
1, lp_build_const_int32(gallivm, 0), tf_base,
0, 1, 0);
0, 1, 0, true, false);

lp_build_endif(&inner_if_ctx);

/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
MIN2(stride, 4), byteoffset, tf_base,
4, 1, 0);
4, 1, 0, true, false);
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
stride - 4, byteoffset, tf_base,
20, 1, 0);
20, 1, 0, true, false);

/* Store the tess factors into the offchip buffer if TES reads them. */
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
@@ -2560,7 +2560,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,

ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
outer_comps, tf_outer_offset,
base, 0, 1, 0);
base, 0, 1, 0, true, false);
if (inner_comps) {
param_inner = si_shader_io_get_unique_index(
TGSI_SEMANTIC_TESSINNER, 0);
@@ -2571,7 +2571,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
lp_build_gather_values(gallivm, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
inner_comps, tf_inner_offset,
base, 0, 1, 0);
base, 0, 1, 0, true, false);
}
}

@@ -2695,7 +2695,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
ctx->esgs_ring,
out_val, 1, NULL, soffset,
(4 * param_index + chan) * 4,
1, 1);
1, 1, true, true);
}
}
}
@@ -5063,7 +5063,7 @@ static void si_llvm_emit_vertex(
ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
1, 1);
1, 1, true, true);
}
}


Loading…
Cancel
Save