compute shaders need kernel input support

Acked-by: Roland Scheidegger <sroland@vmware.com>
master
@@ -1152,6 +1152,17 @@ static void visit_discard(struct lp_build_nir_context *bld_base, | |||
bld_base->discard(bld_base, cond); | |||
} | |||
static void visit_load_kernel_input(struct lp_build_nir_context *bld_base, | |||
nir_intrinsic_instr *instr, LLVMValueRef result[4]) | |||
{ | |||
LLVMValueRef offset = get_src(bld_base, instr->src[0]); | |||
bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]); | |||
bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), | |||
nir_src_bit_size(instr->src[0]), | |||
offset_is_uniform, offset, result); | |||
} | |||
static void visit_intrinsic(struct lp_build_nir_context *bld_base, | |||
nir_intrinsic_instr *instr) | |||
{ | |||
@@ -1254,6 +1265,9 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base, | |||
break; | |||
case nir_intrinsic_memory_barrier: | |||
break; | |||
case nir_intrinsic_load_kernel_input: | |||
visit_load_kernel_input(bld_base, instr, result); | |||
break; | |||
default: | |||
assert(0); | |||
break; |
@@ -61,6 +61,13 @@ struct lp_build_nir_context | |||
bool offset_is_uniform, | |||
LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[4]); | |||
void (*load_kernel_arg)(struct lp_build_nir_context *bld_base, | |||
unsigned nc, | |||
unsigned bit_size, | |||
unsigned offset_bit_size, | |||
bool offset_is_uniform, | |||
LLVMValueRef offset, LLVMValueRef result[4]); | |||
/* for SSBO and shared memory */ | |||
void (*load_mem)(struct lp_build_nir_context *bld_base, | |||
unsigned nc, unsigned bit_size, | |||
@@ -186,6 +193,8 @@ struct lp_build_nir_soa_context | |||
* set. The inputs[] array above is unused then. | |||
*/ | |||
LLVMValueRef inputs_array; | |||
LLVMValueRef kernel_args_ptr; | |||
}; | |||
bool |
@@ -488,6 +488,45 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base, | |||
} | |||
} | |||
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, | |||
unsigned nc, | |||
unsigned bit_size, | |||
unsigned offset_bit_size, | |||
bool offset_is_uniform, | |||
LLVMValueRef offset, | |||
LLVMValueRef result[4]) | |||
{ | |||
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; | |||
struct gallivm_state *gallivm = bld_base->base.gallivm; | |||
LLVMBuilderRef builder = gallivm->builder; | |||
struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size); | |||
LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr; | |||
unsigned size_shift = 0; | |||
struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size); | |||
if (bit_size == 16) | |||
size_shift = 1; | |||
else if (bit_size == 32) | |||
size_shift = 2; | |||
else if (bit_size == 64) | |||
size_shift = 3; | |||
if (size_shift) | |||
offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift)); | |||
LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0); | |||
kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, ""); | |||
if (offset_is_uniform) { | |||
offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); | |||
for (unsigned c = 0; c < nc; c++) { | |||
LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), ""); | |||
LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset); | |||
result[c] = lp_build_broadcast_scalar(bld_broad, scalar); | |||
} | |||
} | |||
} | |||
static void emit_load_ubo(struct lp_build_nir_context *bld_base, | |||
unsigned nc, | |||
unsigned bit_size, | |||
@@ -1205,6 +1244,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, | |||
bld.bld_base.store_reg = emit_store_reg; | |||
bld.bld_base.emit_var_decl = emit_var_decl; | |||
bld.bld_base.load_ubo = emit_load_ubo; | |||
bld.bld_base.load_kernel_arg = emit_load_kernel_arg; | |||
bld.bld_base.tex = emit_tex; | |||
bld.bld_base.tex_size = emit_tex_size; | |||
bld.bld_base.bgnloop = bgnloop; | |||
@@ -1241,7 +1281,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, | |||
bld.image = params->image; | |||
bld.shared_ptr = params->shared_ptr; | |||
bld.coro = params->coro; | |||
bld.kernel_args_ptr = params->kernel_args; | |||
bld.indirects = 0; | |||
if (params->info->indirect_files & (1 << TGSI_FILE_INPUT)) | |||
bld.indirects |= nir_var_shader_in; |
@@ -256,6 +256,7 @@ struct lp_build_tgsi_params { | |||
const struct lp_build_image_soa *image; | |||
LLVMValueRef shared_ptr; | |||
const struct lp_build_coro_suspend_info *coro; | |||
LLVMValueRef kernel_args; | |||
}; | |||
void |
@@ -362,6 +362,8 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp) | |||
elem_types[LP_JIT_CS_CTX_SHARED_SIZE] = LLVMInt32TypeInContext(lc); | |||
elem_types[LP_JIT_CS_CTX_KERNEL_ARGS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); | |||
cs_context_type = LLVMStructTypeInContext(lc, elem_types, | |||
ARRAY_SIZE(elem_types), 0); | |||
@@ -389,6 +391,9 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp) | |||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, shared_size, | |||
gallivm->target, cs_context_type, | |||
LP_JIT_CS_CTX_SHARED_SIZE); | |||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, kernel_args, | |||
gallivm->target, cs_context_type, | |||
LP_JIT_CS_CTX_KERNEL_ARGS); | |||
LP_CHECK_STRUCT_SIZE(struct lp_jit_cs_context, | |||
gallivm->target, cs_context_type); | |||
@@ -324,6 +324,8 @@ struct lp_jit_cs_context | |||
const uint32_t *ssbos[LP_MAX_TGSI_SHADER_BUFFERS]; | |||
int num_ssbos[LP_MAX_TGSI_SHADER_BUFFERS]; | |||
void *kernel_args; | |||
uint32_t shared_size; | |||
}; | |||
@@ -339,6 +341,7 @@ enum { | |||
LP_JIT_CS_CTX_IMAGES, | |||
LP_JIT_CS_CTX_SSBOS, | |||
LP_JIT_CS_CTX_NUM_SSBOS, | |||
LP_JIT_CS_CTX_KERNEL_ARGS, | |||
LP_JIT_CS_CTX_SHARED_SIZE, | |||
LP_JIT_CS_CTX_COUNT | |||
}; | |||
@@ -367,6 +370,9 @@ enum { | |||
/*
 * Accessor for the shared_size member (index LP_JIT_CS_CTX_SHARED_SIZE) of
 * the compute JIT context referenced by _ptr, built through
 * lp_build_struct_get_ptr.
 */
#define lp_jit_cs_context_shared_size(_gallivm, _ptr) \ | |||
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size") | |||
/*
 * Accessor for the kernel_args member (index LP_JIT_CS_CTX_KERNEL_ARGS) of
 * the compute JIT context referenced by _ptr. This one goes through
 * lp_build_struct_get rather than lp_build_struct_get_ptr -- presumably it
 * yields the member's value rather than its address; confirm against the
 * helper's definition.
 */
#define lp_jit_cs_context_kernel_args(_gallivm, _ptr) \ | |||
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CS_CTX_KERNEL_ARGS, "kernel_args") | |||
typedef void | |||
(*lp_jit_cs_func)(const struct lp_jit_cs_context *context, |
@@ -277,6 +277,7 @@ generate_compute(struct llvmpipe_context *lp, | |||
LLVMValueRef consts_ptr, num_consts_ptr; | |||
LLVMValueRef ssbo_ptr, num_ssbo_ptr; | |||
LLVMValueRef shared_ptr; | |||
LLVMValueRef kernel_args_ptr; | |||
struct lp_build_mask_context mask; | |||
struct lp_bld_tgsi_system_values system_values; | |||
@@ -285,6 +286,8 @@ generate_compute(struct llvmpipe_context *lp, | |||
num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr); | |||
ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr); | |||
num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr); | |||
kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm, context_ptr); | |||
shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr); | |||
/* these are coroutine entrypoint necessities */ | |||
@@ -360,6 +363,7 @@ generate_compute(struct llvmpipe_context *lp, | |||
params.image = image; | |||
params.shared_ptr = shared_ptr; | |||
params.coro = &coro_info; | |||
params.kernel_args = kernel_args_ptr; | |||
if (shader->base.type == PIPE_SHADER_IR_TGSI) | |||
lp_build_tgsi_soa(gallivm, shader->base.tokens, ¶ms, NULL); | |||
@@ -1093,7 +1097,7 @@ update_csctx_ssbo(struct llvmpipe_context *llvmpipe) | |||
} | |||
static void | |||
llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe) | |||
llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, void *input) | |||
{ | |||
if (llvmpipe->cs_dirty & (LP_CSNEW_CS)) | |||
llvmpipe_update_cs(llvmpipe); | |||
@@ -1127,6 +1131,12 @@ llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe) | |||
ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]), | |||
llvmpipe->images[PIPE_SHADER_COMPUTE]); | |||
if (input) { | |||
struct lp_cs_context *csctx = llvmpipe->csctx; | |||
csctx->input = input; | |||
csctx->cs.current.jit_context.kernel_args = input; | |||
} | |||
llvmpipe->cs_dirty = 0; | |||
} | |||
@@ -1193,7 +1203,7 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe, | |||
memset(&job_info, 0, sizeof(job_info)); | |||
llvmpipe_cs_update_derived(llvmpipe); | |||
llvmpipe_cs_update_derived(llvmpipe, info->input); | |||
fill_grid_size(pipe, info, job_info.grid_size); | |||
@@ -120,6 +120,8 @@ struct lp_cs_context { | |||
struct { | |||
struct pipe_image_view current; | |||
} images[LP_MAX_TGSI_SHADER_IMAGES]; | |||
void *input; | |||
}; | |||
struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe); |