force_persample_interp was partly a state and partly emulated by shader code, but since we want to do this in a fragment shader prolog, we need to put it into the shader key, which will be used to generate the prolog. This also removes the spi_ps_input state atom and moves the SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR registers to the PS state.
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Tag: tags/11.2-branchpoint
@@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx) | |||
si_mark_atom_dirty(ctx, &ctx->db_render_state); | |||
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); | |||
si_mark_atom_dirty(ctx, &ctx->spi_map); | |||
si_mark_atom_dirty(ctx, &ctx->spi_ps_input); | |||
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); | |||
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); | |||
si_all_descriptors_begin_new_cs(ctx); |
@@ -202,7 +202,6 @@ struct si_context { | |||
struct si_viewports viewports; | |||
struct si_stencil_ref stencil_ref; | |||
struct r600_atom spi_map; | |||
struct r600_atom spi_ps_input; | |||
/* Precomputed states. */ | |||
struct si_pm4_state *init_config; | |||
@@ -222,7 +221,6 @@ struct si_context { | |||
struct si_vertex_element *vertex_elements; | |||
unsigned sprite_coord_enable; | |||
bool flatshade; | |||
bool force_persample_interp; | |||
/* shader descriptors */ | |||
struct si_descriptors vertex_buffers; |
@@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) | |||
} | |||
/* This shouldn't be used by explicit INTERP opcodes. */ | |||
static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, | |||
unsigned param) | |||
static unsigned select_interp_param(struct si_shader_context *si_shader_ctx, | |||
unsigned param) | |||
{ | |||
struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; | |||
unsigned sample_param = 0; | |||
LLVMValueRef default_ij, sample_ij, force_sample; | |||
default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param); | |||
if (!si_shader_ctx->shader->key.ps.force_persample_interp) | |||
return param; | |||
/* If the shader doesn't use center/centroid, just return the parameter. | |||
* | |||
@@ -850,36 +847,15 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, | |||
switch (param) { | |||
case SI_PARAM_PERSP_CENTROID: | |||
case SI_PARAM_PERSP_CENTER: | |||
if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp) | |||
return default_ij; | |||
sample_param = SI_PARAM_PERSP_SAMPLE; | |||
break; | |||
return SI_PARAM_PERSP_SAMPLE; | |||
case SI_PARAM_LINEAR_CENTROID: | |||
case SI_PARAM_LINEAR_CENTER: | |||
if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear) | |||
return default_ij; | |||
sample_param = SI_PARAM_LINEAR_SAMPLE; | |||
break; | |||
return SI_PARAM_LINEAR_SAMPLE; | |||
default: | |||
return default_ij; | |||
return param; | |||
} | |||
/* Otherwise, we have to select (i,j) based on a user data SGPR. */ | |||
sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param); | |||
/* TODO: this can be done more efficiently by switching between | |||
* 2 prologs. | |||
*/ | |||
force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, | |||
SI_PARAM_PS_STATE_BITS); | |||
force_sample = LLVMBuildTrunc(gallivm->builder, force_sample, | |||
LLVMInt1TypeInContext(gallivm->context), ""); | |||
return LLVMBuildSelect(gallivm->builder, force_sample, | |||
sample_ij, default_ij, ""); | |||
} | |||
static void declare_input_fs( | |||
@@ -918,8 +894,11 @@ static void declare_input_fs( | |||
decl->Interp.Location); | |||
if (interp_param_idx == -1) | |||
return; | |||
else if (interp_param_idx) | |||
interp_param = get_interp_param(si_shader_ctx, interp_param_idx); | |||
else if (interp_param_idx) { | |||
interp_param_idx = select_interp_param(si_shader_ctx, | |||
interp_param_idx); | |||
interp_param = LLVMGetParam(main_fn, interp_param_idx); | |||
} | |||
/* fs.constant returns the param from the middle vertex, so it's not | |||
* really useful for flat shading. It's meant to be used for custom | |||
@@ -3633,7 +3612,6 @@ static void create_function(struct si_shader_context *si_shader_ctx) | |||
case TGSI_PROCESSOR_FRAGMENT: | |||
params[SI_PARAM_ALPHA_REF] = f32; | |||
params[SI_PARAM_PS_STATE_BITS] = i32; | |||
params[SI_PARAM_PRIM_MASK] = i32; | |||
last_sgpr = SI_PARAM_PRIM_MASK; | |||
params[SI_PARAM_PERSP_SAMPLE] = v2i32; |
@@ -88,7 +88,6 @@ struct radeon_shader_reloc; | |||
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ | |||
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ | |||
#define SI_SGPR_ALPHA_REF 8 /* PS only */ | |||
#define SI_SGPR_PS_STATE_BITS 9 /* PS only */ | |||
#define SI_VS_NUM_USER_SGPR 13 /* API VS */ | |||
#define SI_ES_NUM_USER_SGPR 12 /* API VS */ | |||
@@ -97,7 +96,7 @@ struct radeon_shader_reloc; | |||
#define SI_TES_NUM_USER_SGPR 10 | |||
#define SI_GS_NUM_USER_SGPR 8 | |||
#define SI_GSCOPY_NUM_USER_SGPR 4 | |||
#define SI_PS_NUM_USER_SGPR 10 | |||
#define SI_PS_NUM_USER_SGPR 9 | |||
/* LLVM function parameter indices */ | |||
#define SI_PARAM_RW_BUFFERS 0 | |||
@@ -152,27 +151,23 @@ struct radeon_shader_reloc; | |||
/* PS only parameters */ | |||
#define SI_PARAM_ALPHA_REF 4 | |||
/* Bits: | |||
* 0: force_persample_interp | |||
*/ | |||
#define SI_PARAM_PS_STATE_BITS 5 | |||
#define SI_PARAM_PRIM_MASK 6 | |||
#define SI_PARAM_PERSP_SAMPLE 7 | |||
#define SI_PARAM_PERSP_CENTER 8 | |||
#define SI_PARAM_PERSP_CENTROID 9 | |||
#define SI_PARAM_PERSP_PULL_MODEL 10 | |||
#define SI_PARAM_LINEAR_SAMPLE 11 | |||
#define SI_PARAM_LINEAR_CENTER 12 | |||
#define SI_PARAM_LINEAR_CENTROID 13 | |||
#define SI_PARAM_LINE_STIPPLE_TEX 14 | |||
#define SI_PARAM_POS_X_FLOAT 15 | |||
#define SI_PARAM_POS_Y_FLOAT 16 | |||
#define SI_PARAM_POS_Z_FLOAT 17 | |||
#define SI_PARAM_POS_W_FLOAT 18 | |||
#define SI_PARAM_FRONT_FACE 19 | |||
#define SI_PARAM_ANCILLARY 20 | |||
#define SI_PARAM_SAMPLE_COVERAGE 21 | |||
#define SI_PARAM_POS_FIXED_PT 22 | |||
#define SI_PARAM_PRIM_MASK 5 | |||
#define SI_PARAM_PERSP_SAMPLE 6 | |||
#define SI_PARAM_PERSP_CENTER 7 | |||
#define SI_PARAM_PERSP_CENTROID 8 | |||
#define SI_PARAM_PERSP_PULL_MODEL 9 | |||
#define SI_PARAM_LINEAR_SAMPLE 10 | |||
#define SI_PARAM_LINEAR_CENTER 11 | |||
#define SI_PARAM_LINEAR_CENTROID 12 | |||
#define SI_PARAM_LINE_STIPPLE_TEX 13 | |||
#define SI_PARAM_POS_X_FLOAT 14 | |||
#define SI_PARAM_POS_Y_FLOAT 15 | |||
#define SI_PARAM_POS_Z_FLOAT 16 | |||
#define SI_PARAM_POS_W_FLOAT 17 | |||
#define SI_PARAM_FRONT_FACE 18 | |||
#define SI_PARAM_ANCILLARY 19 | |||
#define SI_PARAM_SAMPLE_COVERAGE 20 | |||
#define SI_PARAM_POS_FIXED_PT 21 | |||
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) | |||
@@ -193,14 +188,6 @@ struct si_shader_selector { | |||
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ | |||
unsigned type; | |||
/* Whether the shader has to use a conditional assignment to | |||
* choose between weights when emulating | |||
* pipe_rasterizer_state::force_persample_interp. | |||
* If false, "si_emit_spi_ps_input" will take care of it instead. | |||
*/ | |||
bool forces_persample_interp_for_persp; | |||
bool forces_persample_interp_for_linear; | |||
/* GS parameters. */ | |||
unsigned esgs_itemsize; | |||
unsigned gs_input_verts_per_prim; | |||
@@ -245,6 +232,7 @@ union si_shader_key { | |||
unsigned poly_stipple:1; | |||
unsigned poly_line_smoothing:1; | |||
unsigned clamp_color:1; | |||
unsigned force_persample_interp:1; | |||
} ps; | |||
struct { | |||
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; |
@@ -133,7 +133,6 @@ union si_state_atoms { | |||
struct r600_atom *viewports; | |||
struct r600_atom *stencil_ref; | |||
struct r600_atom *spi_map; | |||
struct r600_atom *spi_ps_input; | |||
} s; | |||
struct r600_atom *array[0]; | |||
}; |
@@ -472,6 +472,17 @@ static void si_shader_ps(struct si_shader *shader) | |||
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); | |||
uint64_t va; | |||
bool has_centroid; | |||
unsigned input_ena = shader->config.spi_ps_input_ena; | |||
/* we need to enable at least one of them, otherwise we hang the GPU */ | |||
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || | |||
G_0286CC_PERSP_CENTER_ENA(input_ena) || | |||
G_0286CC_PERSP_CENTROID_ENA(input_ena) || | |||
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || | |||
G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || | |||
G_0286CC_LINEAR_CENTER_ENA(input_ena) || | |||
G_0286CC_LINEAR_CENTROID_ENA(input_ena) || | |||
G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); | |||
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); | |||
@@ -515,6 +526,9 @@ static void si_shader_ps(struct si_shader *shader) | |||
shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) | |||
spi_shader_col_format = V_028714_SPI_SHADER_32_R; | |||
si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); | |||
si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena); | |||
/* Set interpolation controls. */ | |||
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) || | |||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena); | |||
@@ -706,6 +720,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, | |||
(is_line && rs->line_smooth)) && | |||
sctx->framebuffer.nr_samples <= 1; | |||
key->ps.clamp_color = rs->clamp_fragment_color; | |||
key->ps.force_persample_interp = rs->force_persample_interp && | |||
rs->multisample_enable && | |||
sctx->framebuffer.nr_samples > 1 && | |||
sctx->ps_iter_samples > 1 && | |||
(sel->info.uses_persp_center || | |||
sel->info.uses_persp_centroid || | |||
sel->info.uses_linear_center || | |||
sel->info.uses_linear_centroid); | |||
} | |||
key->ps.alpha_func = si_get_alpha_test_func(sctx); | |||
@@ -808,7 +831,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, | |||
sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor); | |||
p_atomic_inc(&sscreen->b.num_shaders_created); | |||
/* First set which opcode uses which (i,j) pair. */ | |||
/* Set which opcode uses which (i,j) pair. */ | |||
if (sel->info.uses_persp_opcode_interp_centroid) | |||
sel->info.uses_persp_centroid = true; | |||
@@ -823,19 +846,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx, | |||
sel->info.uses_linear_opcode_interp_sample) | |||
sel->info.uses_linear_center = true; | |||
/* Determine if the shader has to use a conditional assignment when | |||
* emulating force_persample_interp. | |||
*/ | |||
sel->forces_persample_interp_for_persp = | |||
sel->info.uses_persp_center + | |||
sel->info.uses_persp_centroid + | |||
sel->info.uses_persp_sample >= 2; | |||
sel->forces_persample_interp_for_linear = | |||
sel->info.uses_linear_center + | |||
sel->info.uses_linear_centroid + | |||
sel->info.uses_linear_sample >= 2; | |||
switch (sel->type) { | |||
case PIPE_SHADER_GEOMETRY: | |||
sel->gs_output_prim = | |||
@@ -1181,68 +1191,6 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) | |||
assert(num_interp == num_written); | |||
} | |||
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom) | |||
{ | |||
struct radeon_winsys_cs *cs = sctx->b.gfx.cs; | |||
struct si_shader *ps = sctx->ps_shader.current; | |||
unsigned input_ena; | |||
if (!ps) | |||
return; | |||
input_ena = ps->config.spi_ps_input_ena; | |||
/* we need to enable at least one of them, otherwise we hang the GPU */ | |||
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || | |||
G_0286CC_PERSP_CENTER_ENA(input_ena) || | |||
G_0286CC_PERSP_CENTROID_ENA(input_ena) || | |||
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || | |||
G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || | |||
G_0286CC_LINEAR_CENTER_ENA(input_ena) || | |||
G_0286CC_LINEAR_CENTROID_ENA(input_ena) || | |||
G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); | |||
if (sctx->force_persample_interp) { | |||
unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) + | |||
G_0286CC_PERSP_CENTER_ENA(input_ena) + | |||
G_0286CC_PERSP_CENTROID_ENA(input_ena); | |||
unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) + | |||
G_0286CC_LINEAR_CENTER_ENA(input_ena) + | |||
G_0286CC_LINEAR_CENTROID_ENA(input_ena); | |||
/* If only one set of (i,j) coordinates is used, we can disable | |||
* CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates | |||
* where CENTER/CENTROID are expected, effectively forcing per-sample | |||
* interpolation. | |||
*/ | |||
if (num_persp == 1) { | |||
input_ena &= C_0286CC_PERSP_CENTER_ENA; | |||
input_ena &= C_0286CC_PERSP_CENTROID_ENA; | |||
input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1); | |||
} | |||
if (num_linear == 1) { | |||
input_ena &= C_0286CC_LINEAR_CENTER_ENA; | |||
input_ena &= C_0286CC_LINEAR_CENTROID_ENA; | |||
input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1); | |||
} | |||
/* If at least 2 sets of coordinates are used, we can't use this | |||
* trick and have to select SAMPLE using a conditional assignment | |||
* in the shader with "force_persample_interp" being a shader constant. | |||
*/ | |||
} | |||
radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); | |||
radeon_emit(cs, input_ena); | |||
radeon_emit(cs, input_ena); | |||
if (ps->selector->forces_persample_interp_for_persp || | |||
ps->selector->forces_persample_interp_for_linear) | |||
radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + | |||
SI_SGPR_PS_STATE_BITS * 4, | |||
sctx->force_persample_interp); | |||
} | |||
/** | |||
* Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that. | |||
*/ | |||
@@ -1774,12 +1722,6 @@ bool si_update_shaders(struct si_context *sctx) | |||
si_mark_atom_dirty(sctx, &sctx->spi_map); | |||
} | |||
if (si_pm4_state_changed(sctx, ps) || | |||
sctx->force_persample_interp != rs->force_persample_interp) { | |||
sctx->force_persample_interp = rs->force_persample_interp; | |||
si_mark_atom_dirty(sctx, &sctx->spi_ps_input); | |||
} | |||
if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps)) | |||
si_mark_atom_dirty(sctx, &sctx->cb_render_state); | |||
@@ -1812,7 +1754,6 @@ bool si_update_shaders(struct si_context *sctx) | |||
void si_init_shader_functions(struct si_context *sctx) | |||
{ | |||
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); | |||
si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input); | |||
sctx->b.b.create_vs_state = si_create_shader_selector; | |||
sctx->b.b.create_tcs_state = si_create_shader_selector; |