Pārlūkot izejas kodu

radeonsi: rework uploading border colors

The border colors are uploaded only once when the state is created.

This brings truly immutable sampler descriptors, because they don't have
to be updated every time a sampler state is re-bound.

It also moves the TA_BC_BASE_ADDR registers to init_config, removing one
more state. The catch is there is now a limit: only 4096 border colors can
be used by one context. I don't think that will be a problem.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
tags/11.1-branchpoint
Marek Olšák pirms 10 gadiem
vecāks
revīzija
a9971e85d9

+ 7
- 2
src/gallium/drivers/radeonsi/si_descriptors.c Parādīt failu

@@ -273,13 +273,17 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}

void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
unsigned start, unsigned count, void **states)
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
unsigned start, unsigned count, void **states)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_sampler_states *samplers = &sctx->samplers[shader].states;
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
int i;

if (!count || shader >= SI_NUM_SHADERS)
return;

if (start == 0)
samplers->saved_states[0] = states[0];
if (start == 1)
@@ -1022,6 +1026,7 @@ void si_init_all_descriptors(struct si_context *sctx)
4, SI_NUM_VERTEX_BUFFERS);

/* Set pipe_context functions. */
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;

+ 22
- 1
src/gallium/drivers/radeonsi/si_pipe.c Parādīt failu

@@ -44,7 +44,8 @@ static void si_destroy_context(struct pipe_context *context)
pipe_resource_reference(&sctx->gsvs_ring, NULL);
pipe_resource_reference(&sctx->tf_ring, NULL);
pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
r600_resource_reference(&sctx->border_color_table, NULL);
r600_resource_reference(&sctx->border_color_buffer, NULL);
free(sctx->border_color_table);
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);

@@ -139,6 +140,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sscreen->b.trace_bo->cs_buf : NULL);
sctx->b.rings.gfx.flush = si_context_gfx_flush;

/* Border colors. */
sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
sizeof(*sctx->border_color_table));
if (!sctx->border_color_table)
goto fail;

sctx->border_color_buffer = (struct r600_resource*)
pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
SI_MAX_BORDER_COLORS *
sizeof(*sctx->border_color_table));
if (!sctx->border_color_buffer)
goto fail;

sctx->border_color_map =
ws->buffer_map(sctx->border_color_buffer->cs_buf,
NULL, PIPE_TRANSFER_WRITE);
if (!sctx->border_color_map)
goto fail;

si_init_all_descriptors(sctx);
si_init_state_functions(sctx);
si_init_shader_functions(sctx);
@@ -197,6 +217,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,

return &sctx->b.b;
fail:
fprintf(stderr, "radeonsi: Failed to create a context.\n");
si_destroy_context(&sctx->b.b);
return NULL;
}

+ 5
- 3
src/gallium/drivers/radeonsi/si_pipe.h Parādīt failu

@@ -79,6 +79,7 @@
#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff)

#define SI_MAX_VIEWPORTS 16
#define SI_MAX_BORDER_COLORS 4096

struct si_compute;

@@ -103,7 +104,6 @@ struct si_sampler_view {

/* Driver-side sampler state object, allocated by si_create_sampler_state(). */
struct si_sampler_state {
/* Four 32-bit words filled in from the pipe_sampler_state using the
 * S_008F30_* / S_008F38_* / S_008F3C_* field macros; val[3] carries the
 * BORDER_COLOR_TYPE and BORDER_COLOR_PTR fields.
 * NOTE(review): presumably these are the hardware sampler descriptor
 * dwords - confirm against the register headers. */
uint32_t val[4];
/* Copy of pipe_sampler_state::border_color.ui; only written when the
 * border color type is V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER. */
uint32_t border_color[4];
};

struct si_cs_shader_state {
@@ -219,8 +219,10 @@ struct si_context {
struct pipe_resource *esgs_ring;
struct pipe_resource *gsvs_ring;
struct pipe_resource *tf_ring;
struct r600_resource *border_color_table;
unsigned border_color_offset;
union pipe_color_union *border_color_table; /* in CPU memory, any endian */
struct r600_resource *border_color_buffer;
union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
unsigned border_color_count;

/* Vertex and index buffers. */
bool vertex_buffers_dirty;

+ 41
- 83
src/gallium/drivers/radeonsi/si_state.c Parādīt failu

@@ -2701,9 +2701,10 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st
static void *si_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
unsigned border_color_type;
unsigned border_color_type, border_color_index = 0;

if (rstate == NULL) {
return NULL;
@@ -2726,9 +2727,38 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
state->border_color.f[2] == 1 &&
state->border_color.f[3] == 1)
border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
else
else {
int i;

border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;

/* Check if the border has been uploaded already. */
for (i = 0; i < sctx->border_color_count; i++)
if (memcmp(&sctx->border_color_table[i], &state->border_color,
sizeof(state->border_color)) == 0)
break;

if (i >= SI_MAX_BORDER_COLORS) {
/* Getting 4096 unique border colors is very unlikely. */
fprintf(stderr, "radeonsi: The border color table is full. "
"Any new border colors will be just black. "
"Please file a bug.\n");
border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
} else {
if (i == sctx->border_color_count) {
/* Upload a new border color. */
memcpy(&sctx->border_color_table[i], &state->border_color,
sizeof(state->border_color));
util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
&state->border_color,
sizeof(state->border_color));
sctx->border_color_count++;
}

border_color_index = i;
}
}

rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
@@ -2742,89 +2772,11 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);

if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
memcpy(rstate->border_color, state->border_color.ui,
sizeof(rstate->border_color));
}

rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
S_008F3C_BORDER_COLOR_TYPE(border_color_type);
return rstate;
}

/* Upload border colors and update the pointers in resource descriptors.
 * There can only be 4096 border colors per context.
 *
 * For every non-NULL state in `states` whose BORDER_COLOR_TYPE field is
 * V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER, the four border color words are
 * written into the next free slot of sctx->border_color_table and the
 * slot index is stored in the state's BORDER_COLOR_PTR field (val[3]).
 * If at least one color was written, a PM4 state that programs
 * TA_BC_BASE_ADDR(_HI) with the table's GPU address is installed.
 *
 * sctx:   context owning the border color table and its running offset.
 * count:  number of entries in `states`.
 * states: array of struct si_sampler_state pointers; entries may be NULL.
 *
 * XXX: This is broken if the buffer gets reallocated.
 *
 * NOTE(review): the results of si_resource_create_custom(), buffer_map()
 * and CALLOC_STRUCT() are used without a NULL check in this function.
 */
static void si_set_border_colors(struct si_context *sctx, unsigned count,
void **states)
{
struct si_sampler_state **rstates = (struct si_sampler_state **)states;
/* CPU mapping of the table; stays NULL until the first color is written. */
uint32_t *border_color_table = NULL;
int i, j;

for (i = 0; i < count; i++) {
/* Only states that fetch their border color from the table need a slot. */
if (rstates[i] &&
G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
/* (Re)allocate the table when none exists yet or when the masked
 * sum below is non-zero.
 * NOTE(review): presumably C_008F3C_BORDER_COLOR_PTR is the clear
 * mask of the PTR field, making this an "index no longer fits"
 * check - confirm against the register header. */
if (!sctx->border_color_table ||
((sctx->border_color_offset + count - i) &
C_008F3C_BORDER_COLOR_PTR)) {
/* Drop the old buffer and restart numbering from slot 0. */
r600_resource_reference(&sctx->border_color_table, NULL);
sctx->border_color_offset = 0;

/* 4096 * 4 * 4 bytes: matches 4096 colors of four 32-bit words. */
sctx->border_color_table =
si_resource_create_custom(&sctx->screen->b.b,
PIPE_USAGE_DYNAMIC,
4096 * 4 * 4);
}

/* Map at most once per call.
 * NOTE(review): if the buffer is replaced in a later iteration,
 * this pointer is not refreshed and still refers to the mapping
 * obtained earlier. */
if (!border_color_table) {
border_color_table =
sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
sctx->b.rings.gfx.cs,
PIPE_TRANSFER_WRITE |
PIPE_TRANSFER_UNSYNCHRONIZED);
}

/* Each slot is four 32-bit words, hence the 4 * offset indexing. */
for (j = 0; j < 4; j++) {
border_color_table[4 * sctx->border_color_offset + j] =
util_le32_to_cpu(rstates[i]->border_color[j]);
}

/* Replace the PTR field with the slot just written, then advance
 * the context-wide offset to the next free slot. */
rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
}
}

/* Non-NULL only if at least one color was written above. */
if (border_color_table) {
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

uint64_t va_offset = sctx->border_color_table->gpu_address;

/* The low register receives the address shifted right by 8; the high
 * register (address >> 40) is only written on CIK and newer. */
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
}
}

/* Bind `count` sampler states, starting at slot `start`, for the given
 * shader stage. Does nothing when `count` is zero or `shader` is not below
 * SI_NUM_SHADERS. Border colors are handled first, then the sampler
 * descriptors are set from the same state array.
 */
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
				   unsigned start, unsigned count,
				   void **states)
{
	struct si_context *si = (struct si_context *)ctx;

	if (count && shader < SI_NUM_SHADERS) {
		si_set_border_colors(si, count, states);
		si_set_sampler_descriptors(si, shader, start, count, states);
	}
}

static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -3105,7 +3057,6 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.get_sample_position = cayman_get_sample_position;

sctx->b.b.create_sampler_state = si_create_sampler_state;
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.delete_sampler_state = si_delete_sampler_state;

sctx->b.b.create_sampler_view = si_create_sampler_view;
@@ -3270,6 +3221,7 @@ static void si_init_config(struct si_context *sctx)
unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
unsigned raster_config, raster_config_1;
uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
int i;

@@ -3434,5 +3386,11 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
}

si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);

sctx->init_config = pm4;
}

+ 0
- 3
src/gallium/drivers/radeonsi/si_state.h Parādīt failu

@@ -91,7 +91,6 @@ union si_state {
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
struct si_pm4_state *poly_offset;
struct si_pm4_state *ta_bordercolor_base;
struct si_pm4_state *ls;
struct si_pm4_state *hs;
struct si_pm4_state *es;
@@ -246,8 +245,6 @@ struct si_buffer_resources {
} while(0)

/* si_descriptors.c */
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
unsigned start, unsigned count, void **states);
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,

Notiek ielāde…
Atcelt
Saglabāt