This prevents IB rejections due to insane memory usage from many
consecutive texture uploads.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
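[Editor's note] The pattern the patch implements, as a minimal standalone sketch; all names here (ib_state, ib_flush, vram_limit) are hypothetical and not Mesa API. Each DMA copy reports the buffers it references, and the IB is submitted early once the accumulated size crosses a threshold, instead of letting hundreds of uploads pile up in one submission:

#include <stdint.h>

struct ib_state {
	uint64_t vram_usage;   /* bytes of buffers referenced by this IB */
	uint64_t vram_limit;   /* flush threshold */
};

void ib_flush(struct ib_state *ib);   /* hypothetical: submits the IB */

static void account_copy(struct ib_state *ib,
			 uint64_t dst_size, uint64_t src_size)
{
	ib->vram_usage += dst_size + src_size;
	if (ib->vram_usage > ib->vram_limit) {
		ib_flush(ib);
		ib->vram_usage = 0;   /* the next copy starts a fresh IB */
	}
}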
@@ -60,7 +60,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 	}
 
 	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
-	r600_need_dma_space(&rctx->b, ncopy * 5);
+	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
 	for (i = 0; i < ncopy; i++) {
 		csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
 		/* emit reloc before writing cs so that cs is always in consistent state */
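[Editor's note] The ncopy expression used throughout these hunks, (size / MAX) + !!(size % MAX), is integer ceiling division: the number of MAX-sized packets needed to cover size. The cik_sdma.c hunk further down spells the same computation as DIV_ROUND_UP. A minimal equivalent:

/* Integer ceiling division: chunks needed to cover `size` units
 * with chunks of at most `max` units.
 * e.g. size = 10, max = 4  ->  10/4 + !!(10%4) = 2 + 1 = 3 chunks. */
static inline unsigned div_round_up(unsigned size, unsigned max)
{
	return (size / max) + !!(size % max);
}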
@@ -3442,7 +3442,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 	size = (copy_height * pitch) / 4;
 	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
-	r600_need_dma_space(&rctx->b, ncopy * 9);
+	r600_need_dma_space(&rctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
 	for (i = 0; i < ncopy; i++) {
 		cheight = copy_height;
@@ -467,7 +467,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 	size >>= 2; /* convert to dwords */
 	ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW);
-	r600_need_dma_space(&rctx->b, ncopy * 5);
+	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
 	for (i = 0; i < ncopy; i++) {
 		csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
 		/* emit reloc before writing cs so that cs is always in consistent state */
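[Editor's note] The "emit reloc before writing cs" comment in these copy loops encodes an ordering invariant. A sketch of the pattern with hypothetical command-stream helpers; that adding a relocation can itself trigger validation or a flush is an assumption about how such helpers may behave, not something this diff shows:

#include <stdint.h>

struct cs;    /* hypothetical command-stream and buffer types */
struct buf;
void cs_add_reloc(struct cs *cs, struct buf *b);  /* may validate/flush */
void cs_write(struct cs *cs, uint32_t dw);

static void emit_copy_packet(struct cs *cs, struct buf *dst, struct buf *src,
			     uint32_t header, uint32_t size_dw)
{
	/* Relocations first: if adding one triggers a flush, the CS is
	 * still at a packet boundary and nothing half-written is lost. */
	cs_add_reloc(cs, dst);
	cs_add_reloc(cs, src);
	/* Only now write the packet body. */
	cs_write(cs, header);
	cs_write(cs, size_dw);
}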
@@ -2918,7 +2918,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	 */
 	cheight = ((R600_DMA_COPY_MAX_SIZE_DW * 4) / pitch) & 0xfffffff8;
 	ncopy = (copy_height / cheight) + !!(copy_height % cheight);
-	r600_need_dma_space(&rctx->b, ncopy * 7);
+	r600_need_dma_space(&rctx->b, ncopy * 7, &rdst->resource, &rsrc->resource);
 	for (i = 0; i < ncopy; i++) {
 		cheight = cheight > copy_height ? copy_height : cheight;
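[Editor's note] Worked numbers for the cheight expression above; the constant's value and the pitch are assumed for illustration. The copy is split into runs of whole rows that fit the per-packet dword budget, rounded down to a multiple of 8 rows for tile alignment:

/* Assuming R600_DMA_COPY_MAX_SIZE_DW == 0xffff and pitch == 512 bytes:
 *   (0xffff dwords * 4 bytes) / 512 = 511 rows fit in one packet;
 *   511 & 0xfffffff8 = 504, i.e. rounded down to a multiple of 8,
 * so each iteration copies at most 504 rows of the tiled surface. */
unsigned max_dw  = 0xffff;                                /* assumed */
unsigned pitch   = 512;                                   /* bytes/row */
unsigned cheight = ((max_dw * 4) / pitch) & 0xfffffff8;   /* -> 504 */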
@@ -136,14 +136,33 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 	pipe_resource_reference(&buf, NULL);
 }
 
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+			 struct r600_resource *dst, struct r600_resource *src)
 {
+	uint64_t vram = 0, gtt = 0;
+
+	if (dst) {
+		if (dst->domains & RADEON_DOMAIN_VRAM)
+			vram += dst->buf->size;
+		else if (dst->domains & RADEON_DOMAIN_GTT)
+			gtt += dst->buf->size;
+	}
+	if (src) {
+		if (src->domains & RADEON_DOMAIN_VRAM)
+			vram += src->buf->size;
+		else if (src->domains & RADEON_DOMAIN_GTT)
+			gtt += src->buf->size;
+	}
+
 	/* Flush the GFX IB if it's not empty. */
 	if (ctx->gfx.cs->cdw > ctx->initial_gfx_cs_size)
 		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
-	/* Flush if there's not enough space. */
-	if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw) {
+	/* Flush if there's not enough space, or if the memory usage per IB
+	 * is too large.
+	 */
+	if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw ||
+	    !ctx->ws->cs_memory_below_limit(ctx->dma.cs, vram, gtt)) {
 		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 		assert((num_dw + ctx->dma.cs->cdw) <= ctx->dma.cs->max_dw);
 	}
@@ -157,7 +176,7 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
 	/* done at the end of DMA calls, so increment this. */
 	rctx->num_dma_calls++;
 
-	r600_need_dma_space(rctx, 1);
+	r600_need_dma_space(rctx, 1, NULL, NULL);
 
 	if (cs->cdw == 0) /* empty queue */
 		return;
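[Editor's note] The winsys-side cs_memory_below_limit called above is not part of this diff. A plausible sketch of what such a check does, under assumed threshold values; this is not the real winsys code:

#include <stdbool.h>
#include <stdint.h>

/* The IB stays usable while the memory it already references, plus the
 * new copy's buffers, fits under a fraction of each heap.  The 70%
 * threshold here is an assumption for illustration. */
static bool cs_memory_below_limit_sketch(uint64_t cs_vram, uint64_t cs_gtt,
					 uint64_t add_vram, uint64_t add_gtt,
					 uint64_t vram_size, uint64_t gtt_size)
{
	return cs_vram + add_vram < vram_size * 7 / 10 &&
	       cs_gtt + add_gtt < gtt_size * 7 / 10;
}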
@@ -597,7 +597,8 @@ void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_re
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
 						  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw);
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+			 struct r600_resource *dst, struct r600_resource *src);
 void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
 
 /* r600_gpu_load.c */
@@ -47,7 +47,7 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
 	src_offset += r600_resource(src)->gpu_address;
 
 	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-	r600_need_dma_space(&ctx->b, ncopy * 7);
+	r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
 
 	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
 				  RADEON_PRIO_SDMA_BUFFER);
@@ -212,7 +212,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		    srcy + copy_height != (1 << 14)))) {
 			struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 
-			r600_need_dma_space(&sctx->b, 13);
+			r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource,
 						  RADEON_USAGE_READ,
 						  RADEON_PRIO_SDMA_TEXTURE);
@@ -382,7 +382,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		    copy_depth <= (1 << 11)) {
 			struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 
-			r600_need_dma_space(&sctx->b, 14);
+			r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource,
 						  RADEON_USAGE_READ,
 						  RADEON_PRIO_SDMA_TEXTURE);
@@ -484,7 +484,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		      dstx + copy_width != (1 << 14)))) {
 			struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 
-			r600_need_dma_space(&sctx->b, 15);
+			r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource,
 						  RADEON_USAGE_READ,
 						  RADEON_PRIO_SDMA_TEXTURE);
@@ -64,7 +64,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 	}
 
 	ncopy = (size / max_csize) + !!(size % max_csize);
-	r600_need_dma_space(&ctx->b, ncopy * 5);
+	r600_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
 
 	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
 				  RADEON_PRIO_SDMA_BUFFER);
@@ -161,7 +161,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
 	mt = G_009910_MICRO_TILE_MODE(tile_mode);
 	size = (copy_height * pitch) / 4;
 	ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
-	r600_need_dma_space(&ctx->b, ncopy * 9);
+	r600_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
 
 	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource,
 				  RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);