With the state accessed from GMEM+submit factored out of fd_context and into fd_batch, it is now possible to punt the flush off to a helper thread. More importantly, since there are cases where one context can force the batch-cache to flush another context's batches (i.e. when there are too many in-flight batches), using a per-context helper thread keeps the various flushes for a given context serialized.

TODO: as with the batch-cache, there are a few places where we'll need a mutex to protect critical sections; that locking is completely missing at the moment.

Signed-off-by: Rob Clark <robdclark@gmail.com>
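In outline, the new flow looks like the sketch below. This is a minimal paraphrase of the diff, not the literal driver code: it assumes only the util_queue entry points already used in this patch (util_queue_init/add_job/job_wait and the fence helpers), and it elides the batch-cache bookkeeping and reference counting.

    #include "util/u_queue.h"

    /* stand-in types; the real ones are fd_context/fd_batch below */
    struct ctx {
        struct util_queue flush_queue;        /* one worker thread per context */
    };

    struct batch {
        struct ctx *ctx;
        struct util_queue_fence flush_fence;  /* signaled when the flush job is done */
    };

    static void flush_func(void *job, int thread_index)
    {
        /* runs on the helper thread: GMEM rendering + kernel submit
         * (fd_gmem_render_tiles() and friends in the real code)
         */
    }

    static void flush(struct batch *b, bool sync)
    {
        /* punt the heavy part of the flush to the worker thread... */
        util_queue_add_job(&b->ctx->flush_queue, b, &b->flush_fence,
                flush_func, NULL);

        /* ...and only block if the caller actually needs the result now */
        if (sync)
            util_queue_job_wait(&b->flush_fence);
    }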
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -40,6 +40,9 @@ batch_init(struct fd_batch *batch)
 	struct fd_context *ctx = batch->ctx;
 	unsigned size = 0;
 
+	if (ctx->screen->reorder)
+		util_queue_fence_init(&batch->flush_fence);
+
 	/* if kernel is too old to support unlimited # of cmd buffers, we
 	 * have no option but to allocate large worst-case sizes so that
 	 * we don't need to grow the ringbuffer.  Performance is likely to
@@ -119,6 +122,9 @@ batch_fini(struct fd_batch *batch)
 		fd_hw_sample_reference(batch->ctx, &samp, NULL);
 	}
 	util_dynarray_fini(&batch->samples);
+
+	if (batch->ctx->screen->reorder)
+		util_queue_fence_destroy(&batch->flush_fence);
 }
 
 static void
@@ -129,7 +135,7 @@ batch_flush_reset_dependencies(struct fd_batch *batch, bool flush)
 	foreach_batch(dep, cache, batch->dependents_mask) {
 		if (flush)
-			fd_batch_flush(dep);
+			fd_batch_flush(dep, false);
 		fd_batch_reference(&dep, NULL);
 	}
@@ -156,6 +162,8 @@ batch_reset(struct fd_batch *batch)
 {
 	DBG("%p", batch);
 
+	fd_batch_sync(batch);
+
 	batch_flush_reset_dependencies(batch, false);
 	batch_reset_resources(batch);
@@ -197,6 +205,31 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch)
 	util_sprintf(buf, "fd_batch<%u>", batch->seqno);
 }
 
+void
+fd_batch_sync(struct fd_batch *batch)
+{
+	if (!batch->ctx->screen->reorder)
+		return;
+	util_queue_job_wait(&batch->flush_fence);
+}
+
+static void
+batch_flush_func(void *job, int id)
+{
+	struct fd_batch *batch = job;
+
+	fd_gmem_render_tiles(batch);
+	batch_reset_resources(batch);
+	batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
+}
+
+static void
+batch_cleanup_func(void *job, int id)
+{
+	struct fd_batch *batch = job;
+	fd_batch_reference(&batch, NULL);
+}
+
 static void
 batch_flush(struct fd_batch *batch)
 {
@@ -207,11 +240,25 @@ batch_flush(struct fd_batch *batch)
 	batch->needs_flush = false;
 
-	batch_flush_reset_dependencies(batch, true);
-
 	/* close out the draw cmds by making sure any active queries are
 	 * paused:
 	 */
 	fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_NULL);
 
-	fd_gmem_render_tiles(batch);
-	batch_reset_resources(batch);
+	batch->ctx->dirty = ~0;
+
+	batch_flush_reset_dependencies(batch, true);
+
+	if (batch->ctx->screen->reorder) {
+		struct fd_batch *tmp = NULL;
+		fd_batch_reference(&tmp, batch);
+
+		util_queue_add_job(&batch->ctx->flush_queue,
+				batch, &batch->flush_fence,
+				batch_flush_func, batch_cleanup_func);
+	} else {
+		fd_gmem_render_tiles(batch);
+		batch_reset_resources(batch);
+		batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
+	}
 
 	debug_assert(batch->reference.count > 0);
@@ -222,8 +269,9 @@ batch_flush(batch)
 	}
 }
 
+/* NOTE: could drop the last ref to batch */
 void
-fd_batch_flush(struct fd_batch *batch)
+fd_batch_flush(struct fd_batch *batch, bool sync)
 {
 	/* NOTE: we need to hold an extra ref across the body of flush,
 	 * since the last ref to this batch could be dropped when cleaning
@@ -232,6 +280,8 @@ fd_batch_flush(struct fd_batch *batch)
 	struct fd_batch *tmp = NULL;
 
 	fd_batch_reference(&tmp, batch);
 	batch_flush(tmp);
+	if (sync)
+		fd_batch_sync(tmp);
 	fd_batch_reference(&tmp, NULL);
 }
@@ -263,7 +313,7 @@ batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
 	 */
 	if (batch_depends_on(dep, batch)) {
 		DBG("%p: flush forced on %p!", batch, dep);
-		fd_batch_flush(dep);
+		fd_batch_flush(dep, false);
 	} else {
 		struct fd_batch *other = NULL;
 		fd_batch_reference(&other, dep);
@@ -327,5 +377,5 @@ fd_batch_check_size(struct fd_batch *batch)
 	struct fd_ringbuffer *ring = batch->draw;
 
 	if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
 			(fd_mesa_debug & FD_DBG_FLUSH))
-		fd_batch_flush(batch);
+		fd_batch_flush(batch, true);
 }
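The bool sync argument added to fd_batch_flush() encodes the calling convention used throughout the rest of the patch: pass true where the caller must observe the completed flush immediately (fd_batch_check_size(), transfer_map, blocking query reads), and false where the flush is only kicked to resolve dependencies or rotate batches. Since batch_cleanup_func() can drop the last reference on the worker thread, a caller that wants to flush asynchronously and sync later has to hold its own reference across the window; a hedged sketch of that usage (variable names hypothetical):

    struct fd_batch *tmp = NULL;

    fd_batch_reference(&tmp, batch);
    fd_batch_flush(tmp, false);    /* enqueue flush on the helper thread */
    /* ... unrelated CPU-side work can overlap here ... */
    fd_batch_sync(tmp);            /* same effect as passing sync=true */
    fd_batch_reference(&tmp, NULL);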
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -28,6 +28,7 @@
 #define FREEDRENO_BATCH_H_
 
 #include "util/u_inlines.h"
+#include "util/u_queue.h"
 #include "util/list.h"
 
 #include "freedreno_util.h"
@@ -76,6 +77,8 @@ struct fd_batch {
 	struct fd_context *ctx;
 
+	struct util_queue_fence flush_fence;
+
 	/* do we need to mem2gmem before rendering. We don't, if for example,
 	 * there was a glClear() that invalidated the entire previous buffer
 	 * contents. Keep track of which buffer(s) are cleared, or needs
@@ -197,7 +200,8 @@ struct fd_batch {
 struct fd_batch * fd_batch_create(struct fd_context *ctx);
 
 void fd_batch_reset(struct fd_batch *batch);
-void fd_batch_flush(struct fd_batch *batch);
+void fd_batch_sync(struct fd_batch *batch);
+void fd_batch_flush(struct fd_batch *batch, bool sync);
 void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write);
 void fd_batch_check_size(struct fd_batch *batch);
--- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -128,19 +128,24 @@ uint32_t
 fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
 {
 	struct hash_entry *entry;
-	uint32_t timestamp = 0;
+	struct fd_batch *last_batch = NULL;
 
 	hash_table_foreach(cache->ht, entry) {
 		struct fd_batch *batch = NULL;
 		fd_batch_reference(&batch, (struct fd_batch *)entry->data);
 		if (batch->ctx == ctx) {
-			fd_batch_flush(batch);
-			timestamp = MAX2(timestamp, fd_ringbuffer_timestamp(batch->gmem));
+			fd_batch_reference(&last_batch, batch);
+			fd_batch_flush(batch, false);
 		}
 		fd_batch_reference(&batch, NULL);
 	}
 
-	return timestamp;
+	if (last_batch) {
+		fd_batch_sync(last_batch);
+		fd_batch_reference(&last_batch, NULL);
+	}
+
+	return ctx->last_fence;
 }
 
 void
@@ -238,7 +243,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
 			fd_batch_reference(&flush_batch, cache->batches[i]);
 	}
 
 	DBG("%p: too many batches! flush forced!", flush_batch);
-	fd_batch_flush(flush_batch);
+	fd_batch_flush(flush_batch, true);
 
 	/* While the resources get cleaned up automatically, the flush_batch
 	 * doesn't get removed from the dependencies of other batches, so
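fd_bc_flush() now only waits on the last batch it flushed. That is sufficient because the flush_queue is created with a single worker thread, so jobs for a given context run strictly in submission order: once the last fence signals, every earlier flush has completed and ctx->last_fence holds the timestamp from the final submit. A small sketch of the FIFO property being relied on (names hypothetical; assumes a queue initialized like the one in this patch, util_queue_init(..., 16, 1)):

    static void
    flush_two_sync_last(struct util_queue *q, util_queue_execute_func exec,
            void *job_a, struct util_queue_fence *fence_a,
            void *job_b, struct util_queue_fence *fence_b)
    {
        util_queue_add_job(q, job_a, fence_a, exec, NULL);
        util_queue_add_job(q, job_b, fence_b, exec, NULL);

        /* one worker thread => FIFO execution: when fence_b signals,
         * job_a has necessarily finished too, so there is no need to
         * wait on fence_a
         */
        util_queue_job_wait(fence_b);
    }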
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -48,7 +48,7 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
 	if (!ctx->screen->reorder) {
 		struct fd_batch *batch = NULL;
 		fd_batch_reference(&batch, ctx->batch);
-		fd_batch_flush(batch);
+		fd_batch_flush(batch, true);
 		timestamp = fd_ringbuffer_timestamp(batch->gmem);
 		fd_batch_reference(&batch, NULL);
 	} else {
@@ -103,6 +103,9 @@ fd_context_destroy(struct pipe_context *pctx)
 	DBG("");
 
+	if (ctx->screen->reorder)
+		util_queue_destroy(&ctx->flush_queue);
+
 	fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
 	fd_bc_invalidate_context(ctx);
@@ -179,8 +182,11 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
 	 * batches per compute job (since it isn't using tiling, so no point
 	 * in getting involved with the re-ordering madness)..
 	 */
-	if (!screen->reorder)
+	if (!screen->reorder) {
 		ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx);
+	} else {
+		util_queue_init(&ctx->flush_queue, "flush_queue", 16, 1);
+	}
 
 	fd_reset_wfi(ctx);
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -114,6 +114,8 @@ struct fd_context {
 	struct fd_device *dev;
 	struct fd_screen *screen;
 
+	struct util_queue flush_queue;
+
 	struct blitter_context *blitter;
 	struct primconvert_context *primconvert;
@@ -161,6 +163,8 @@ struct fd_context {
 	 */
 	struct fd_batch *batch;
 
+	uint32_t last_fence;
+
 	/* Are we in process of shadowing a resource? Used to detect recursion
 	 * in transfer_map, and skip unneeded synchronization.
 	 */
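Note the cross-thread traffic on ctx->last_fence: in the reorder path it is written by batch_flush_func() on the helper thread and read back on the API thread by fd_bc_flush() and fd_context_flush(). That read is only well-ordered because the readers sync on the flush fence first, roughly (hypothetical helper; assumes the caller holds a batch reference):

    static uint32_t hypothetical_flush_and_read(struct fd_batch *batch)
    {
        fd_batch_flush(batch, false);  /* worker will write ctx->last_fence */
        fd_batch_sync(batch);          /* fence wait orders the worker's write... */
        return batch->ctx->last_fence; /* ...before this read on the API thread */
    }

This is presumably among the spots the TODO's missing locking would eventually cover.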
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -405,8 +405,6 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 	fd_ringbuffer_flush(batch->gmem);
 
 	fd_reset_wfi(ctx);
-
-	ctx->dirty = ~0;
 }
 
 /* tile needs restore if it isn't completely contained within the
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -238,7 +238,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 		 * spin forever:
 		 */
 		if (hq->no_wait_cnt++ > 5)
-			fd_batch_flush(rsc->write_batch);
+			fd_batch_flush(rsc->write_batch, false);
 		return false;
 	}
@@ -266,7 +266,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 		struct fd_resource *rsc = fd_resource(start->prsc);
 
 		if (rsc->write_batch)
-			fd_batch_flush(rsc->write_batch);
+			fd_batch_flush(rsc->write_batch, true);
 
 		/* some piglit tests at least do query with no draws, I guess: */
 		if (!rsc->bo)
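The two query call sites intentionally pick opposite flags: the no-wait poll just nudges the pending write_batch along without stalling the frontend (it returns false and lets the app retry), while the blocking path must wait for rendering to finish before reading results back. Condensed (a paraphrase of the two hunks above, not new code):

    if (!wait) {
        /* poll path: kick the flush, but don't block the API thread */
        if (hq->no_wait_cnt++ > 5)
            fd_batch_flush(rsc->write_batch, false);
        return false;
    }

    /* blocking path: flush *and* wait before the readback below */
    fd_batch_flush(rsc->write_batch, true);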
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -516,12 +516,18 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 	if (needs_flush) {
 		if (usage & PIPE_TRANSFER_WRITE) {
-			struct fd_batch *batch;
-			foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask)
-				fd_batch_flush(batch);
+			struct fd_batch *batch, *last_batch = NULL;
+			foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
+				fd_batch_reference(&last_batch, batch);
+				fd_batch_flush(batch, false);
+			}
+			if (last_batch) {
+				fd_batch_sync(last_batch);
+				fd_batch_reference(&last_batch, NULL);
+			}
 			assert(rsc->batch_mask == 0);
 		} else {
-			fd_batch_flush(rsc->write_batch);
+			fd_batch_flush(rsc->write_batch, true);
 		}
 		assert(!rsc->write_batch);
 	}
@@ -1080,7 +1086,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 	struct fd_resource *rsc = fd_resource(prsc);
 
 	if (rsc->write_batch)
-		fd_batch_flush(rsc->write_batch);
+		fd_batch_flush(rsc->write_batch, true);
 
 	assert(!rsc->write_batch);
 }
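The flush-everything-then-sync-the-last-batch idiom now appears both here and in fd_bc_flush(); if it grows another user it could be pulled into a small helper along these lines (hypothetical, not part of this patch):

    /* hypothetical helper: flush all batches writing rsc, blocking only once */
    static void
    fd_flush_resource_batches(struct fd_context *ctx, struct fd_resource *rsc)
    {
        struct fd_batch *batch, *last_batch = NULL;

        foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
            fd_batch_reference(&last_batch, batch);
            fd_batch_flush(batch, false);
        }

        if (last_batch) {
            fd_batch_sync(last_batch);
            fd_batch_reference(&last_batch, NULL);
        }
    }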
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -137,14 +137,14 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 			 * multiple times to the same surface), so we might as
 			 * well go ahead and flush this one:
 			 */
-			fd_batch_flush(old_batch);
+			fd_batch_flush(old_batch, false);
 		}
 
 		fd_batch_reference(&old_batch, NULL);
 	} else {
 		DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
 				framebuffer->cbufs[0], framebuffer->zsbuf);
-		fd_batch_flush(ctx->batch);
+		fd_batch_flush(ctx->batch, false);
 	}
 
 	cso = &ctx->batch->framebuffer;