Переглянути джерело

i965: Don't inline intel_batchbuffer_require_space().

It's called by the inline intel_batchbuffer_begin() function which
itself is used in BEGIN_BATCH. So in a sequence of code emitting multiple
packets, we have inlined this ~200 byte function multiple times. Making
it an out-of-line function presumably improved icache usage.

Improves performance of Gl32Batch7 by 3.39898% +/- 0.358674% (n=155) on
Ivybridge.

Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
tags/12.0-branchpoint
Matt Turner 9 роки тому
джерело
коміт
a74fc3fe8a

+ 26
- 0
src/mesa/drivers/dri/i965/intel_batchbuffer.c Переглянути файл

@@ -106,6 +106,32 @@ intel_batchbuffer_free(struct brw_context *brw)
drm_intel_bo_unreference(brw->batch.bo);
}

/**
 * Prepare the batch so that \p sz more can be emitted on \p ring.
 *
 * The batch is flushed when the requested ring differs from the one already
 * selected (gen >= 6 only, and only if a ring has been selected), and again
 * when intel_batchbuffer_space() reports less than \p sz available.  Finally
 * \p ring is recorded as the batch's ring, and the render ring prelude is
 * emitted if the batch had no ring and RENDER_RING was requested.
 *
 * \param brw   context owning the batch
 * \param sz    amount of space required; compared directly against
 *              intel_batchbuffer_space() (presumably bytes -- TODO confirm)
 * \param ring  ring the caller is about to emit commands for
 */
void
intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
                                enum brw_gpu_ring ring)
{
   /* A ring change on gen6+ implies a flush, unless no ring was chosen yet. */
   if (brw->gen >= 6 && brw->batch.ring != UNKNOWN_RING) {
      if (unlikely(ring != brw->batch.ring)) {
         intel_batchbuffer_flush(brw);
      }
   }

#ifdef DEBUG
   assert(sz < BATCH_SZ - BATCH_RESERVED);
#endif

   /* Not enough room left for this request: flush. */
   if (sz > intel_batchbuffer_space(brw)) {
      intel_batchbuffer_flush(brw);
   }

   /* Sample the ring only now: either intel_batchbuffer_flush() call above
    * might have changed brw->batch.ring to UNKNOWN_RING, which is also why
    * the requested ring is stored here at the very end.
    */
   const enum brw_gpu_ring ring_before = brw->batch.ring;
   brw->batch.ring = ring;

   if (unlikely(ring == RENDER_RING && ring_before == UNKNOWN_RING)) {
      intel_batchbuffer_emit_render_ring_prelude(brw);
   }
}

static void
do_batch_dump(struct brw_context *brw)
{

+ 2
- 26
src/mesa/drivers/dri/i965/intel_batchbuffer.h Переглянути файл

@@ -44,6 +44,8 @@ void intel_batchbuffer_init(struct brw_context *brw);
void intel_batchbuffer_free(struct brw_context *brw);
void intel_batchbuffer_save_state(struct brw_context *brw);
void intel_batchbuffer_reset_to_saved(struct brw_context *brw);
/* Makes room for sz on the given ring, flushing the batch when the ring
 * changes or when too little space is left.  Defined out of line in
 * intel_batchbuffer.c.
 */
void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
enum brw_gpu_ring ring);

int _intel_batchbuffer_flush(struct brw_context *brw,
const char *file, int line);
@@ -116,32 +118,6 @@ intel_batchbuffer_emit_float(struct brw_context *brw, float f)
intel_batchbuffer_emit_dword(brw, float_as_int(f));
}

/**
 * Prepare the batch so that \p sz more can be emitted on \p ring.
 *
 * The batch is flushed when the requested ring differs from the one already
 * selected (gen >= 6 only, and only if a ring has been selected), and again
 * when intel_batchbuffer_space() reports less than \p sz available.  Finally
 * \p ring is recorded as the batch's ring, and the render ring prelude is
 * emitted if the batch had no ring and RENDER_RING was requested.
 *
 * \param brw   context owning the batch
 * \param sz    amount of space required; compared directly against
 *              intel_batchbuffer_space() (presumably bytes -- TODO confirm)
 * \param ring  ring the caller is about to emit commands for
 */
static inline void
intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
                                enum brw_gpu_ring ring)
{
   /* A ring change on gen6+ implies a flush, unless no ring was chosen yet. */
   if (brw->gen >= 6 && brw->batch.ring != UNKNOWN_RING) {
      if (unlikely(ring != brw->batch.ring)) {
         intel_batchbuffer_flush(brw);
      }
   }

#ifdef DEBUG
   assert(sz < BATCH_SZ - BATCH_RESERVED);
#endif

   /* Not enough room left for this request: flush. */
   if (sz > intel_batchbuffer_space(brw)) {
      intel_batchbuffer_flush(brw);
   }

   /* Sample the ring only now: either intel_batchbuffer_flush() call above
    * might have changed brw->batch.ring to UNKNOWN_RING, which is also why
    * the requested ring is stored here at the very end.
    */
   const enum brw_gpu_ring ring_before = brw->batch.ring;
   brw->batch.ring = ring;

   if (unlikely(ring == RENDER_RING && ring_before == UNKNOWN_RING)) {
      intel_batchbuffer_emit_render_ring_prelude(brw);
   }
}

static inline void
intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
{

Завантаження…
Відмінити
Зберегти