|
|
@@ -480,6 +480,65 @@ upload_format_size(uint32_t upload_format)
   }
}
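
/* For a soft-pinned BO, return the high bits of its assigned graphics
 * address (everything above bit 31). A relocated BO is kept in the low
 * 4 GiB (see the comment below), so its high bits are always zero.
 */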
static UNUSED uint16_t
pinned_bo_high_bits(struct brw_bo *bo)
{
   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
}

/* The VF cache designers apparently cut corners, and made the cache key's
 * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
 * of the address. If you happen to have two vertex buffers which get placed
 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
 * collisions. (These collisions can happen within a single batch.)
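 *
 * For example, buffers soft-pinned at 0x100001000 and 0x200001000 share
 * their low 32 address bits; bind them to the same VB slot in consecutive
 * draws and the second draw can fetch stale vertex data from the first.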
 *
 * In the soft-pin world, we'd like to assign addresses up front, and never
 * move buffers. So, we need to do a VF cache invalidate if the buffer for
 * a particular VB slot has different [48:32] address bits than the last one.
 *
 * In the relocation world, we have no idea what the addresses will be, so
 * we can't apply this workaround. Instead, we tell the kernel to move it
 * to the low 4GB regardless.
 */
static void
vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
{
#if GEN_GEN >= 8
   bool need_invalidate = false;
   unsigned i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);

      if (high_bits != brw->vb.last_bo_high_bits[i]) {
         need_invalidate = true;
         brw->vb.last_bo_high_bits[i] = high_bits;
      }
   }

   /* Don't bother with draw parameter buffers - those are generated by
    * the driver so we can select a consistent memory zone.
    */
   if (need_invalidate) {
      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
   }
#endif
}
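
/* The index buffer binding gets the same treatment: invalidate the VF cache
 * whenever the index buffer's high address bits change.
 */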
static void
vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
{
#if GEN_GEN >= 8
   uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);

   if (high_bits != brw->ib.last_bo_high_bits) {
      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
      brw->ib.last_bo_high_bits = high_bits;
   }
#endif
}

static void
genX(emit_vertices)(struct brw_context *brw)
{
@@ -594,6 +653,8 @@ genX(emit_vertices)(struct brw_context *brw)
   const unsigned nr_buffers = brw->vb.nr_buffers +
      uses_draw_params + uses_derived_draw_params;
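
   /* Apply the VF cache workaround before (re)emitting the vertex buffers. */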
   vf_invalidate_for_vb_48bit_transitions(brw);

   if (nr_buffers) {
      assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));

@@ -886,6 +947,8 @@ genX(emit_index_buffer)(struct brw_context *brw)
   if (index_buffer == NULL)
      return;
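
   /* Likewise, invalidate the VF cache if the index buffer's high address
    * bits changed since the last index buffer we emitted.
    */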
   vf_invalidate_for_ib_48bit_transition(brw);

   brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GEN_GEN < 8 && !GEN_IS_HASWELL
      ib.CutIndexEnable = brw->prim_restart.enable_cut_index;