Procházet zdrojové kódy

nouveau: avoid relocations where possible.

Potential relocations are emitted as NOPs where they're needed.  In the
event a buffer moves, the pushbuf code will emit the relevant state
changes into the NOPs.

This is just a start; more work is needed to get this looking the way I want it to.
tags/mesa_20090313
Ben Skeggs před 17 roky
rodič
revize
705022f98c

+ 1
- 2
src/mesa/drivers/dri/nouveau_winsys/nouveau_local.h Zobrazit soubor

@@ -61,9 +61,8 @@
} while(0)

#define OUT_RELOC(buf,data,flags,vor,tor) do { \
nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur, \
nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur++, \
buf, (data), (flags), (vor), (tor)); \
OUT_RING(0); \
} while(0)

/* Raw data + flags depending on FB/TT buffer */

+ 38
- 23
src/mesa/drivers/dri/nouveau_winsys/nouveau_pushbuf.c Zobrazit soubor

@@ -96,6 +96,31 @@ nouveau_pushbuf_init(struct nouveau_channel *chan)
return 0;
}

/*
 * Work out the 32-bit word to place in the push buffer for reloc `r`
 * against buffer object `bo`.
 *
 * Base value, selected by r->flags:
 *   NOUVEAU_BO_LOW   bo->offset + r->data, narrowed to 32 bits
 *   NOUVEAU_BO_HIGH  (bo->offset + r->data) shifted right by 32
 *   neither          r->data unchanged
 * LOW takes precedence when both LOW and HIGH are set.
 *
 * When NOUVEAU_BO_OR is set, r->vor is ORed in if bo->flags contains
 * NOUVEAU_BO_VRAM, otherwise r->tor is ORed in.
 *
 * NOTE(review): the HIGH path assumes bo->offset is wider than 32 bits;
 * confirm against the struct nouveau_bo declaration.
 */
static uint32_t
nouveau_pushbuf_calc_reloc(struct nouveau_bo *bo,
			   struct nouveau_pushbuf_reloc *r)
{
	uint32_t value = r->data;

	if (r->flags & NOUVEAU_BO_LOW)
		value = bo->offset + r->data;
	else if (r->flags & NOUVEAU_BO_HIGH)
		value = (bo->offset + r->data) >> 32;

	if (!(r->flags & NOUVEAU_BO_OR))
		return value;

	if (bo->flags & NOUVEAU_BO_VRAM)
		value |= r->vor;
	else
		value |= r->tor;

	return value;
}

/* This would be our TTM "superioctl" */
int
nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min)
@@ -133,34 +158,20 @@ nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min)

if (bo->offset == nouveau_bo(bo)->offset &&
bo->flags == nouveau_bo(bo)->flags) {
/*XXX: could avoid reloc in this case, except with the
* current design we'd confuse the GPU quite a bit
* if we did this. Will fix soon.
*/
while ((r = ptr_to_pbrel(pbbo->relocs))) {
pbbo->relocs = r->next;
free(r);
}

nvpb->buffers = pbbo->next;
free(pbbo);
continue;
}
bo->offset = nouveau_bo(bo)->offset;
bo->flags = nouveau_bo(bo)->flags;

while ((r = ptr_to_pbrel(pbbo->relocs))) {
uint32_t push;

if (r->flags & NOUVEAU_BO_LOW) {
push = bo->offset + r->data;
} else
if (r->flags & NOUVEAU_BO_HIGH) {
push = (bo->offset + r->data) >> 32;
} else {
push = r->data;
}

if (r->flags & NOUVEAU_BO_OR) {
if (bo->flags & NOUVEAU_BO_VRAM)
push |= r->vor;
else
push |= r->tor;
}

*r->ptr = push;
*r->ptr = nouveau_pushbuf_calc_reloc(bo, r);
pbbo->relocs = r->next;
free(r);
}
@@ -241,6 +252,10 @@ nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr,
r->vor = vor;
r->tor = tor;

if (flags & NOUVEAU_BO_DUMMY)
*(uint32_t *)ptr = 0;
else
*(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r);
return 0;
}


+ 1
- 0
src/mesa/pipe/nouveau/nouveau_bo.h Zobrazit soubor

@@ -35,6 +35,7 @@
#define NOUVEAU_BO_HIGH (1 << 7)
#define NOUVEAU_BO_OR (1 << 8)
#define NOUVEAU_BO_LOCAL (1 << 9)
/* Placeholder reloc: when this flag is set, nouveau_pushbuf_emit_reloc()
 * writes 0 into the push buffer instead of the computed reloc value.
 * The literal is unsigned because shifting a signed 1 into bit 31 is
 * undefined behaviour with a 32-bit int.
 */
#define NOUVEAU_BO_DUMMY (1u << 31)

struct nouveau_bo {
struct nouveau_device *device;

+ 11
- 2
src/mesa/pipe/nouveau/nouveau_push.h Zobrazit soubor

@@ -44,9 +44,8 @@
#define OUT_RELOC(bo,data,flags,vor,tor) do { \
NOUVEAU_PUSH_CONTEXT(pc); \
pc->nvws->push_reloc(pc->nvws->channel, \
pc->nvws->channel->pushbuf->cur, \
pc->nvws->channel->pushbuf->cur++, \
(bo), (data), (flags), (vor), (tor)); \
OUT_RING(0); \
} while(0)

/* Raw data + flags depending on FB/TT buffer */
@@ -71,4 +70,14 @@
OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
} while(0)

/*
 * OUT_RELOCm - emit a reloc that will recombine into an NV_DMA_METHOD
 * packet header.
 *
 * bo:    buffer object the reloc refers to
 * flags: reloc flags, forwarded unchanged to OUT_RELOCd
 * obj:   name of a member of the push context; pc->obj->subc is shifted
 *        left by 13 into the header word
 * mthd:  method value ORed into the header word
 * size:  value shifted left by 18 into the header word (presumably the
 *        number of data words that follow the header - TODO confirm)
 *
 * If fewer than size + 1 words remain in the push buffer, push_flush is
 * called first with size + 1 as its argument.  The header word is then
 * emitted through OUT_RELOCd with zero for both OR values, and the
 * remaining count is reduced by size + 1.
 *
 * NOTE(review): size is evaluated several times, so arguments with side
 * effects must be avoided.
 */
#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
NOUVEAU_PUSH_CONTEXT(pc); \
if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \
OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
(flags), 0, 0); \
pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
} while(0)

#endif

+ 5
- 0
src/mesa/pipe/nv40/nv40_fragprog.c Zobrazit soubor

@@ -815,6 +815,11 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
fp->on_hw = TRUE;
}

BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
NV40TCL_FP_ADDRESS_DMA1);
BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
OUT_RING (fp->fp_control);


+ 7
- 1
src/mesa/pipe/nv40/nv40_fragtex.c Zobrazit soubor

@@ -104,7 +104,13 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
nv40->tex[unit].buffer = nv40mt->buffer;
nv40->tex[unit].format = txf;

BEGIN_RING(curie, NV40TCL_TEX_WRAP(unit), 6);
BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
OUT_RELOCl(nv40->tex[unit].buffer, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCd(nv40->tex[unit].buffer, nv40->tex[unit].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
NV40TCL_TEX_FORMAT_DMA1);
OUT_RING (ps->wrap);
OUT_RING (NV40TCL_TEX_ENABLE_ENABLE | ps->en |
(0x00078000) /* mipmap related? */);

+ 25
- 7
src/mesa/pipe/nv40/nv40_state.c Zobrazit soubor

@@ -603,33 +603,51 @@ nv40_set_framebuffer_state(struct pipe_context *pipe,
}

if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 1);
OUT_RING (rt[0]->pitch * rt[0]->cpp);
nv40->rt[0] = rt[0]->buffer;
BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2);
OUT_RING (rt[0]->pitch * rt[0]->cpp);
OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}

if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
BEGIN_RING(curie, NV40TCL_COLOR1_PITCH, 2);
OUT_RING (rt[1]->pitch * rt[1]->cpp);
nv40->rt[1] = rt[1]->buffer;
BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2);
OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (rt[1]->pitch * rt[1]->cpp);
}

if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
nv40->rt[2] = rt[2]->buffer;
BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1);
OUT_RING (rt[2]->pitch * rt[2]->cpp);
nv40->rt[2] = rt[2]->buffer;
}

if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
nv40->rt[3] = rt[3]->buffer;
BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1);
OUT_RING (rt[3]->pitch * rt[3]->cpp);
nv40->rt[3] = rt[3]->buffer;
}

if (zeta_format) {
nv40->zeta = zeta->buffer;
BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1);
OUT_RING (zeta->pitch * zeta->cpp);
nv40->zeta = zeta->buffer;
}

nv40->rt_enable = rt_enable;

+ 82
- 62
src/mesa/pipe/nv40/nv40_state_emit.c Zobrazit soubor

@@ -1,94 +1,114 @@
#include "nv40_context.h"
#include "nv40_state.h"

void
nv40_emit_hw_state(struct nv40_context *nv40)
/* Emit relocs for every referenced buffer.
*
* This is to ensure the bufmgr has an accurate idea of how
* the buffer is used. These relocs appear in the push buffer as
* NOPs, and will only be turned into state changes if a buffer
* actually moves.
*/
static void
nv40_state_emit_dummy_relocs(struct nv40_context *nv40)
{
int i;

if (nv40->dirty & NV40_NEW_FRAGPROG) {
nv40_fragprog_bind(nv40, nv40->fragprog.current);
/*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */
}

if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) {
nv40_fragtex_bind(nv40);

BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (2);
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (1);
nv40->dirty &= ~NV40_NEW_FRAGPROG;
}

if (nv40->dirty & NV40_NEW_VERTPROG) {
nv40_vertprog_bind(nv40, nv40->vertprog.current);
nv40->dirty &= ~NV40_NEW_VERTPROG;
}

nv40->dirty_samplers = 0;

/* Emit relocs for every referenced buffer.
* This is to ensure the bufmgr has an accurate idea of how
* the buffer is used. This isn't very efficient, but we don't
* seem to take a significant performance hit. Will be improved
* at some point. Vertex arrays are emitted by nv40_vbo.c
*/
unsigned rt_flags, tx_flags, fp_flags;
int i;
rt_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR | NOUVEAU_BO_DUMMY;
tx_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_DUMMY;
fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_DUMMY;

/* Render targets */
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR0_OFFSET, 1);
OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCm(nv40->rt[0], rt_flags,
curie, NV40TCL_DMA_COLOR0, 1);
OUT_RELOCo(nv40->rt[0], rt_flags);
OUT_RELOCm(nv40->rt[0], rt_flags,
curie, NV40TCL_COLOR0_OFFSET, 1);
OUT_RELOCl(nv40->rt[0], 0, rt_flags);
}

if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 1);
OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCm(nv40->rt[1], rt_flags,
curie, NV40TCL_DMA_COLOR1, 1);
OUT_RELOCo(nv40->rt[1], rt_flags);
OUT_RELOCm(nv40->rt[1], rt_flags,
curie, NV40TCL_COLOR1_OFFSET, 1);
OUT_RELOCl(nv40->rt[1], 0, rt_flags);
}

if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCm(nv40->rt[2], rt_flags,
curie, NV40TCL_DMA_COLOR2, 1);
OUT_RELOCo(nv40->rt[2], rt_flags);
OUT_RELOCm(nv40->rt[2], rt_flags,
curie, NV40TCL_COLOR2_OFFSET, 1);
OUT_RELOCl(nv40->rt[2], 0, rt_flags);
}

if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCm(nv40->rt[3], rt_flags,
curie, NV40TCL_DMA_COLOR3, 1);
OUT_RELOCo(nv40->rt[3], rt_flags);
OUT_RELOCm(nv40->rt[3], rt_flags,
curie, NV40TCL_COLOR3_OFFSET, 1);
OUT_RELOCl(nv40->rt[3], 0, rt_flags);
}

if (nv40->zeta) {
BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCm(nv40->zeta, rt_flags, curie, NV40TCL_DMA_ZETA, 1);
OUT_RELOCo(nv40->zeta, rt_flags);
OUT_RELOCm(nv40->zeta, rt_flags, curie, NV40TCL_ZETA_OFFSET, 1);
OUT_RELOCl(nv40->zeta, 0, rt_flags);
}

/* Texture images */
for (i = 0; i < 16; i++) {
if (!(nv40->fp_samplers & (1 << i)))
continue;
BEGIN_RING(curie, NV40TCL_TEX_OFFSET(i), 2);
OUT_RELOCl(nv40->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCm(nv40->tex[i].buffer, tx_flags,
curie, NV40TCL_TEX_OFFSET(i), 2);
OUT_RELOCl(nv40->tex[i].buffer, 0, tx_flags);
OUT_RELOCd(nv40->tex[i].buffer, nv40->tex[i].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
tx_flags | NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
NV40TCL_TEX_FORMAT_DMA1);
}

/* Fragment program */
BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
OUT_RELOC (nv40->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
NV40TCL_FP_ADDRESS_DMA1);
OUT_RELOCm(nv40->fragprog.active->buffer, fp_flags,
curie, NV40TCL_FP_ADDRESS, 1);
OUT_RELOC (nv40->fragprog.active->buffer, 0,
fp_flags | NOUVEAU_BO_OR | NOUVEAU_BO_LOW,
NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
}

/*
 * Emit pending state for the context, then emit the dummy relocs.
 *
 * Steps, in order:
 *   1. Bind the current fragment program if NV40_NEW_FRAGPROG is dirty.
 *   2. If any sampler is dirty or NV40_NEW_FRAGPROG is dirty, bind the
 *      fragment textures, write NV40TCL_TEX_CACHE_CTL twice (2, then 1)
 *      and clear NV40_NEW_FRAGPROG.
 *   3. Bind the current vertex program if NV40_NEW_VERTPROG is dirty and
 *      clear that bit.
 *   4. Clear dirty_samplers.
 *   5. Call nv40_state_emit_dummy_relocs().
 */
void
nv40_emit_hw_state(struct nv40_context *nv40)
{
/* NV40_NEW_FRAGPROG is intentionally left set here; the sampler block
 * below also tests it and is the one that clears it.
 */
if (nv40->dirty & NV40_NEW_FRAGPROG) {
nv40_fragprog_bind(nv40, nv40->fragprog.current);
/*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */
}

if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) {
nv40_fragtex_bind(nv40);

/* Two consecutive writes to the same method, first 2 and then 1;
 * presumably a texture cache flush sequence - TODO confirm.
 */
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (2);
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (1);
nv40->dirty &= ~NV40_NEW_FRAGPROG;
}

if (nv40->dirty & NV40_NEW_VERTPROG) {
nv40_vertprog_bind(nv40, nv40->vertprog.current);
nv40->dirty &= ~NV40_NEW_VERTPROG;
}

/* Sampler state has been handled above (or was already clean). */
nv40->dirty_samplers = 0;

/* Runs on every call, whether or not anything above was dirty. */
nv40_state_emit_dummy_relocs(nv40);
}


Načítá se…
Zrušit
Uložit