@@ -27,6 +27,7 @@ C_SOURCES = \
	nvc0_pc_regalloc.c \
	nvc0_push.c \
	nvc0_push2.c \
	nvc0_fence.c \
	nvc0_mm.c

include ../../Makefile.template
@@ -29,6 +29,7 @@ nvc0 = env.ConvenienceLibrary(
        'nvc0_push.c',
        'nvc0_push2.c',
        'nvc0_fence.c',
        'nvc0_mm.c',
        ])

Export('nvc0')
@@ -11,46 +11,116 @@
#include "nvc0_context.h"
#include "nvc0_resource.h"

#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff

static INLINE boolean
nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
                     unsigned domain)
{
   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo,
                                 &buf->offset);
      if (!buf->bo)
         return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo,
                                 &buf->offset);
      if (!buf->bo)
         return FALSE;
   } else {
      assert(!domain);
      if (!buf->data)
         buf->data = MALLOC(buf->base.width0);
      if (!buf->data)
         return FALSE;
   }
   buf->domain = domain;
   return TRUE;
}
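A minimal caller sketch (names from the function above; error handling elided) of the fallback chain: a VRAM request silently degrades to GART, and domain 0 ends up in malloc'd system memory.

   if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
      return NULL; /* both VRAM and GART failed */
   /* buf->domain records where the storage actually ended up */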
static INLINE void
release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
{
   (*mm)->next = fence->buffers;
   fence->buffers = (*mm);
   (*mm) = NULL;
}

static void
nvc0_buffer_destroy(struct pipe_screen *pscreen,
                    struct pipe_resource *presource)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   struct nvc0_resource *res = nvc0_resource(presource);

   nouveau_bo_ref(NULL, &res->bo);

   if (res->mm)
      release_allocation(&res->mm, screen->fence.current);

   if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data)
      FREE(res->data);

   FREE(res);
}
static INLINE uint32_t
nouveau_buffer_rw_flags(unsigned pipe)
{
   uint32_t flags = 0;

   if (pipe & PIPE_TRANSFER_READ)
      flags = NOUVEAU_BO_RD;
   if (pipe & PIPE_TRANSFER_WRITE)
      flags |= NOUVEAU_BO_WR;

   return flags;
}
static void *
nvc0_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_transfer *transfer)
{
   struct nvc0_resource *res = nvc0_resource(transfer->resource);
   struct nvc0_fence *fence;
   uint8_t *map;
   int ret;
   uint32_t flags = nouveau_buffer_rw_flags(transfer->usage);

   if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) &&
       (flags & NOUVEAU_BO_WR))
      nvc0_context(pipe)->vbo_dirty = TRUE;

   if (res->domain == 0)
      return res->data + transfer->box.x;

   if (res->domain == NOUVEAU_BO_VRAM) {
      NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n");
      /* if this happens, migrate back to GART */
      return NULL;
   }

   if (res->score > -1024)
      --res->score;

   ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC);
   if (ret)
      return NULL;
   map = res->bo->map;
   nouveau_bo_unmap(res->bo);

   fence = (flags == NOUVEAU_BO_RD) ? res->fence_wr : res->fence;

   if (fence) {
      if (nvc0_fence_wait(fence) == FALSE)
         NOUVEAU_ERR("failed to fence buffer\n");

      nvc0_fence_reference(&res->fence, NULL);
      nvc0_fence_reference(&res->fence_wr, NULL);
   }

   return map + transfer->box.x + res->offset;
}
@@ -62,13 +132,12 @@ nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
{
   struct nvc0_resource *res = nvc0_resource(transfer->resource);

   if (!res->bo)
      return;

   nouveau_screen_bo_map_flush_range(pipe->screen,
                                     res->bo,
                                     res->offset + transfer->box.x + box->x,
                                     box->width);
}
@@ -78,11 +147,10 @@ nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
{
   struct nvc0_resource *res = nvc0_resource(transfer->resource);

   if (res->data)
      return;

   /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */
}

const struct u_resource_vtbl nvc0_buffer_vtbl =
@@ -102,7 +170,9 @@ struct pipe_resource *
nvc0_buffer_create(struct pipe_screen *pscreen,
                   const struct pipe_resource *templ)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   struct nvc0_resource *buffer;
   boolean ret;

   buffer = CALLOC_STRUCT(nvc0_resource);
   if (!buffer)
@@ -114,14 +184,11 @@ nvc0_buffer_create(struct pipe_screen *pscreen,
   buffer->base.screen = pscreen;

   if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER)
      ret = nvc0_buffer_allocate(screen, buffer, 0);
   else
      ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART);

   if (ret == FALSE)
      goto fail;

   return &buffer->base;
@@ -154,13 +221,77 @@ nvc0_user_buffer_create(struct pipe_screen *pscreen,
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY;

   return &buffer->base;
}
/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
boolean
nvc0_buffer_migrate(struct nvc0_context *nvc0,
                    struct nvc0_resource *buf, unsigned domain)
{
   struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
   struct nouveau_bo *bo;
   unsigned size = buf->base.width0;
   int ret;

   if (domain == NOUVEAU_BO_GART && buf->domain == 0) {
      if (!nvc0_buffer_allocate(screen, buf, domain))
         return FALSE;
      ret = nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
      if (ret)
         return FALSE;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      nouveau_bo_unmap(buf->bo);
   } else
   if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) {
      struct nvc0_mm_allocation *mm = buf->mm;

      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nvc0_buffer_allocate(screen, buf, domain);

      nvc0_m2mf_copy_linear(nvc0, buf->bo, 0, NOUVEAU_BO_VRAM,
                            bo, 0, NOUVEAU_BO_GART, buf->base.width0);

      release_allocation(&mm, screen->fence.current);
      nouveau_bo_ref(NULL, &bo);
   } else
   if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) {
      /* should use a scratch buffer instead here */
      if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART))
         return FALSE;
      return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM);
   } else
      return FALSE;

   buf->domain = domain;

   return TRUE;
}
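A hypothetical promotion policy on top of this function; the score threshold here is an assumption for illustration, not part of the patch.

   /* promote a GART buffer the CPU rarely maps (score decays on each map) */
   if (buf->domain == NOUVEAU_BO_GART && buf->score > 64)
      nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM);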
/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * MUST NOT FLUSH THE PUSH BUFFER, we could be in the middle of a method.
 */
boolean
nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size)
{
   struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
   int ret;

   assert(buf->data && !buf->domain);

   if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART))
      return FALSE;
   ret = nouveau_bo_map_range(buf->bo, base + buf->offset, size,
                              NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
   if (ret)
      return FALSE;
   memcpy(buf->bo->map, buf->data + base, size);
   nouveau_bo_unmap(buf->bo);

   return TRUE;
}
@@ -49,6 +49,8 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags,
   if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) {
      FIRE_RING(chan);

      nvc0_screen_fence_next(nvc0->screen);
   }
}

@@ -118,6 +118,7 @@ struct nvc0_context {
   unsigned sample_mask;

   boolean vbo_dirty;
   boolean vbo_push_hint;

   struct draw_context *draw;
};

@@ -150,6 +151,7 @@ static INLINE void
nvc0_make_buffer_resident(struct nvc0_context *nvc0,
                          struct nvc0_resource *res, unsigned flags)
{
   nvc0_resource_validate(res, flags);

   nvc0_make_bo_resident(nvc0, res->bo, flags);
}
@@ -30,14 +30,14 @@
boolean
nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence,
                      boolean emit)
{
   *fence = CALLOC_STRUCT(nvc0_fence);
   if (!*fence)
      return FALSE;

   (*fence)->screen = screen;
   (*fence)->ref = 1;

   if (emit)
      nvc0_fence_emit(*fence);
@@ -53,15 +53,15 @@ nvc0_fence_emit(struct nvc0_fence *fence)
   fence->sequence = ++screen->fence.sequence;

   assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);

   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
   OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
   OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
   OUT_RING  (chan, fence->sequence);
   OUT_RING  (chan, NVC0_3D_QUERY_GET_FENCE);

   ++fence->ref;

   if (screen->fence.tail)
      screen->fence.tail->next = fence;
@@ -94,6 +94,18 @@ nvc0_fence_del(struct nvc0_fence *fence)
   FREE(fence);
}

static void
nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
{
   struct nvc0_mm_allocation *alloc = fence->buffers;

   while (alloc) {
      struct nvc0_mm_allocation *next = alloc->next;
      nvc0_mm_free(alloc);
      alloc = next;
   }
}

static void
nvc0_screen_fence_update(struct nvc0_screen *screen)
{
@@ -110,10 +122,12 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)
      sequence = fence->sequence;

      fence->state = NVC0_FENCE_STATE_SIGNALLED;
      if (fence->trigger.func)
         fence->trigger.func(fence->trigger.arg);

      if (fence->buffers)
         nvc0_fence_trigger_release_buffers(fence);

      nvc0_fence_reference(&fence, NULL);

      if (sequence == screen->fence.sequence_ack)
         break;
   }
@@ -122,24 +136,45 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)
      screen->fence.tail = NULL;
}
#define NVC0_FENCE_MAX_SPINS (1 << 17)

boolean
nvc0_fence_wait(struct nvc0_fence *fence)
{
   struct nvc0_screen *screen = fence->screen;
   int spins = 0;

   if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
      nvc0_fence_emit(fence);

      FIRE_RING(screen->base.channel);

      if (fence == screen->fence.current)
         nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
   }

   do {
      nvc0_screen_fence_update(screen);

      if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
         return TRUE;
      spins++;
#ifdef PIPE_OS_UNIX
      if (!(spins % 8)) /* donate a few cycles */
         sched_yield();
#endif
   } while (spins < NVC0_FENCE_MAX_SPINS);

   /* the loop only exits once the spin limit is reached */
   NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);

   return FALSE;
}
void
nvc0_screen_fence_next(struct nvc0_screen *screen)
{
   nvc0_fence_emit(screen->fence.current);
   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
   nvc0_screen_fence_update(screen);
}
@@ -3,24 +3,21 @@
#define __NVC0_FENCE_H__

#include "util/u_inlines.h"
#include "util/u_double_list.h"

struct nvc0_fence_trigger {
   void (*func)(void *);
   void *arg;
   struct nvc0_fence_trigger *next;
};

#define NVC0_FENCE_STATE_AVAILABLE 0
#define NVC0_FENCE_STATE_EMITTED   1
#define NVC0_FENCE_STATE_SIGNALLED 2

struct nvc0_mm_allocation;

struct nvc0_fence {
   struct nvc0_fence *next;
   struct nvc0_screen *screen;
   int state;
   int ref;
   uint32_t sequence;
   struct nvc0_fence_trigger trigger;
   struct nvc0_mm_allocation *buffers;
};

void nvc0_fence_emit(struct nvc0_fence *);
@@ -31,10 +28,20 @@ boolean nvc0_fence_wait(struct nvc0_fence *);

static INLINE void
nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
{
   if (*ref) {
      if (--(*ref)->ref == 0)
         nvc0_fence_del(*ref);
   }
   if (fence)
      ++fence->ref;

   *ref = fence;
}

static INLINE struct nvc0_fence *
nvc0_fence(struct pipe_fence_handle *fence)
{
   return (struct nvc0_fence *)fence;
}

#endif // __NVC0_FENCE_H__
@@ -63,7 +63,7 @@ static const uint32_t nvc0_9097_vertex_array_select[] =
static const uint32_t nvc0_9097_color_mask_brdc[] =
{
   0x05a00021, /* maddr [0x1a00, increment = 4] */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
@@ -0,0 +1,245 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"

#include "nvc0_screen.h"

#define MM_MIN_ORDER 7
#define MM_MAX_ORDER 20

#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1)

#define MM_MIN_SIZE (1 << MM_MIN_ORDER)
#define MM_MAX_SIZE (1 << MM_MAX_ORDER)

struct mm_bucket {
   struct list_head free;
   struct list_head used;
   struct list_head full;
   int num_free;
};

struct nvc0_mman {
   struct nouveau_device *dev;
   struct mm_bucket bucket[MM_NUM_BUCKETS];
   uint32_t storage_type;
   uint32_t domain;
   uint64_t allocated;
};

struct mm_slab {
   struct list_head head;
   struct nouveau_bo *bo;
   struct nvc0_mman *cache;
   int order;
   int count;
   int free;
   uint32_t bits[0];
};
static int
mm_slab_alloc(struct mm_slab *slab)
{
   int i, n, b;

   if (slab->free == 0)
      return -1;

   for (i = 0; i < (slab->count + 31) / 32; ++i) {
      b = ffs(slab->bits[i]) - 1;
      if (b >= 0) {
         n = i * 32 + b;
         assert(n < slab->count);
         slab->free--;
         slab->bits[i] &= ~(1 << b);
         return n;
      }
   }
   return -1;
}

static INLINE void
mm_slab_free(struct mm_slab *slab, int i)
{
   assert(i < slab->count);
   slab->bits[i / 32] |= 1 << (i % 32);
   slab->free++;
   assert(slab->free <= slab->count);
}
/* floor(log2(size)); rounded up to the next order for non-powers of two */
static INLINE int
mm_get_order(uint32_t size)
{
   int s = __builtin_clz(size) ^ 31;

   if (size > (1 << s))
      s += 1;
   return s;
}
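Worked examples (a sketch; the asserts just restate the math above):

   assert(mm_get_order(128)  == 7);  /* exactly 1 << 7, MM_MIN_ORDER */
   assert(mm_get_order(4096) == 12); /* exact power of two           */
   assert(mm_get_order(4097) == 13); /* rounded up to the next order */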
static struct mm_bucket *
mm_bucket_by_order(struct nvc0_mman *cache, int order)
{
   if (order > MM_MAX_ORDER)
      return NULL;
   return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER];
}

static struct mm_bucket *
mm_bucket_by_size(struct nvc0_mman *cache, unsigned size)
{
   return mm_bucket_by_order(cache, mm_get_order(size));
}

/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
static INLINE uint32_t
mm_default_slab_size(unsigned chunk_order)
{
   static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
   {
      12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
   };
   assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);

   return 1 << slab_order[chunk_order - MM_MIN_ORDER];
}
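Reading the table: chunk order 7 (128-byte chunks) gets a 1 << 12 = 4 KiB slab, i.e. 32 chunks; chunk order 12 (4 KiB chunks) gets a 1 << 17 = 128 KiB slab, again 32 chunks; the largest, order 20 (1 MiB chunks), gets a 4 MiB slab holding only 4 chunks.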
static int
mm_slab_new(struct nvc0_mman *cache, int chunk_order)
{
   struct mm_slab *slab;
   int words, ret;
   const uint32_t size = mm_default_slab_size(chunk_order);

   words = ((size >> chunk_order) + 31) / 32;
   assert(words);

   slab = MALLOC(sizeof(struct mm_slab) + words * 4);
   if (!slab)
      return PIPE_ERROR_OUT_OF_MEMORY;

   memset(&slab->bits[0], ~0, words * 4);

   slab->bo = NULL;
   ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
                             0, cache->storage_type, &slab->bo);
   if (ret) {
      FREE(slab);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   LIST_INITHEAD(&slab->head);

   slab->cache = cache;
   slab->order = chunk_order;
   slab->count = slab->free = size >> chunk_order;

   LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free);

   cache->allocated += size;

   debug_printf("MM: new slab, total memory = %lu KiB\n",
                (unsigned long)(cache->allocated / 1024));

   return PIPE_OK;
}
/* @return token to identify slab or NULL if we just allocated a new bo */
struct nvc0_mm_allocation *
nvc0_mm_allocate(struct nvc0_mman *cache,
                 uint32_t size, struct nouveau_bo **bo, uint32_t *offset)
{
   struct mm_bucket *bucket;
   struct mm_slab *slab;
   struct nvc0_mm_allocation *alloc;
   int ret;

   bucket = mm_bucket_by_size(cache, size);
   if (!bucket) {
      ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
                                0, cache->storage_type, bo);
      if (ret)
         debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret);

      *offset = 0;
      return NULL;
   }

   if (!LIST_IS_EMPTY(&bucket->used)) {
      slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head);
   } else {
      if (LIST_IS_EMPTY(&bucket->free)) {
         /* mm_slab_new adds the slab to bucket->free on success */
         if (mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)))
            return NULL;
      }
      slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head);

      LIST_DEL(&slab->head);
      LIST_ADD(&slab->head, &bucket->used);
   }

   *offset = mm_slab_alloc(slab) << slab->order;

   alloc = MALLOC_STRUCT(nvc0_mm_allocation);
   if (!alloc)
      return NULL;

   nouveau_bo_ref(slab->bo, bo);

   if (slab->free == 0) {
      LIST_DEL(&slab->head);
      LIST_ADD(&slab->head, &bucket->full);
   }

   alloc->next = NULL;
   alloc->offset = *offset;
   alloc->priv = (void *)slab;

   return alloc;
}
void
nvc0_mm_free(struct nvc0_mm_allocation *alloc)
{
   struct mm_slab *slab = (struct mm_slab *)alloc->priv;
   struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order);

   mm_slab_free(slab, alloc->offset >> slab->order);

   if (slab->free == 1) {
      LIST_DEL(&slab->head);

      if (slab->count > 1)
         LIST_ADDTAIL(&slab->head, &bucket->used);
      else
         LIST_ADDTAIL(&slab->head, &bucket->free);
   }

   FREE(alloc);
}
struct nvc0_mman *
nvc0_mm_create(struct nouveau_device *dev, uint32_t domain,
               uint32_t storage_type)
{
   struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman);
   int i;

   if (!cache)
      return NULL;

   cache->dev = dev;
   cache->domain = domain;
   cache->storage_type = storage_type;
   cache->allocated = 0;

   for (i = 0; i < MM_NUM_BUCKETS; ++i) {
      LIST_INITHEAD(&cache->bucket[i].free);
      LIST_INITHEAD(&cache->bucket[i].used);
      LIST_INITHEAD(&cache->bucket[i].full);
   }

   return cache;
}
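An end-to-end usage sketch (names as above; error handling elided). The returned token identifies the slab chunk; a NULL token with a non-NULL bo means the request was too large for the buckets and got a dedicated bo instead.

   struct nouveau_bo *bo = NULL;
   uint32_t offset;
   struct nvc0_mm_allocation *mm =
      nvc0_mm_allocate(screen->mm_GART, 1000, &bo, &offset);
   /* ... let the GPU use bo at +offset ... */
   if (mm)
      nvc0_mm_free(mm); /* or defer: release_allocation(&mm, fence) */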
@@ -215,9 +215,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
      struct nvc0_resource *res = nvc0_resource(vb->buffer);

      data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD);

      if (info->indexed)
         data += info->index_bias * vb->stride;
@@ -29,10 +29,43 @@ struct nvc0_resource {
   uint8_t status;
   uint8_t domain;

   int16_t score; /* low if mapped very often, if high can move to VRAM */

   struct nvc0_fence *fence;
   struct nvc0_fence *fence_wr;

   struct nvc0_mm_allocation *mm;
};

/* XXX: wait for fence (atm only using this for vertex push) */
static INLINE void *
nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset,
                         uint32_t flags)
{
   void *map;

   if (res->domain == 0)
      return res->data + offset;

   if (nouveau_bo_map_range(res->bo, res->offset + offset,
                            res->base.width0, flags | NOUVEAU_BO_NOSYNC))
      return NULL;

   /* With suballocation, the same bo can be mapped several times, so unmap
    * immediately. Maps are guaranteed to persist. */
   map = res->bo->map;
   nouveau_bo_unmap(res->bo);
   return map;
}

static INLINE void
nvc0_resource_unmap(struct nvc0_resource *res)
{
   /* no-op while maps persist (see nvc0_resource_map_offset above) */
   if (res->domain != 0 && 0)
      nouveau_bo_unmap(res->bo);
}
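A usage sketch of the persistent-map contract (hedged; the staging destination and size are hypothetical): map, copy, then call the no-op unmap for symmetry.

   uint8_t *data = nvc0_resource_map_offset(res, vb->buffer_offset,
                                            NOUVEAU_BO_RD);
   if (data)
      memcpy(staging, data, size); /* hypothetical destination */
   nvc0_resource_unmap(res);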
#define NVC0_TILE_H(m) (8 << ((m >> 4) & 0xf))
#define NVC0_TILE_D(m) (1 << (m >> 8))

@@ -67,7 +100,7 @@ nvc0_resource(struct pipe_resource *resource)
static INLINE boolean
nvc0_resource_mapped_by_gpu(struct pipe_resource *resource)
{
   return nvc0_resource(resource)->domain != 0;
}

void
@@ -106,4 +139,13 @@ nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
void
nvc0_miptree_surface_del(struct pipe_surface *ps);

struct nvc0_context;

boolean
nvc0_buffer_migrate(struct nvc0_context *,
                    struct nvc0_resource *, unsigned domain);

boolean
nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size);

#endif
@@ -229,12 +229,28 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
   return pos + size;
}

static void
nvc0_screen_fence_reference(struct pipe_screen *pscreen,
                            struct pipe_fence_handle **ptr,
                            struct pipe_fence_handle *fence)
{
   nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence));
}

static int
nvc0_screen_fence_signalled(struct pipe_screen *pscreen,
                            struct pipe_fence_handle *fence,
                            unsigned flags)
{
   return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED);
}

static int
nvc0_screen_fence_finish(struct pipe_screen *pscreen,
                         struct pipe_fence_handle *fence,
                         unsigned flags)
{
   return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE;
}

static void
@@ -339,6 +355,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
   pscreen->get_param = nvc0_screen_get_param;
   pscreen->get_shader_param = nvc0_screen_get_shader_param;
   pscreen->get_paramf = nvc0_screen_get_paramf;
   pscreen->fence_reference = nvc0_screen_fence_reference;
   pscreen->fence_signalled = nvc0_screen_fence_signalled;
   pscreen->fence_finish = nvc0_screen_fence_finish;

   nvc0_screen_init_resource_functions(pscreen);

@@ -353,6 +371,18 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
   screen->fence.map = screen->fence.bo->map;
   nouveau_bo_unmap(screen->fence.bo);

   for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) {
      ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE,
                           &screen->scratch.bo[i]);
      if (ret)
         goto fail;
   }

   for (i = 0; i < 8; ++i) {
      BEGIN_RING(chan, (i << 13) | (0x0000 >> 2), 1);
      OUT_RING  (chan, 0x0000);
   }

   BEGIN_RING(chan, RING_MF_(0x0000), 1);
   OUT_RING  (chan, 0x9039);
   BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3);
@@ -510,15 +540,11 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
   BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
   OUT_RING  (chan, 1);

   BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
   OUT_RING  (chan, 0x40);
   BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1);
   OUT_RING  (chan, 0);

   BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
   OUT_RING  (chan, 0x30);
   BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);
   OUT_RING  (chan, 3);
@@ -538,18 +564,19 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
   BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
   OUT_RING  (chan, 0xab);
   OUT_RING  (chan, 0x00000000);
   BEGIN_RING(chan, RING_3D_(0x07f0), 2);
   OUT_RING  (chan, 0xac);
   OUT_RING  (chan, 0x00000000);

   FIRE_RING (chan);

   screen->tic.entries = CALLOC(4096, sizeof(void *));
   screen->tsc.entries = screen->tic.entries + 2048;

   screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                                    0x000);
   screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000);
   screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);

   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);

   return pscreen;

fail:
@@ -10,9 +10,13 @@
#define NVC0_TIC_MAX_ENTRIES 2048
#define NVC0_TSC_MAX_ENTRIES 2048

struct nvc0_mman;
struct nvc0_context;
struct nvc0_fence;

#define NVC0_SCRATCH_SIZE (2 << 20)
#define NVC0_SCRATCH_NR_BUFFERS 2

struct nvc0_screen {
   struct nouveau_screen base;
   struct nouveau_winsys *nvws;
@@ -29,6 +33,13 @@ struct nvc0_screen {
   struct nouveau_resource *text_heap;

   struct {
      struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
      uint8_t *buf;
      int index;
      uint32_t offset;
   } scratch;

   struct {
      void **entries;
      int next;
@@ -50,6 +61,10 @@ struct nvc0_screen {
      uint32_t sequence_ack;
      struct nouveau_bo *bo;
   } fence;

   struct nvc0_mman *mm_GART;
   struct nvc0_mman *mm_VRAM;
   struct nvc0_mman *mm_VRAM_fe0;
};
static INLINE struct nvc0_screen *
@@ -58,14 +73,60 @@ nvc0_screen(struct pipe_screen *screen)
   return (struct nvc0_screen *)screen;
}

/* Since a resource can be migrated, we need to decouple allocations from
 * them. This struct is linked with fences for delayed freeing of allocs.
 */
struct nvc0_mm_allocation {
   struct nvc0_mm_allocation *next;
   void *priv;
   uint32_t offset;
};
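A sketch of the deferred-free protocol this struct enables (same pattern as release_allocation() in the buffer code above): chain the allocation onto the current fence and let the fence-signal path call nvc0_mm_free().

   alloc->next = screen->fence.current->buffers;
   screen->fence.current->buffers = alloc;
   /* nvc0_fence_trigger_release_buffers() frees the chain on signal */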
extern struct nvc0_mman *
nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type);

extern struct nvc0_mm_allocation *
nvc0_mm_allocate(struct nvc0_mman *,
                 uint32_t size, struct nouveau_bo **, uint32_t *offset);
extern void
nvc0_mm_free(struct nvc0_mm_allocation *);

void nvc0_screen_make_buffers_resident(struct nvc0_screen *);

int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
static INLINE void
nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
{
   struct nvc0_screen *screen = nvc0_screen(res->base.screen);

   assert(res->mm);

   nvc0_fence_reference(&res->fence, screen->fence.current);
   if (flags & NOUVEAU_BO_WR)
      nvc0_fence_reference(&res->fence_wr, screen->fence.current);

   nouveau_reloc_emit(screen->base.channel,
                      NULL, 0, NULL, res->bo, 0, 0, NOUVEAU_BO_RDWR, 0, 0);
}

boolean
nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);

void
nvc0_screen_fence_next(struct nvc0_screen *);

static INLINE boolean
nvc0_screen_fence_emit(struct nvc0_screen *screen)
{
   nvc0_fence_emit(screen->fence.current);

   return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
}
struct nvc0_format {
   uint32_t rt;
   uint32_t tic;
@@ -76,10 +76,10 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);
   OUT_RING  (chan, vp->max_gpr);

   // BEGIN_RING(chan, RING_3D_(0x163c), 1);
   // OUT_RING  (chan, 0);

   // BEGIN_RING(chan, RING_3D_(0x2600), 1);
   // OUT_RING  (chan, 1);
}

void
@@ -22,6 +22,8 @@
#include <unistd.h>

#define NOUVEAU_DEBUG 1

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

@@ -194,7 +196,7 @@ static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_register *reg, struct nv_basic_block *b)
{
#ifdef NOUVEAU_DEBUG
   long i = (reg - &bld->tvs[0][0]) / 4;
   long c = (reg - &bld->tvs[0][0]) & 3;

@@ -1359,7 +1361,7 @@ bld_instruction(struct bld_context *bld,
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   uint8_t mask = insn->Dst[0].Register.WriteMask;

#ifdef NOUVEAU_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif
@@ -111,6 +111,8 @@ nvc0_m2mf_push_linear,
   uint32_t *src = (uint32_t *)data;
   unsigned count = (size + 3) / 4;

   MARK_RING (chan, 8, 2);

   BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
   OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
   OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
@@ -125,6 +127,7 @@ nvc0_m2mf_push_linear,
      if (nr < 9) {
         FIRE_RING(chan);
         nvc0_make_bo_resident(nvc0, dst, NOUVEAU_BO_WR);
         continue;
      }

      nr = MIN2(count, nr - 1);
@@ -138,53 +141,90 @@ nvc0_m2mf_push_linear,
   }
}
void
nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
                      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
                      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
                      unsigned size)
{
   struct nouveau_channel *chan = nvc0->screen->base.channel;

   while (size) {
      unsigned bytes = MIN2(size, 1 << 17);

      MARK_RING (chan, 11, 4);

      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
      OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
      OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
      BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
      OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
      OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
      OUT_RING  (chan, bytes);
      OUT_RING  (chan, 1);
      BEGIN_RING(chan, RING_MF(EXEC), 1);
      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
                 NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);

      srcoff += bytes;
      dstoff += bytes;
      size -= bytes;
   }
}
static void
nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
                    const struct nvc0_m2mf_rect *dst,
                    const void *data,
                    unsigned nblocksx, unsigned nblocksy)
{
   struct nouveau_channel *chan = nvc0_screen(pscreen)->base.channel;
   const uint8_t *src = (const uint8_t *)data;
   const int cpp = dst->cpp;
   const int line_len = nblocksx * cpp;
   int dy = dst->y;

   assert(dst->bo->tile_flags);

   BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
   OUT_RING  (chan, dst->tile_mode);
   OUT_RING  (chan, dst->width * cpp);
   OUT_RING  (chan, dst->height);
   OUT_RING  (chan, dst->depth);
   OUT_RING  (chan, dst->z);

   while (nblocksy) {
      int line_count, words;
      /* leave room for the 12 words of methods emitted besides the data */
      int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN + 12);

      if (size <= 12) {
         FIRE_RING(chan);
         continue;
      }
      line_count = ((size - 12) * 4) / line_len;
      if (line_count > nblocksy)
         line_count = nblocksy;
      words = (line_count * line_len + 3) / 4;

      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
      OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
      OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
      BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
      OUT_RING  (chan, dst->x * cpp);
      OUT_RING  (chan, dy);
      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
      OUT_RING  (chan, line_len);
      OUT_RING  (chan, line_count);
      BEGIN_RING(chan, RING_MF(EXEC), 1);
      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
                 NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);

      BEGIN_RING(chan, RING_MF(DATA), words);
      OUT_RINGp (chan, src, words);

      dy += line_count;
      src += line_len * line_count;
      nblocksy -= line_count;
   }
}
@@ -242,6 +282,11 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
   tx->rect[0].pitch = lvl->pitch;
   tx->rect[0].domain = NOUVEAU_BO_VRAM;

   if (!(usage & PIPE_TRANSFER_READ) &&
       (res->depth0 == 1) && (tx->nblocksy * tx->base.stride < 512 * 4)) {
      /* don't allocate scratch buffer, upload through FIFO */
   }

   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
                        tx->nblocksy * tx->base.stride, &tx->rect[1].bo);
   if (ret) {
@@ -131,8 +131,16 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
      ve = &vertex->element[i];
      vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];

      if (!nvc0_resource_mapped_by_gpu(vb->buffer)) {
         if (nvc0->vbo_push_hint) {
            nvc0->vbo_fifo |= 1 << i;
         } else {
            nvc0_migrate_vertices(nvc0_resource(vb->buffer),
                                  vb->buffer_offset,
                                  vb->buffer->width0 - vb->buffer_offset);
            nvc0->vbo_dirty = TRUE;
         }
      }

      if (1 || likely(vb->stride)) {
         OUT_RING(chan, ve->state);
@@ -142,7 +150,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
   }

   for (i = 0; i < vertex->num_elements; ++i) {
      struct nvc0_resource *res;
      unsigned size, offset;

      ve = &vertex->element[i];
@@ -158,7 +166,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
         continue;
      }

      res = nvc0_resource(vb->buffer);
      size = vb->buffer->width0;
      offset = ve->pipe.src_offset + vb->buffer_offset;
@@ -173,17 +181,16 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
         INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
      }

      nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, res, NOUVEAU_BO_RD);

      BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
      OUT_RING  (chan, (1 << 12) | vb->stride);
      BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
      OUT_RING  (chan, i);
      OUT_RESRCh(chan, res, size, NOUVEAU_BO_RD);
      OUT_RESRCl(chan, res, size, NOUVEAU_BO_RD);
      OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
      OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
   }
   for (; i < nvc0->state.num_vtxelts; ++i) {
      BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
@@ -231,8 +238,6 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
   struct nvc0_context *nvc0 = chan->user_private;

   nvc0_bufctx_emit_relocs(nvc0);
}

#if 0
@@ -325,7 +330,7 @@ nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
      count &= ~3;
   }
   while (count) {
      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;

      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
      for (i = 0; i < nr; ++i) {
@@ -333,7 +338,7 @@ nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
                  (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
         map += 4;
      }
      count -= nr * 4;
   }
}
@@ -349,14 +354,14 @@ nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
      OUT_RING  (chan, *map++);
   }
   while (count) {
      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;

      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
      for (i = 0; i < nr; ++i) {
         OUT_RING(chan, (map[1] << 16) | map[0]);
         map += 2;
      }
      count -= nr * 2;
   }
}
@@ -367,18 +372,41 @@ nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
   map += start;

   while (count) {
      const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);

      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr);
      OUT_RINGp (chan, map, nr);

      map += nr;
      count -= nr;
   }
}
static void
nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
                                    unsigned start, unsigned count)
{
   map += start;

   if (count & 1) {
      count--;
      BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
      OUT_RING  (chan, *map++);
   }
   while (count) {
      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;

      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
      for (i = 0; i < nr; ++i) {
         OUT_RING(chan, (map[1] << 16) | map[0]);
         map += 2;
      }
      count -= nr * 2;
   }
}
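A worked example of the packing (illustration only): for indices {7, 9, 12, 3, 20} the odd leading 7 goes out alone as VB_ELEMENT_U32, then the pairs pack as (9 | 12 << 16) and (3 | 20 << 16) into VB_ELEMENT_U16 words, halving the index bandwidth. Callers must guarantee max_index <= 0xffff.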
static void
nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
                   unsigned mode, unsigned start, unsigned count,
                   unsigned instance_count, int32_t index_bias)
{
@@ -400,7 +428,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0,
   }

   if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) {
      struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
      unsigned offset = nvc0->idxbuf.offset;
      unsigned limit = nvc0->idxbuf.buffer->width0 - 1;
@@ -415,10 +443,10 @@ nvc0_draw_elements(struct nvc0_context *nvc0,
      BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
      OUT_RING  (chan, mode);
      BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7);
      OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
      OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
      OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD);
      OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);
      OUT_RING  (chan, index_size);
      OUT_RING  (chan, start);
      OUT_RING  (chan, count);
@@ -443,7 +471,10 @@ nvc0_draw_elements(struct nvc0_context *nvc0,
         nvc0_draw_elements_inline_u16(chan, data, start, count);
         break;
      case 4:
         if (shorten)
            nvc0_draw_elements_inline_u32_short(chan, data, start, count);
         else
            nvc0_draw_elements_inline_u32(chan, data, start, count);
         break;
      default:
         assert(0);
@@ -464,6 +495,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_channel *chan = nvc0->screen->base.channel;

   /* When picking only a few vertices from a large user buffer, pushing them
    * through the FIFO is better; if the index count is large and we expect
    * repeated vertices, suggest an upload instead.
    */
   nvc0->vbo_push_hint = /* the 64 is heuristic */
      !(info->indexed &&
        ((info->max_index - info->min_index + 64) < info->count));
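   /* Worked example (illustrative numbers): 10000 indices drawn from a
    * ~300-vertex range -> 300 + 64 < 10000, so vbo_push_hint is FALSE and
    * migrating/uploading the vertices pays off; 100 indices scattered over
    * a 100000-vertex user array -> the hint is TRUE, push through the FIFO.
    */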
   nvc0_state_validate(nvc0);

   if (nvc0->state.instance_base != info->start_instance) {
@@ -488,6 +526,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                     info->mode, info->start, info->count,
                     info->instance_count);
   } else {
      boolean shorten = info->max_index <= 65535;

      assert(nvc0->idxbuf.buffer);

      if (info->primitive_restart != nvc0->state.prim_restart) {
@@ -495,6 +535,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
            BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);
            OUT_RING  (chan, 1);
            OUT_RING  (chan, info->restart_index);

            if (info->restart_index > 65535)
               shorten = FALSE;
         } else {
            INLIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0);
         }
@@ -505,7 +548,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
         OUT_RING  (chan, info->restart_index);
      }

      nvc0_draw_elements(nvc0, shorten,
                         info->mode, info->start, info->count,
                         info->instance_count, info->index_bias);
   }
@@ -12,6 +12,8 @@
#include "nouveau/nouveau_resource.h"
#include "nouveau/nouveau_reloc.h"

#include "nvc0_resource.h" /* OUT_RESRC */

#ifndef NV04_PFIFO_MAX_PACKET_LEN
#define NV04_PFIFO_MAX_PACKET_LEN 2047
#endif

@@ -143,6 +145,20 @@ OUT_RELOCh(struct nouveau_channel *chan, struct nouveau_bo *bo,
   return OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_HIGH, 0, 0);
}

static INLINE int
OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res,
           unsigned delta, unsigned flags)
{
   return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags);
}

static INLINE int
OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
           unsigned delta, unsigned flags)
{
   return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
}
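A usage sketch (mirroring the vertex-array code above): the helpers fold res->offset and res->domain into the relocation, so callers dealing with suballocated resources no longer track either by hand.

   OUT_RESRCh(chan, res, vb->buffer_offset, NOUVEAU_BO_RD);
   OUT_RESRCl(chan, res, vb->buffer_offset, NOUVEAU_BO_RD);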
static INLINE void
FIRE_RING(struct nouveau_channel *chan)
{