Signed-off-by: Rob Clark <robdclark@gmail.com>
@@ -42,6 +42,18 @@ fd5_create_compute_state(struct pipe_context *pctx,
		const struct pipe_compute_state *cso)
{
	struct fd_context *ctx = fd_context(pctx);
	/* req_input_mem will only be non-zero for cl kernels (ie. clover).
	 * This isn't a perfect test because I guess it is possible (but
	 * uncommon) for none of the kernel parameters to be a global,
	 * but ctx->set_global_bindings() can't fail, so this is the next
	 * best place to fail if we need a newer version of kernel driver:
	 */
	if ((cso->req_input_mem > 0) &&
			fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) {
		return NULL;
	}

	struct ir3_compiler *compiler = ctx->screen->compiler;
	struct fd5_compute_stateobj *so = CALLOC_STRUCT(fd5_compute_stateobj);
	so->shader = ir3_shader_create_compute(compiler, cso, &ctx->debug);
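For context, a minimal caller-side sketch (hypothetical, not part of this patch) of what the NULL return is for: the frontend creating a CL kernel can detect up front that the kernel driver lacks FD_VERSION_BO_IOVA, rather than failing later when binding global buffers:

	/* illustrative only: hypothetical frontend helper, not from this patch */
	static void *
	try_create_cl_kernel(struct pipe_context *pctx,
			const struct pipe_compute_state *cso)
	{
		void *cs = pctx->create_compute_state(pctx, cso);
		if (!cs)
			return NULL;	/* kernel driver too old for CL global buffers */
		pctx->bind_compute_state(pctx, cs);
		return cs;
	}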
@@ -156,6 +168,7 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
	struct ir3_shader_key key = {0};
	struct ir3_shader_variant *v;
	struct fd_ringbuffer *ring = ctx->batch->draw;
	unsigned i, nglobal = 0;

	emit_setup(ctx);
@@ -167,6 +180,23 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
	fd5_emit_cs_state(ctx, ring, v);
	ir3_emit_cs_consts(v, ring, ctx, info);

	foreach_bit(i, ctx->global_bindings.enabled_mask)
		nglobal++;

	if (nglobal > 0) {
		/* global resources don't otherwise get an OUT_RELOC(), since
		 * the raw ptr address is emitted in ir3_emit_cs_consts().
		 * So to make the kernel aware that these buffers are referenced
		 * by the batch, emit dummy relocs as part of a no-op packet
		 * payload:
		 */
		OUT_PKT7(ring, CP_NOP, 2 * nglobal);
		foreach_bit(i, ctx->global_bindings.enabled_mask) {
			struct pipe_resource *prsc = ctx->global_bindings.buf[i];
			OUT_RELOCW(ring, fd_resource(prsc)->bo, 0, 0, 0);
		}
	}

	const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size;
	const unsigned *num_groups = info->grid;
	/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
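As an aside, both the nglobal count and the dummy-reloc loop above rely on foreach_bit() walking the set bits of enabled_mask; a rough stand-in for the macro (assumed semantics, illustration only) would be:

	/* sketch of iterating bound global-buffer slots via the bitmask;
	 * assumed equivalent of foreach_bit(), not the real macro */
	uint32_t m = ctx->global_bindings.enabled_mask;
	while (m) {
		unsigned i = __builtin_ctz(m);	/* index of lowest set bit */
		m &= m - 1;			/* clear that bit */
		/* ... emit OUT_RELOCW() for ctx->global_bindings.buf[i] ... */
	}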
@@ -110,6 +110,12 @@ struct fd_streamout_stateobj {
	unsigned offsets[PIPE_MAX_SO_BUFFERS];
};

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
	struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
	uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
@@ -282,6 +288,7 @@ struct fd_context {
	struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES];
	struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES];
	struct fd_streamout_stateobj streamout;
	struct fd_global_bindings_stateobj global_bindings;
	struct pipe_clip_state ucp;

	struct pipe_query *cond_query;
@@ -488,6 +488,12 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
	foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
		resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);

	/* For global buffers, we don't really know if read or written, so assume
	 * the worst:
	 */
	foreach_bit(i, ctx->global_bindings.enabled_mask)
		resource_written(batch, ctx->global_bindings.buf[i]);

	if (info->indirect)
		resource_read(batch, info->indirect);
@@ -495,15 +495,53 @@ fd_set_compute_resources(struct pipe_context *pctx,
	// TODO
}

/* used by clover to bind global objects, returning the bo address
 * via handles[n]
 */
static void
fd_set_global_binding(struct pipe_context *pctx,
		unsigned first, unsigned count, struct pipe_resource **prscs,
		uint32_t **handles)
{
	/* TODO only used by clover.. seems to need us to return the actual
	 * gpuaddr of the buffer.. which isn't really exposed to mesa atm.
	 * How is this used?
	 */
	struct fd_context *ctx = fd_context(pctx);
	struct fd_global_bindings_stateobj *so = &ctx->global_bindings;
	unsigned mask = 0;

	if (prscs) {
		for (unsigned i = 0; i < count; i++) {
			unsigned n = i + first;

			mask |= BIT(n);

			pipe_resource_reference(&so->buf[n], prscs[i]);

			if (so->buf[n]) {
				struct fd_resource *rsc = fd_resource(so->buf[n]);
				uint64_t iova = fd_bo_get_iova(rsc->bo);
				// TODO need to scream if iova > 32b or fix gallium API..
				*handles[i] += iova;
			}

			if (prscs[i])
				so->enabled_mask |= BIT(n);
			else
				so->enabled_mask &= ~BIT(n);
		}
	} else {
		mask = (BIT(count) - 1) << first;

		for (unsigned i = 0; i < count; i++) {
			unsigned n = i + first;
			if (so->buf[n]) {
				struct fd_resource *rsc = fd_resource(so->buf[n]);
				fd_bo_put_iova(rsc->bo);
			}
			pipe_resource_reference(&so->buf[n], NULL);
		}

		so->enabled_mask &= ~mask;
	}
}
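To illustrate the handles[] contract above (the caller passes per-buffer offsets and gets the BO address added in place, truncated to 32 bits per the TODO), a hypothetical clover-style caller might look roughly like this; the names here are illustrative, not taken from the patch:

	/* hypothetical caller sketch: addr starts as the argument's offset
	 * within global_buf and comes back as offset + bo iova (low 32 bits) */
	uint32_t addr = arg_offset;
	uint32_t *handles[1] = { &addr };
	struct pipe_resource *prscs[1] = { global_buf };
	pctx->set_global_binding(pctx, 0, 1, prscs, handles);
	/* ... bind compute state and launch grid ... */
	pctx->set_global_binding(pctx, 0, 1, NULL, NULL);	/* unbind, puts iova */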
void