gl_VertexID shows up as a special (magic?) attribute. We could try to be clever and only include the extra record if gl_VertexID is actually read, but honestly that's just extra complexity for no good reason. Might as well just always include it; I don't think this will be a real bottleneck. Fixes dEQP-GLES3.functional.shaders.builtin_variable.vertex_id. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> master
| @@ -115,6 +115,14 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch) | |||
| } | |||
| } | |||
| /* Add special gl_VertexID/gl_InstanceID buffers */ | |||
| panfrost_vertex_id(ctx->padded_count, &attrs[k]); | |||
| so->hw[PAN_VERTEX_ID].index = k++; | |||
| panfrost_instance_id(ctx->padded_count, &attrs[k]); | |||
| so->hw[PAN_INSTANCE_ID].index = k++; | |||
| /* Upload whatever we emitted and go */ | |||
| ctx->payloads[PIPE_SHADER_VERTEX].postfix.attributes = | |||
| @@ -440,7 +440,7 @@ panfrost_stage_attributes(struct panfrost_context *ctx) | |||
| struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); | |||
| struct panfrost_vertex_state *so = ctx->vertex; | |||
| size_t sz = sizeof(struct mali_attr_meta) * so->num_elements; | |||
| size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE; | |||
| struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz); | |||
| struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu; | |||
| @@ -481,12 +481,17 @@ panfrost_stage_attributes(struct panfrost_context *ctx) | |||
| /* Also, somewhat obscurely per-instance data needs to be | |||
| * offset in response to a delayed start in an indexed draw */ | |||
| if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) { | |||
| if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) | |||
| target[i].src_offset -= buf->stride * start; | |||
| } | |||
| } | |||
| /* Let's also include vertex builtins */ | |||
| } | |||
| target[PAN_VERTEX_ID].format = MALI_R32UI; | |||
| target[PAN_VERTEX_ID].swizzle = panfrost_get_default_swizzle(1); | |||
| target[PAN_INSTANCE_ID].format = MALI_R32UI; | |||
| target[PAN_INSTANCE_ID].swizzle = panfrost_get_default_swizzle(1); | |||
| ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu; | |||
| } | |||
| @@ -179,3 +179,43 @@ panfrost_vertex_instanced( | |||
| return 2; | |||
| } | |||
| } | |||
| /* Records for gl_VertexID and gl_InstanceID use a slightly special encoding, | |||
| * but the idea is the same. This one fills *attr as the gl_VertexID record built from padded_count. */ | |||
| void | |||
| panfrost_vertex_id( | |||
| unsigned padded_count, | |||
| union mali_attr *attr) | |||
| { | |||
| /* We factor the padded count as odd << shift: shift gets the trailing-zero count, extra_flags gets (odd - 1) / 2, and that's it */ | |||
| attr->elements = MALI_ATTR_VERTEXID; | |||
| attr->shift = __builtin_ctz(padded_count); /* NOTE(review): __builtin_ctz(0) is undefined; assumes padded_count != 0 - confirm against callers */ | |||
| attr->extra_flags = padded_count >> (attr->shift + 1); /* drops the trailing zeros plus the low bit of the odd factor */ | |||
| attr->stride = attr->size = 0; /* stride and size are zeroed for this record */ | |||
| } | |||
| void | |||
| panfrost_instance_id( /* Fills *attr as the gl_InstanceID record built from padded_count */ | |||
| unsigned padded_count, | |||
| union mali_attr *attr) | |||
| { | |||
| attr->elements = MALI_ATTR_INSTANCEID; | |||
| attr->stride = attr->extra_flags = attr->size = 0; /* defaults; the NPOT branch below overwrites stride and extra_flags */ | |||
| /* POT records have just a shift directly with an off-by-one for | |||
| * unclear reasons. NPOT records have a magic divisor smushed into the | |||
| * stride field (which is unused for these special records) */ | |||
| if (util_is_power_of_two_or_zero(padded_count)) { | |||
| attr->shift = __builtin_ctz(padded_count) - 1; /* NOTE(review): padded_count == 1 yields 0 - 1 here, and presumably 0 also reaches this branch where __builtin_ctz(0) is undefined - confirm callers never pass these */ | |||
| } else { | |||
| unsigned shift = 0, flags = 0; | |||
| attr->stride = panfrost_compute_magic_divisor(padded_count, &shift, &flags); /* helper returns the divisor and writes shift/flags through the out-pointers */ | |||
| attr->shift = shift; | |||
| attr->extra_flags = flags; | |||
| } | |||
| } | |||
| @@ -98,4 +98,7 @@ panfrost_vertex_instanced( | |||
| unsigned divisor, | |||
| union mali_attr *attrs); | |||
| void panfrost_vertex_id(unsigned padded_count, union mali_attr *attr); /* fills *attr as the special gl_VertexID record for padded_count */ | |||
| void panfrost_instance_id(unsigned padded_count, union mali_attr *attr); /* fills *attr as the special gl_InstanceID record for padded_count */ | |||
| #endif | |||