In order to keep early-Z from writing early in a discard shader, you need to set the "modifies Z" bit in the shader state (which the new prog_data->discard flag will indicate). Then, in the shader, we do a TLB write to make Z passthrough happen (the QPU result is ignored, so we use a NULL source).

tags/17.3-branchpoint
| @@ -1038,6 +1038,17 @@ emit_frag_end(struct v3d_compile *c) | |||
| TLB_TYPE_DEPTH | | |||
| TLB_DEPTH_TYPE_PER_PIXEL | | |||
| 0xffffff00); | |||
| } else if (c->s->info.fs.uses_discard) { | |||
| struct qinst *inst = vir_MOV_dest(c, | |||
| vir_reg(QFILE_TLBU, 0), | |||
| vir_reg(QFILE_NULL, 0)); | |||
| vir_set_cond(inst, discard_cond); | |||
| inst->src[vir_get_implicit_uniform_src(inst)] = | |||
| vir_uniform_ui(c, | |||
| TLB_TYPE_DEPTH | | |||
| TLB_DEPTH_TYPE_INVARIANT | | |||
| 0xffffff00); | |||
| } | |||
| /* XXX: Performance improvement: Merge Z write and color writes TLB | |||
| @@ -579,6 +579,7 @@ struct v3d_fs_prog_data { | |||
| BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; | |||
| bool writes_z; | |||
| bool discard; | |||
| }; | |||
| /* Special nir_load_input intrinsic index for loading the current TLB | |||
| @@ -762,8 +762,9 @@ uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler, | |||
| v3d_set_prog_data(c, &prog_data->base); | |||
| v3d_set_fs_prog_data_inputs(c, prog_data); | |||
| if (c->s->info.outputs_written & (1 << FRAG_RESULT_DEPTH)) | |||
| prog_data->writes_z = true; | |||
| prog_data->writes_z = (c->s->info.outputs_written & | |||
| (1 << FRAG_RESULT_DEPTH)); | |||
| prog_data->discard = c->s->info.fs.uses_discard; | |||
| return v3d_return_qpu_insts(c, final_assembly_size); | |||
| } | |||