Add finalize_vertex_elements() to finalize ilo_ve_state. This fixes a potential issue with URB entry allocation for VS and moves the complexity of gen6_3DSTATE_VERTEX_ELEMENTS() to the new function. Signed-off-by: Chia-I Wu <olvaffe@gmail.com> tags/10.4-branchpoint
@@ -28,7 +28,6 @@ | |||
#include "util/u_draw.h" | |||
#include "util/u_pack_color.h" | |||
#include "ilo_builder_3d_top.h" /* for ve_init_cso_with_components() */ | |||
#include "ilo_draw.h" | |||
#include "ilo_state.h" | |||
#include "ilo_state_gen.h" | |||
@@ -41,24 +40,25 @@ | |||
static bool | |||
ilo_blitter_set_invariants(struct ilo_blitter *blitter) | |||
{ | |||
struct pipe_vertex_element velems[2]; | |||
struct pipe_vertex_element velem; | |||
struct pipe_viewport_state vp; | |||
if (blitter->initialized) | |||
return true; | |||
/* only vertex X and Y */ | |||
memset(&velems, 0, sizeof(velems)); | |||
velems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; | |||
ilo_gpe_init_ve(blitter->ilo->dev, 2, velems, &blitter->ve); | |||
memset(&velem, 0, sizeof(velem)); | |||
velem.src_format = PIPE_FORMAT_R32G32_FLOAT; | |||
ilo_gpe_init_ve(blitter->ilo->dev, 1, &velem, &blitter->ve); | |||
/* override first VE to be VUE header */ | |||
ve_init_cso_with_components(blitter->ilo->dev, | |||
/* generate VUE header */ | |||
ilo_gpe_init_ve_nosrc(blitter->ilo->dev, | |||
GEN6_VFCOMP_STORE_0, /* Reserved */ | |||
GEN6_VFCOMP_STORE_0, /* Render Target Array Index */ | |||
GEN6_VFCOMP_STORE_0, /* Viewport Index */ | |||
GEN6_VFCOMP_STORE_0, /* Point Width */ | |||
&blitter->ve.cso[0]); | |||
&blitter->ve.nosrc_cso); | |||
blitter->ve.prepend_nosrc_cso = true; | |||
/* a rectangle has 3 vertices in a RECTLIST */ | |||
util_draw_init_info(&blitter->draw); |
@@ -438,77 +438,9 @@ gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, | |||
ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0); | |||
} | |||
static inline void | |||
ve_init_cso_with_components(const struct ilo_dev_info *dev, | |||
int comp0, int comp1, int comp2, int comp3, | |||
struct ilo_ve_cso *cso) | |||
{ | |||
ILO_DEV_ASSERT(dev, 6, 7.5); | |||
STATIC_ASSERT(Elements(cso->payload) >= 2); | |||
cso->payload[0] = GEN6_VE_STATE_DW0_VALID; | |||
cso->payload[1] = | |||
comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | | |||
comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | | |||
comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | | |||
comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; | |||
} | |||
static inline void | |||
ve_set_cso_edgeflag(const struct ilo_dev_info *dev, | |||
struct ilo_ve_cso *cso) | |||
{ | |||
int format; | |||
ILO_DEV_ASSERT(dev, 6, 7.5); | |||
/* | |||
* From the Sandy Bridge PRM, volume 2 part 1, page 94: | |||
* | |||
* "- This bit (Edge Flag Enable) must only be ENABLED on the last | |||
* valid VERTEX_ELEMENT structure. | |||
* | |||
* - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, | |||
* and Component 1-3 Control must be set to VFCOMP_NOSTORE. | |||
* | |||
* - The Source Element Format must be set to the UINT format. | |||
* | |||
* - [DevSNB]: Edge Flags are not supported for QUADLIST | |||
* primitives. Software may elect to convert QUADLIST primitives | |||
* to some set of corresponding edge-flag-supported primitive | |||
* types (e.g., POLYGONs) prior to submission to the 3D pipeline." | |||
*/ | |||
cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; | |||
cso->payload[1] = | |||
GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | | |||
GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | | |||
GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | | |||
GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; | |||
/* | |||
* Edge flags have format GEN6_FORMAT_R8_UINT when defined via | |||
* glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined | |||
* via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. | |||
* | |||
* Since all the hardware cares about is whether the flags are zero or not, | |||
* we can treat them as GEN6_FORMAT_R32_UINT in the latter case. | |||
*/ | |||
format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; | |||
if (format == GEN6_FORMAT_R32_FLOAT) { | |||
STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); | |||
cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); | |||
} | |||
else { | |||
assert(format == GEN6_FORMAT_R8_UINT); | |||
} | |||
} | |||
static inline void | |||
gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, | |||
const struct ilo_ve_state *ve, | |||
bool last_velement_edgeflag, | |||
bool prepend_generated_ids) | |||
const struct ilo_ve_state *ve) | |||
{ | |||
uint8_t cmd_len; | |||
uint32_t *dw; | |||
@@ -517,66 +449,37 @@ gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, | |||
ILO_DEV_ASSERT(builder->dev, 6, 7.5); | |||
/* | |||
* From the Sandy Bridge PRM, volume 2 part 1, page 92: | |||
* | |||
* "At least one VERTEX_ELEMENT_STATE structure must be included." | |||
* | |||
* From the Sandy Bridge PRM, volume 2 part 1, page 93: | |||
* | |||
* "Up to 34 (DevSNB+) vertex elements are supported." | |||
*/ | |||
assert(ve->count + prepend_generated_ids <= 34); | |||
assert(ve->count + ve->prepend_nosrc_cso >= 1); | |||
assert(ve->count + ve->prepend_nosrc_cso <= 34); | |||
STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); | |||
if (!ve->count && !prepend_generated_ids) { | |||
struct ilo_ve_cso dummy; | |||
ve_init_cso_with_components(builder->dev, | |||
GEN6_VFCOMP_STORE_0, | |||
GEN6_VFCOMP_STORE_0, | |||
GEN6_VFCOMP_STORE_0, | |||
GEN6_VFCOMP_STORE_1_FP, | |||
&dummy); | |||
cmd_len = 3; | |||
ilo_builder_batch_pointer(builder, cmd_len, &dw); | |||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); | |||
memcpy(&dw[1], dummy.payload, sizeof(dummy.payload)); | |||
return; | |||
} | |||
cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; | |||
cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso); | |||
ilo_builder_batch_pointer(builder, cmd_len, &dw); | |||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); | |||
dw++; | |||
if (prepend_generated_ids) { | |||
struct ilo_ve_cso gen_ids; | |||
ve_init_cso_with_components(builder->dev, | |||
GEN6_VFCOMP_STORE_VID, | |||
GEN6_VFCOMP_STORE_IID, | |||
GEN6_VFCOMP_NOSTORE, | |||
GEN6_VFCOMP_NOSTORE, | |||
&gen_ids); | |||
memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload)); | |||
if (ve->prepend_nosrc_cso) { | |||
memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload)); | |||
dw += 2; | |||
} | |||
if (last_velement_edgeflag && ve->count) { | |||
struct ilo_ve_cso edgeflag; | |||
for (i = 0; i < ve->count - 1; i++) | |||
memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); | |||
edgeflag = ve->cso[i]; | |||
ve_set_cso_edgeflag(builder->dev, &edgeflag); | |||
memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload)); | |||
} else { | |||
for (i = 0; i < ve->count; i++) | |||
memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); | |||
for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) { | |||
memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload)); | |||
dw += 2; | |||
} | |||
if (ve->last_cso_edgeflag) | |||
memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload)); | |||
} | |||
static inline void |
@@ -332,8 +332,8 @@ gen6_draw_common_urb(struct ilo_render *r, | |||
* VS-generated output data, output URB availability isn't a | |||
* factor." | |||
*/ | |||
if (vs_entry_size < vec->ve->count) | |||
vs_entry_size = vec->ve->count; | |||
if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) | |||
vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; | |||
gs_entry_size = (vec->gs) ? | |||
ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) : | |||
@@ -465,31 +465,8 @@ gen6_draw_vf(struct ilo_render *r, | |||
gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb); | |||
/* 3DSTATE_VERTEX_ELEMENTS */ | |||
if (DIRTY(VE) || DIRTY(VS)) { | |||
const struct ilo_ve_state *ve = vec->ve; | |||
bool last_velement_edgeflag = false; | |||
bool prepend_generate_ids = false; | |||
if (vec->vs) { | |||
if (ilo_shader_get_kernel_param(vec->vs, | |||
ILO_KERNEL_VS_INPUT_EDGEFLAG)) { | |||
/* we rely on the state tracker here */ | |||
assert(ilo_shader_get_kernel_param(vec->vs, | |||
ILO_KERNEL_INPUT_COUNT) == ve->count); | |||
last_velement_edgeflag = true; | |||
} | |||
if (ilo_shader_get_kernel_param(vec->vs, | |||
ILO_KERNEL_VS_INPUT_INSTANCEID) || | |||
ilo_shader_get_kernel_param(vec->vs, | |||
ILO_KERNEL_VS_INPUT_VERTEXID)) | |||
prepend_generate_ids = true; | |||
} | |||
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, ve, | |||
last_velement_edgeflag, prepend_generate_ids); | |||
} | |||
if (DIRTY(VE)) | |||
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve); | |||
} | |||
void | |||
@@ -978,11 +955,12 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r, | |||
session->vb_start, session->vb_end, | |||
sizeof(blitter->vertices[0])); | |||
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, | |||
&blitter->ve, false, false); | |||
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); | |||
gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0, | |||
(blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float), | |||
0); | |||
gen6_3DSTATE_URB(r->builder, | |||
r->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0); | |||
/* 3DSTATE_URB workaround */ | |||
if (r->state.gs.active) { | |||
ilo_render_emit_flush(r); |
@@ -245,8 +245,8 @@ gen7_draw_common_urb(struct ilo_render *r, | |||
* Allocation Size must be sized to the maximum of the vertex input | |||
* and output structures." | |||
*/ | |||
if (vs_entry_size < vec->ve->count) | |||
vs_entry_size = vec->ve->count; | |||
if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) | |||
vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; | |||
vs_entry_size *= sizeof(float) * 4; | |||
vs_total_size = r->dev->urb_size - offset; | |||
@@ -716,7 +716,8 @@ gen7_rectlist_urb(struct ilo_render *r, | |||
(ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 32768 : 16384; | |||
gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset, | |||
blitter->ve.count * 4 * sizeof(float)); | |||
(blitter->ve.count + blitter->ve.prepend_nosrc_cso) * | |||
4 * sizeof(float)); | |||
gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0); | |||
gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0); | |||
@@ -839,8 +840,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r, | |||
session->vb_start, session->vb_end, | |||
sizeof(blitter->vertices[0])); | |||
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, | |||
&blitter->ve, false, false); | |||
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); | |||
gen7_rectlist_pcb_alloc(r, blitter); | |||
@@ -186,6 +186,63 @@ finalize_index_buffer(struct ilo_context *ilo) | |||
pipe_resource_reference(&current_hw_res, NULL); | |||
} | |||
static void | |||
finalize_vertex_elements(struct ilo_context *ilo) | |||
{ | |||
struct ilo_state_vector *vec = &ilo->state_vector; | |||
if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS))) | |||
return; | |||
vec->dirty |= ILO_DIRTY_VE; | |||
vec->ve->last_cso_edgeflag = false; | |||
if (vec->ve->count && vec->vs && | |||
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) { | |||
vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1]; | |||
ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso); | |||
vec->ve->last_cso_edgeflag = true; | |||
} | |||
vec->ve->prepend_nosrc_cso = false; | |||
if (vec->vs && | |||
(ilo_shader_get_kernel_param(vec->vs, | |||
ILO_KERNEL_VS_INPUT_INSTANCEID) || | |||
ilo_shader_get_kernel_param(vec->vs, | |||
ILO_KERNEL_VS_INPUT_VERTEXID))) { | |||
ilo_gpe_init_ve_nosrc(ilo->dev, | |||
GEN6_VFCOMP_STORE_VID, | |||
GEN6_VFCOMP_STORE_IID, | |||
GEN6_VFCOMP_NOSTORE, | |||
GEN6_VFCOMP_NOSTORE, | |||
&vec->ve->nosrc_cso); | |||
vec->ve->prepend_nosrc_cso = true; | |||
} else if (!vec->vs) { | |||
/* generate VUE header */ | |||
ilo_gpe_init_ve_nosrc(ilo->dev, | |||
GEN6_VFCOMP_STORE_0, /* Reserved */ | |||
GEN6_VFCOMP_STORE_0, /* Render Target Array Index */ | |||
GEN6_VFCOMP_STORE_0, /* Viewport Index */ | |||
GEN6_VFCOMP_STORE_0, /* Point Width */ | |||
&vec->ve->nosrc_cso); | |||
vec->ve->prepend_nosrc_cso = true; | |||
} else if (!vec->ve->count) { | |||
/* | |||
* From the Sandy Bridge PRM, volume 2 part 1, page 92: | |||
* | |||
* "SW must ensure that at least one vertex element is defined prior | |||
* to issuing a 3DPRIMTIVE command, or operation is UNDEFINED." | |||
*/ | |||
ilo_gpe_init_ve_nosrc(ilo->dev, | |||
GEN6_VFCOMP_STORE_0, | |||
GEN6_VFCOMP_STORE_0, | |||
GEN6_VFCOMP_STORE_0, | |||
GEN6_VFCOMP_STORE_1_FP, | |||
&vec->ve->nosrc_cso); | |||
vec->ve->prepend_nosrc_cso = true; | |||
} | |||
} | |||
/** | |||
* Finalize states. Some states depend on other states and are | |||
* incomplete/invalid until finalized. | |||
@@ -199,6 +256,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, | |||
finalize_shader_states(&ilo->state_vector); | |||
finalize_constant_buffers(ilo); | |||
finalize_index_buffer(ilo); | |||
finalize_vertex_elements(ilo); | |||
u_upload_unmap(ilo->uploader); | |||
} |
@@ -176,6 +176,13 @@ struct ilo_ve_state { | |||
unsigned instance_divisors[PIPE_MAX_ATTRIBS]; | |||
unsigned vb_mapping[PIPE_MAX_ATTRIBS]; | |||
unsigned vb_count; | |||
/* these are not valid until the state is finalized */ | |||
struct ilo_ve_cso edgeflag_cso; | |||
bool last_cso_edgeflag; | |||
struct ilo_ve_cso nosrc_cso; | |||
bool prepend_nosrc_cso; | |||
}; | |||
struct ilo_so_state { | |||
@@ -385,7 +392,7 @@ struct ilo_state_vector { | |||
uint32_t dirty; | |||
struct ilo_vb_state vb; | |||
const struct ilo_ve_state *ve; | |||
struct ilo_ve_state *ve; | |||
struct ilo_ib_state ib; | |||
struct ilo_shader_state *vs; |
@@ -86,6 +86,15 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, | |||
const struct pipe_vertex_element *states, | |||
struct ilo_ve_state *ve); | |||
void | |||
ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev, | |||
struct ilo_ve_cso *cso); | |||
void | |||
ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev, | |||
int comp0, int comp1, int comp2, int comp3, | |||
struct ilo_ve_cso *cso); | |||
void | |||
ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, | |||
const struct pipe_viewport_state *state, |
@@ -327,6 +327,83 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, | |||
} | |||
} | |||
void | |||
ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev, | |||
struct ilo_ve_cso *cso) | |||
{ | |||
int format; | |||
ILO_DEV_ASSERT(dev, 6, 7.5); | |||
/* | |||
* From the Sandy Bridge PRM, volume 2 part 1, page 94: | |||
* | |||
* "- This bit (Edge Flag Enable) must only be ENABLED on the last | |||
* valid VERTEX_ELEMENT structure. | |||
* | |||
* - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, | |||
* and Component 1-3 Control must be set to VFCOMP_NOSTORE. | |||
* | |||
* - The Source Element Format must be set to the UINT format. | |||
* | |||
* - [DevSNB]: Edge Flags are not supported for QUADLIST | |||
* primitives. Software may elect to convert QUADLIST primitives | |||
* to some set of corresponding edge-flag-supported primitive | |||
* types (e.g., POLYGONs) prior to submission to the 3D pipeline." | |||
*/ | |||
cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; | |||
/* | |||
* Edge flags have format GEN6_FORMAT_R8_UINT when defined via | |||
* glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined | |||
* via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. | |||
* | |||
* Since all the hardware cares about is whether the flags are zero or not, | |||
* we can treat them as GEN6_FORMAT_R32_UINT in the latter case. | |||
*/ | |||
format = GEN_EXTRACT(cso->payload[0], GEN6_VE_STATE_DW0_FORMAT); | |||
cso->payload[0] &= ~GEN6_VE_STATE_DW0_FORMAT__MASK; | |||
switch (format) { | |||
case GEN6_FORMAT_R32_FLOAT: | |||
format = GEN6_FORMAT_R32_UINT; | |||
break; | |||
default: | |||
assert(format == GEN6_FORMAT_R8_UINT); | |||
break; | |||
} | |||
cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_STATE_DW0_FORMAT); | |||
cso->payload[1] = | |||
GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | | |||
GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | | |||
GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | | |||
GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; | |||
} | |||
void | |||
ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev, | |||
int comp0, int comp1, int comp2, int comp3, | |||
struct ilo_ve_cso *cso) | |||
{ | |||
ILO_DEV_ASSERT(dev, 6, 7.5); | |||
STATIC_ASSERT(Elements(cso->payload) >= 2); | |||
assert(comp0 != GEN6_VFCOMP_STORE_SRC && | |||
comp1 != GEN6_VFCOMP_STORE_SRC && | |||
comp2 != GEN6_VFCOMP_STORE_SRC && | |||
comp3 != GEN6_VFCOMP_STORE_SRC); | |||
cso->payload[0] = GEN6_VE_STATE_DW0_VALID; | |||
cso->payload[1] = | |||
comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | | |||
comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | | |||
comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | | |||
comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; | |||
} | |||
void | |||
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, | |||
const struct ilo_shader_state *vs, |