This is Vadim's initial work with a few regression fixes squashed in.

v2: (airlied)
fix regression in glsl-max-varyings - need to use vs and ps_dirty
fix regression in shader exports from rebasing.
whitespace fixing.

v2.1: squash fix assert

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
@@ -93,8 +93,8 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else if (cfop->flags & CF_STRM) {
/* MEM_STREAM instructions */
} else if (cfop->flags & CF_MEM) {
/* MEM_STREAM, MEM_RING instructions */
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
@@ -109,12 +109,13 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else {
/* branch, loop, call, return instructions */
/* other instructions */
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
}
return 0;
@@ -2518,6 +2518,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
struct r600_resource *rbuffer;
uint64_t va;
unsigned buffer_index = ffs(dirty_mask) - 1;
unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER);
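/* R600_GS_RING_CONST_BUFFER is the GS ring bound as a constant buffer;
 * it is fetched through the vertex cache rather than the ALU constant
 * cache, so the ALU const registers are skipped for it below. */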
cb = &state->cb[buffer_index];
rbuffer = (struct r600_resource*)cb->buffer;
@@ -2526,10 +2527,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
va = r600_resource_va(&rctx->screen->b.b, &rbuffer->b.b);
va += cb->buffer_offset;
r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
pkt_flags);
if (!gs_ring_buffer) {
r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
pkt_flags);
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ));
@@ -2539,10 +2542,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
radeon_emit(cs, va); /* RESOURCEi_WORD0 */
radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
radeon_emit(cs, /* RESOURCEi_WORD2 */
S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
S_030008_STRIDE(16) |
S_030008_BASE_ADDRESS_HI(va >> 32UL));
S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
S_030008_BASE_ADDRESS_HI(va >> 32UL) |
S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT));
radeon_emit(cs, /* RESOURCEi_WORD3 */
S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) |
S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
@@ -2550,7 +2555,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
radeon_emit(cs, 0); /* RESOURCEi_WORD4 */
radeon_emit(cs, 0); /* RESOURCEi_WORD5 */
radeon_emit(cs, 0); /* RESOURCEi_WORD6 */
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
radeon_emit(cs, /* RESOURCEi_WORD7 */
S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ));
@@ -2714,6 +2720,63 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, RADEON_USAGE_READ));
}
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
uint32_t v = 0, v2 = 0;
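/* With a geometry shader active, the VS runs in the ES stage feeding the
 * GS, and the GS copy shader runs in the VS stage to read the GSVS ring;
 * without one, both registers are left at their defaults (0). */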
if (state->geom_enable) {
v = S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
S_028B54_GS_EN(1) |
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
S_028A40_CUT_MODE(V_028A40_GS_CUT_128);
}
r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
}
static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
{
struct pipe_screen *screen = rctx->b.b.screen;
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
if (state->enable) {
rbuffer = (struct r600_resource*)state->esgs_ring.buffer;
r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
(r600_resource_va(screen, &rbuffer->b.b)) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE));
r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
rbuffer = (struct r600_resource*)state->gsvs_ring.buffer;
r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
(r600_resource_va(screen, &rbuffer->b.b)) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE));
r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
}
r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
}
void cayman_init_common_regs(struct r600_command_buffer *cb,
enum chip_class ctx_chip_class,
enum radeon_family ctx_family,
@@ -3509,6 +3572,77 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
shader->flatshade = rctx->rasterizer->flatshade;
}
void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_command_buffer *cb = &shader->command_buffer;
struct r600_shader *rshader = &shader->shader;
r600_init_command_buffer(cb, 32);
r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
S_028890_NUM_GPRS(rshader->bc.ngpr) |
S_028890_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
}
void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_command_buffer *cb = &shader->command_buffer;
struct r600_shader *rshader = &shader->shader;
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
unsigned gsvs_itemsize =
(cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
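/* ring_item_size is in bytes per vertex; multiplying by the maximum
 * number of emitted vertices gives the ring footprint of one whole GS
 * invocation, and >> 2 converts bytes to dwords for the register. */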
r600_init_command_buffer(cb, 64);
/* VGT_GS_OUT_PRIM_TYPE is written by r600_draw_vbo */
/* VGT_GS_MODE is written by evergreen_emit_shader_stages */
r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1);
r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
/* XXX kernel checker fails
r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
S_028B90_CNT(0) |
S_028B90_ENABLE(0));
*/
r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
r600_store_value(cb, cp_shader->ring_item_size >> 2);
r600_store_value(cb, 0);
r600_store_value(cb, 0);
r600_store_value(cb, 0);
r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
(rshader->ring_item_size) >> 2);
r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
gsvs_itemsize);
r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
r600_store_value(cb, gsvs_itemsize);
r600_store_value(cb, gsvs_itemsize);
r600_store_value(cb, gsvs_itemsize);
/* FIXME calculate these values somehow ??? */
r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
r600_store_value(cb, 0x80); /* GS_PER_ES */
r600_store_value(cb, 0x100); /* ES_PER_GS */
r600_store_value(cb, 0x2); /* GS_PER_VS */
r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
S_028878_NUM_GPRS(rshader->bc.ngpr) |
S_028878_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
}
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_command_buffer *cb = &shader->command_buffer;
@@ -3918,6 +4052,10 @@ void evergreen_init_state_functions(struct r600_context *rctx)
rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->export_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 6);
r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26);
rctx->b.b.create_blend_state = evergreen_create_blend_state;
rctx->b.b.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
@@ -1939,7 +1939,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
} else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
int o = 0;
const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
"WRITE_IND_ACK"};
@@ -59,6 +59,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
util_blitter_save_geometry_shader(rctx->blitter, rctx->gs_shader);
util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
(struct pipe_stream_output_target**)rctx->b.streamout.targets);
util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
@@ -301,6 +301,12 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->config_state.atom.dirty = true;
ctx->stencil_ref.atom.dirty = true;
ctx->vertex_fetch_shader.atom.dirty = true;
ctx->export_shader.atom.dirty = true;
if (ctx->gs_shader) {
ctx->geometry_shader.atom.dirty = true;
ctx->shader_stages.atom.dirty = true;
ctx->gs_rings.atom.dirty = true;
}
ctx->vertex_shader.atom.dirty = true;
ctx->viewport.atom.dirty = true;
@@ -447,15 +447,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
{
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
switch(shader)
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_COMPUTE:
break;
case PIPE_SHADER_GEOMETRY:
/* XXX: support and enable geometry programs */
return 0;
if (rscreen->b.chip_class < EVERGREEN)
return 0;
break;
default:
/* XXX: support tessellation on Evergreen */
return 0;
@@ -38,7 +38,7 @@
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#define R600_NUM_ATOMS 41
#define R600_NUM_ATOMS 42
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
@@ -46,13 +46,14 @@
#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
#define R600_MAX_DRIVER_CONST_BUFFERS 3
#define R600_MAX_DRIVER_CONST_BUFFERS 4
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
#define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -179,6 +180,18 @@ struct r600_viewport_state {
struct pipe_viewport_state state;
};
struct r600_shader_stages_state {
struct r600_atom atom;
unsigned geom_enable;
};
struct r600_gs_rings_state {
struct r600_atom atom;
unsigned enable;
struct pipe_constant_buffer esgs_ring;
struct pipe_constant_buffer gsvs_ring;
};
/* This must start from 16. */
/* features */
#define DBG_NO_LLVM (1 << 17)
@@ -353,7 +366,7 @@ struct r600_fetch_shader {
struct r600_shader_state {
struct r600_atom atom;
struct r600_pipe_shader_selector *shader;
struct r600_pipe_shader *shader;
};
struct r600_context {
@@ -415,7 +428,11 @@ struct r600_context {
struct r600_cso_state vertex_fetch_shader;
struct r600_shader_state vertex_shader;
struct r600_shader_state pixel_shader;
struct r600_shader_state geometry_shader;
struct r600_shader_state export_shader;
struct r600_cs_shader_state cs_shader_state;
struct r600_shader_stages_state shader_stages;
struct r600_gs_rings_state gs_rings;
struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
struct r600_textures_info samplers[PIPE_SHADER_TYPES];
/** Vertex buffers for fetch shaders */
@@ -427,6 +444,7 @@ struct r600_context {
unsigned compute_cb_target_mask;
struct r600_pipe_shader_selector *ps_shader;
struct r600_pipe_shader_selector *vs_shader;
struct r600_pipe_shader_selector *gs_shader;
struct r600_rasterizer_state *rasterizer;
bool alpha_to_one;
bool force_blend_disable;
@@ -506,6 +524,8 @@ void cayman_init_common_regs(struct r600_command_buffer *cb,
void evergreen_init_state_functions(struct r600_context *rctx);
void evergreen_init_atom_start_cs(struct r600_context *rctx);
void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
void *evergreen_create_resolve_blend(struct r600_context *rctx);
@@ -60,7 +60,7 @@
issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/
static int r600_shader_from_tgsi(struct r600_screen *rscreen,
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key);
@@ -104,17 +104,43 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so)
}
}
static int store_shader(struct pipe_context *ctx,
struct r600_pipe_shader *shader)
{
struct r600_context *rctx = (struct r600_context *)ctx;
uint32_t *ptr, i;
if (shader->bo == NULL) {
shader->bo = (struct r600_resource*)
pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
if (shader->bo == NULL) {
return -ENOMEM;
}
ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
}
} else {
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
}
rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
}
return 0;
}
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
struct r600_shader_key key)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
int r, i;
uint32_t *ptr;
int r;
bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
unsigned export_shader = key.vs_as_es;
shader->shader.bc.isa = rctx->isa;
@@ -126,7 +152,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
r600_dump_streamout(&sel->so);
}
}
r = r600_shader_from_tgsi(rctx->screen, shader, key);
r = r600_shader_from_tgsi(rctx, shader, key);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
@@ -157,29 +183,39 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
}
}
/* Store the shader in a buffer. */
if (shader->bo == NULL) {
shader->bo = (struct r600_resource*)
pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
if (shader->bo == NULL) {
return -ENOMEM;
}
ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
}
} else {
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
if (shader->gs_copy_shader) {
if (dump) {
// dump copy shader
r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
&shader->gs_copy_shader->shader, dump, 0);
if (r)
return r;
}
rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
if ((r = store_shader(ctx, shader->gs_copy_shader)))
return r;
}
/* Store the shader in a buffer. */
if ((r = store_shader(ctx, shader)))
return r;
/* Build state. */
switch (shader->shader.processor_type) {
case TGSI_PROCESSOR_GEOMETRY:
if (rctx->b.chip_class >= EVERGREEN) {
evergreen_update_gs_state(ctx, shader);
evergreen_update_vs_state(ctx, shader->gs_copy_shader);
} else {
assert(!"not suported yet"); | |||
}
break;
case TGSI_PROCESSOR_VERTEX:
if (rctx->b.chip_class >= EVERGREEN) {
evergreen_update_vs_state(ctx, shader);
if (export_shader)
evergreen_update_es_state(ctx, shader);
else
evergreen_update_vs_state(ctx, shader);
} else {
r600_update_vs_state(ctx, shader);
}
@@ -245,6 +281,9 @@ struct r600_shader_ctx {
unsigned cv_output;
int fragcoord_input;
int native_integers;
int next_ring_offset;
int gs_next_vertex;
struct r600_shader *gs_for_vs;
};
struct r600_shader_tgsi_instruction {
@@ -254,6 +293,7 @@ struct r600_shader_tgsi_instruction {
int (*process)(struct r600_shader_ctx *ctx);
};
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx);
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -285,7 +325,13 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
if (i->Src[j].Register.Dimension) {
if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) {
switch (i->Src[j].Register.File) {
case TGSI_FILE_CONSTANT:
break;
case TGSI_FILE_INPUT:
if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
break;
default:
R600_ERR("unsupported src %d (dimension %d)\n", j,
i->Src[j].Register.Dimension);
return -EINVAL;
@@ -536,6 +582,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
if ((r = evergreen_interp_input(ctx, i)))
return r;
}
} else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
/* FIXME probably skip inputs if they aren't passed in the ring */
ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
ctx->next_ring_offset += 16;
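/* each declared GS input takes one vec4 (16 bytes) per vertex in the
 * ESGS ring */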
}
for (j = 1; j < count; ++j) {
ctx->shader->input[i + j] = ctx->shader->input[i];
@@ -550,7 +600,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
ctx->shader->output[i].interpolate = d->Interp.Interpolate;
ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
if (ctx->type == TGSI_PROCESSOR_VERTEX ||
ctx->type == TGSI_PROCESSOR_GEOMETRY) {
ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
switch (d->Semantic.Name) {
case TGSI_SEMANTIC_CLIPDIST:
@@ -773,6 +824,59 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx
return 0;
}
static int fetch_gs_input(struct r600_shader_ctx *ctx, unsigned index, unsigned vtx_id, unsigned int dst_reg)
{
struct r600_bytecode_vtx vtx;
int r;
int offset_reg = vtx_id / 3;
int offset_chan = vtx_id % 3;
/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
* R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
if (offset_reg == 0 && offset_chan == 2)
offset_chan = 3;
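/* resulting mapping after the fixup:
 * vtx_id 0 -> R0.x, 1 -> R0.y, 2 -> R0.w,
 * vtx_id 3 -> R1.x, 4 -> R1.y, 5 -> R1.z */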
memset(&vtx, 0, sizeof(vtx));
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
vtx.src_gpr = offset_reg;
vtx.src_sel_x = offset_chan;
vtx.offset = index * 16; /* bytes */
vtx.mega_fetch_count = 16;
vtx.dst_gpr = dst_reg;
vtx.dst_sel_x = 0; /* SEL_X */
vtx.dst_sel_y = 1; /* SEL_Y */
vtx.dst_sel_z = 2; /* SEL_Z */
vtx.dst_sel_w = 3; /* SEL_W */
vtx.use_const_fields = 1;
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
return 0;
}
static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_full_src_register *src = &inst->Src[i];
if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
int treg = r600_get_temp(ctx);
int index = src->Register.Index;
int vtx_id = src->Dimension.Index;
fetch_gs_input(ctx, index, vtx_id, treg);
ctx->src[i].sel = treg;
}
}
return 0;
}
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -983,10 +1087,247 @@ out_err:
return r;
}
static int r600_shader_from_tgsi(struct r600_screen *rscreen,
static int generate_gs_copy_shader(struct r600_context *rctx,
struct r600_pipe_shader *gs)
{
struct r600_shader_ctx ctx = {};
struct r600_shader *gs_shader = &gs->shader;
struct r600_pipe_shader *cshader;
int ocnt = gs_shader->noutput;
struct r600_bytecode_alu alu;
struct r600_bytecode_vtx vtx;
struct r600_bytecode_output output;
struct r600_bytecode_cf *cf_jump, *cf_pop,
*last_exp_pos = NULL, *last_exp_param = NULL;
int i, next_pos = 60, next_param = 0;
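/* position exports are allocated from array_base 60 upwards and
 * parameter exports from 0, matching the export conventions of the
 * regular vertex shader path in r600_shader_from_tgsi */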
cshader = calloc(1, sizeof(struct r600_pipe_shader));
if (!cshader)
return 0;
memcpy(cshader->shader.output, gs_shader->output, ocnt *
sizeof(struct r600_shader_io));
cshader->shader.noutput = ocnt;
ctx.shader = &cshader->shader;
ctx.bc = &ctx.shader->bc;
ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;
r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
rctx->screen->has_compressed_msaa_texturing);
ctx.bc->isa = rctx->isa;
/* R0.x = R0.x & 0x3fffffff */
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP2_AND_INT;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = 0x3fffffff;
alu.dst.write = 1;
r600_bytecode_add_alu(ctx.bc, &alu);
/* R0.y = R0.x >> 30 */
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP2_LSHR_INT;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = 0x1e;
alu.dst.chan = 1;
alu.dst.write = 1;
alu.last = 1;
r600_bytecode_add_alu(ctx.bc, &alu);
/* PRED_SETE_INT __, R0.y, 0 */
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP2_PRED_SETE_INT;
alu.src[0].chan = 1;
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.execute_mask = 1;
alu.update_pred = 1;
alu.last = 1;
r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
cf_jump = ctx.bc->cf_last;
/* fetch vertex data from GSVS ring */
for (i = 0; i < ocnt; ++i) {
struct r600_shader_io *out = &ctx.shader->output[i];
out->gpr = i + 1;
out->ring_offset = i * 16;
memset(&vtx, 0, sizeof(vtx));
vtx.op = FETCH_OP_VFETCH;
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
vtx.fetch_type = 2;
vtx.offset = out->ring_offset;
vtx.dst_gpr = out->gpr;
vtx.dst_sel_x = 0;
vtx.dst_sel_y = 1;
vtx.dst_sel_z = 2;
vtx.dst_sel_w = 3;
vtx.use_const_fields = 1;
r600_bytecode_add_vtx(ctx.bc, &vtx);
}
/* XXX handle clipvertex, streamout? */
/* export vertex data */
/* XXX factor out common code with r600_shader_from_tgsi ? */
for (i = 0; i < ocnt; ++i) {
struct r600_shader_io *out = &ctx.shader->output[i];
if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
continue;
memset(&output, 0, sizeof(output));
output.gpr = out->gpr;
output.elem_size = 3;
output.swizzle_x = 0;
output.swizzle_y = 1;
output.swizzle_z = 2;
output.swizzle_w = 3;
output.burst_count = 1;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output.op = CF_OP_EXPORT;
switch (out->name) {
case TGSI_SEMANTIC_POSITION:
output.array_base = next_pos++;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_PSIZE:
output.array_base = next_pos++;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_CLIPDIST:
/* spi_sid is 0 for clipdistance outputs that were generated
* for clipvertex - we don't need to pass them to PS */
if (out->spi_sid) {
/* duplicate it as PARAM to pass to the pixel shader */
output.array_base = next_param++;
r600_bytecode_add_output(ctx.bc, &output);
last_exp_param = ctx.bc->cf_last;
}
output.array_base = next_pos++;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_FOG:
output.swizzle_y = 4; /* 0 */
output.swizzle_z = 4; /* 0 */
output.swizzle_w = 5; /* 1 */
break;
}
r600_bytecode_add_output(ctx.bc, &output);
if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
last_exp_param = ctx.bc->cf_last;
else
last_exp_pos = ctx.bc->cf_last;
}
if (!last_exp_pos) {
memset(&output, 0, sizeof(output));
output.gpr = 0;
output.elem_size = 3;
output.swizzle_x = 7;
output.swizzle_y = 7;
output.swizzle_z = 7;
output.swizzle_w = 7;
output.burst_count = 1;
output.type = 2;
output.op = CF_OP_EXPORT;
output.array_base = next_pos++;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
r600_bytecode_add_output(ctx.bc, &output);
last_exp_pos = ctx.bc->cf_last;
}
if (!last_exp_param) {
memset(&output, 0, sizeof(output));
output.gpr = 0;
output.elem_size = 3;
output.swizzle_x = 7;
output.swizzle_y = 7;
output.swizzle_z = 7;
output.swizzle_w = 7;
output.burst_count = 1;
output.type = 2;
output.op = CF_OP_EXPORT;
output.array_base = next_param++;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
r600_bytecode_add_output(ctx.bc, &output);
last_exp_param = ctx.bc->cf_last;
}
last_exp_pos->op = CF_OP_EXPORT_DONE;
last_exp_param->op = CF_OP_EXPORT_DONE;
r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
cf_pop = ctx.bc->cf_last;
cf_jump->cf_addr = cf_pop->id + 2;
cf_jump->pop_count = 1;
cf_pop->cf_addr = cf_pop->id + 2;
cf_pop->pop_count = 1;
r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
ctx.bc->cf_last->end_of_program = 1;
gs->gs_copy_shader = cshader;
ctx.bc->nstack = 1;
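/* each fetched output is one vec4, so a single vertex occupies
 * ocnt * 16 bytes in the GSVS ring */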
cshader->shader.ring_item_size = ocnt * 16;
return r600_bytecode_build(ctx.bc);
}
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx)
{
struct r600_bytecode_output output;
int i, k, ring_offset;
for (i = 0; i < ctx->shader->noutput; i++) {
if (ctx->gs_for_vs) {
/* for ES we need to look up the corresponding ring offset expected by GS
* (map this output to GS input by name and sid) */
/* FIXME precompute offsets */
ring_offset = -1;
for (k = 0; k < ctx->gs_for_vs->ninput; ++k) {
struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
struct r600_shader_io *out = &ctx->shader->output[i];
if (in->name == out->name && in->sid == out->sid)
ring_offset = in->ring_offset;
}
if (ring_offset == -1) {
R600_ERR("error mapping VS->GS outputs\n");
return -1;
}
} else
ring_offset = i * 16;
/* next_ring_offset after parsing input decls contains total size of
* single vertex data, gs_next_vertex - current vertex index */
ring_offset += ctx->next_ring_offset * ctx->gs_next_vertex;
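/* e.g. if next_ring_offset is 32 (two vec4s per vertex), the second
 * emitted vertex (gs_next_vertex == 1) starts at byte offset 32 */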
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = ctx->shader->output[i].gpr;
output.elem_size = 3;
output.comp_mask = 0xF;
output.burst_count = 1;
output.op = CF_OP_MEM_RING;
output.array_base = ring_offset >> 2; /* in dwords */
r600_bytecode_add_output(ctx->bc, &output);
}
++ctx->gs_next_vertex;
return 0;
}
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_shader *shader = &pipeshader->shader;
struct tgsi_token *tokens = pipeshader->selector->tokens;
struct pipe_stream_output_info so = pipeshader->selector->so;
@@ -1002,6 +1343,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
bool ring_outputs = false;
#ifdef R600_USE_LLVM
use_llvm = !(rscreen->b.debug_flags & DBG_NO_LLVM);
@@ -1010,6 +1352,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
ctx.shader = shader;
ctx.native_integers = true;
shader->vs_as_es = key.vs_as_es;
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
@@ -1021,6 +1365,17 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
if (key.vs_as_es) {
ctx.gs_for_vs = &rctx->gs_shader->current->shader;
} else {
ctx.gs_for_vs = NULL;
}
ctx.next_ring_offset = 0;
ctx.gs_next_vertex = 0;
ctx.face_gpr = -1;
ctx.fragcoord_input = -1;
ctx.colors_used = 0;
@@ -1073,6 +1428,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
}
if (ctx.type == TGSI_PROCESSOR_GEOMETRY && ctx.bc->chip_class >= EVERGREEN) {
/* FIXME 1 would be enough in some cases (3 or less input vertices) */
ctx.file_offset[TGSI_FILE_INPUT] = 2;
}
ctx.use_llvm = use_llvm;
if (use_llvm) {
@@ -1149,6 +1508,15 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
/* we don't need this one */
break;
case TGSI_PROPERTY_GS_INPUT_PRIM:
shader->gs_input_prim = property->u[0].Data;
break;
case TGSI_PROPERTY_GS_OUTPUT_PRIM:
shader->gs_output_prim = property->u[0].Data;
break;
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
shader->gs_max_out_vertices = property->u[0].Data;
break;
}
break;
default:
@@ -1158,6 +1526,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
}
shader->ring_item_size = ctx.next_ring_offset;
/* Process two side if needed */
if (shader->two_side && ctx.colors_used) {
int i, count = ctx.shader->ninput;
@@ -1298,6 +1668,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
goto out_err;
if ((r = tgsi_split_literal_constant(&ctx)))
goto out_err;
if (ctx.type == TGSI_PROCESSOR_GEOMETRY)
if ((r = tgsi_split_gs_inputs(&ctx)))
goto out_err;
if (ctx.bc->chip_class == CAYMAN)
ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
else if (ctx.bc->chip_class >= EVERGREEN)
@@ -1319,7 +1692,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
noutput = shader->noutput;
if (ctx.clip_vertex_write) {
if (!ring_outputs && ctx.clip_vertex_write) {
unsigned clipdist_temp[2];
clipdist_temp[0] = r600_get_temp(&ctx);
@@ -1370,117 +1743,122 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
/* Add stream outputs. */
if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm)
if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
so.num_outputs && !use_llvm)
emit_streamout(&ctx, &so);
/* export output */
for (i = 0, j = 0; i < noutput; i++, j++) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
output[j].elem_size = 3;
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
output[j].swizzle_w = 3;
output[j].burst_count = 1;
output[j].type = -1;
output[j].op = CF_OP_EXPORT;
switch (ctx.type) {
case TGSI_PROCESSOR_VERTEX:
switch (shader->output[i].name) {
case TGSI_SEMANTIC_POSITION:
output[j].array_base = next_pos_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
if (ring_outputs) {
if (key.vs_as_es)
emit_gs_ring_writes(&ctx);
} else {
/* export output */
for (i = 0, j = 0; i < noutput; i++, j++) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
output[j].elem_size = 3;
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
output[j].swizzle_w = 3;
output[j].burst_count = 1;
output[j].type = -1;
output[j].op = CF_OP_EXPORT;
switch (ctx.type) {
case TGSI_PROCESSOR_VERTEX:
switch (shader->output[i].name) {
case TGSI_SEMANTIC_POSITION:
output[j].array_base = next_pos_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_PSIZE:
output[j].array_base = next_pos_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_CLIPVERTEX:
j--;
break;
case TGSI_SEMANTIC_CLIPDIST:
output[j].array_base = next_pos_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
/* spi_sid is 0 for clipdistance outputs that were generated
* for clipvertex - we don't need to pass them to PS */
if (shader->output[i].spi_sid) {
j++;
/* duplicate it as PARAM to pass to the pixel shader */
memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
output[j].array_base = next_param_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
}
break;
case TGSI_SEMANTIC_FOG:
output[j].swizzle_y = 4; /* 0 */
output[j].swizzle_z = 4; /* 0 */
output[j].swizzle_w = 5; /* 1 */
break;
}
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
/* never export more colors than the number of CBs */
if (shader->output[i].sid >= max_color_exports) {
/* skip export */
case TGSI_SEMANTIC_PSIZE:
output[j].array_base = next_pos_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_CLIPVERTEX:
j--;
continue;
}
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].array_base = shader->output[i].sid;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
for (k = 1; k < max_color_exports; k++) {
break;
case TGSI_SEMANTIC_CLIPDIST:
output[j].array_base = next_pos_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
/* spi_sid is 0 for clipdistance outputs that were generated
* for clipvertex - we don't need to pass them to PS */
if (shader->output[i].spi_sid) {
j++;
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
output[j].elem_size = 3;
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
/* duplicate it as PARAM to pass to the pixel shader */
memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
output[j].array_base = next_param_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
}
break;
case TGSI_SEMANTIC_FOG:
output[j].swizzle_y = 4; /* 0 */
output[j].swizzle_z = 4; /* 0 */
output[j].swizzle_w = 5; /* 1 */
break;
}
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
output[j].array_base = 61;
output[j].swizzle_x = 2;
output[j].swizzle_y = 7;
output[j].swizzle_z = output[j].swizzle_w = 7;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
output[j].array_base = 61;
output[j].swizzle_x = 7;
output[j].swizzle_y = 1;
output[j].swizzle_z = output[j].swizzle_w = 7;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
/* never export more colors than the number of CBs */
if (shader->output[i].sid >= max_color_exports) {
/* skip export */
j--;
continue;
}
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].array_base = shader->output[i].sid;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
for (k = 1; k < max_color_exports; k++) {
j++;
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
output[j].elem_size = 3;
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
}
}
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
output[j].array_base = 61;
output[j].swizzle_x = 2;
output[j].swizzle_y = 7;
output[j].swizzle_z = output[j].swizzle_w = 7;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
output[j].array_base = 61;
output[j].swizzle_x = 7;
output[j].swizzle_y = 1;
output[j].swizzle_z = output[j].swizzle_w = 7;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
r = -EINVAL;
goto out_err;
}
break;
default:
R600_ERR("unsupported processor type %d\n", ctx.type);
r = -EINVAL;
goto out_err;
}
break;
default:
R600_ERR("unsupported processor type %d\n", ctx.type);
r = -EINVAL;
goto out_err;
}
if (output[j].type==-1) {
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[j].array_base = next_param_base++;
if (output[j].type==-1) {
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[j].array_base = next_param_base++;
}
}
}
/* add fake position export */
if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
/* add fake position export */
if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
@@ -1493,10 +1871,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].array_base = next_pos_base;
output[j].op = CF_OP_EXPORT;
j++;
}
}
/* add fake param output for vertex shader if no param is exported */
if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
/* add fake param output for vertex shader if no param is exported */
if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
@@ -1509,39 +1887,40 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
}
}
/* add fake pixel export */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
output[j].swizzle_x = 7;
output[j].swizzle_y = 7;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
}
noutput = j;
/* add fake pixel export */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
output[j].swizzle_x = 7;
output[j].swizzle_y = 7;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
}
noutput = j;
/* set export done on last export of each type */
for (i = noutput - 1, output_done = 0; i >= 0; i--) {
if (!(output_done & (1 << output[i].type))) {
output_done |= (1 << output[i].type);
output[i].op = CF_OP_EXPORT_DONE;
/* set export done on last export of each type */
for (i = noutput - 1, output_done = 0; i >= 0; i--) {
if (!(output_done & (1 << output[i].type))) {
output_done |= (1 << output[i].type);
output[i].op = CF_OP_EXPORT_DONE;
}
}
}
/* add output to bytecode */
if (!use_llvm) {
for (i = 0; i < noutput; i++) {
r = r600_bytecode_add_output(ctx.bc, &output[i]);
if (r)
goto out_err;
/* add output to bytecode */
if (!use_llvm) {
for (i = 0; i < noutput; i++) {
r = r600_bytecode_add_output(ctx.bc, &output[i]);
if (r)
goto out_err;
}
}
}
@@ -1552,7 +1931,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
else {
const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
if (last->flags & CF_CLAUSE)
/* alu clause instructions don't have EOP bit, so add NOP */
if (last->flags & CF_ALU)
r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
ctx.bc->cf_last->end_of_program = 1;
@@ -1567,6 +1947,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
goto out_err;
}
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
if ((r = generate_gs_copy_shader(rctx, pipeshader)))
return r;
}
free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
@@ -5561,6 +5946,14 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
return 0;
}
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
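/* EMIT must first flush the current vertex's outputs to the GSVS ring;
 * ENDPRIM only needs the CUT_VERTEX CF instruction */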
if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
emit_gs_ring_writes(ctx);
return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
}
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -5934,8 +6327,8 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
{TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
@@ -6126,8 +6519,8 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
{TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
@@ -37,6 +37,7 @@ struct r600_shader_io {
unsigned lds_pos; /* for evergreen */
unsigned back_color_input;
unsigned write_mask;
int ring_offset;
};
struct r600_shader {
@@ -64,9 +65,17 @@ struct r600_shader {
boolean has_txq_cube_array_z_comp;
boolean uses_tex_buffers;
/* geometry shader properties */
unsigned gs_input_prim;
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
/* size in bytes of a data item in the ring (single vertex data) */
unsigned ring_item_size;
unsigned indirect_files;
unsigned max_arrays;
unsigned num_arrays;
unsigned vs_as_es;
struct r600_shader_array * arrays;
};
@@ -74,6 +83,7 @@ struct r600_shader_key {
unsigned color_two_side:1;
unsigned alpha_to_one:1;
unsigned nr_cbufs:4;
unsigned vs_as_es:1;
};
struct r600_shader_array {
@@ -85,6 +95,8 @@ struct r600_shader_array {
struct r600_pipe_shader {
struct r600_pipe_shader_selector *selector;
struct r600_pipe_shader *next_variant;
/* for GS - corresponding copy shader (installed as VS) */
struct r600_pipe_shader *gs_copy_shader;
struct r600_shader shader;
struct r600_command_buffer command_buffer; /* register writes */
struct r600_resource *bo;
@@ -693,6 +693,8 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex
/* Dual-source blending only makes sense with nr_cbufs == 1. */
if (key.nr_cbufs == 1 && rctx->dual_src_blend)
key.nr_cbufs = 2;
} else if (sel->type == PIPE_SHADER_VERTEX) {
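/* with a GS bound, the VS is compiled as an ES variant that writes
 * its outputs to the ESGS ring instead of exporting them */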
key.vs_as_es = (rctx->gs_shader != NULL);
}
return key;
}
@@ -792,6 +794,12 @@ static void *r600_create_vs_state(struct pipe_context *ctx,
return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
static void *r600_create_gs_state(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
return r600_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
}
static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -813,6 +821,13 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride;
}
static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->gs_shader = (struct r600_pipe_shader_selector *)state;
}
static void r600_delete_shader_selector(struct pipe_context *ctx,
struct r600_pipe_shader_selector *sel)
{
@@ -853,6 +868,20 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state)
r600_delete_shader_selector(ctx, sel);
}
static void r600_delete_gs_state(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state;
if (rctx->gs_shader == sel) {
rctx->gs_shader = NULL;
}
r600_delete_shader_selector(ctx, sel);
}
void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
{
if (state->dirty_mask) {
@@ -1046,10 +1075,65 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
pipe_resource_reference(&cb.buffer, NULL);
}
static void update_shader_atom(struct pipe_context *ctx,
struct r600_shader_state *state,
struct r600_pipe_shader *shader)
{
state->shader = shader;
if (shader) {
state->atom.num_dw = shader->command_buffer.num_dw;
state->atom.dirty = true;
r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
} else {
state->atom.num_dw = 0;
state->atom.dirty = false;
}
}
static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
{
if (rctx->shader_stages.geom_enable != enable) {
rctx->shader_stages.geom_enable = enable;
rctx->shader_stages.atom.dirty = true;
}
if (rctx->gs_rings.enable != enable) {
rctx->gs_rings.enable = enable;
rctx->gs_rings.atom.dirty = true;
if (enable && !rctx->gs_rings.esgs_ring.buffer) {
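/* fixed ring sizes: 0x1C000 bytes (112 KiB) for ESGS,
 * 0x4000000 bytes (64 MiB) for GSVS */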
unsigned size = 0x1C000; | |||
rctx->gs_rings.esgs_ring.buffer = | |||
pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM, | |||
PIPE_USAGE_STATIC, size); | |||
rctx->gs_rings.esgs_ring.buffer_size = size; | |||
size = 0x4000000; | |||
rctx->gs_rings.gsvs_ring.buffer = | |||
pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM, | |||
PIPE_USAGE_STATIC, size); | |||
rctx->gs_rings.gsvs_ring.buffer_size = size; | |||
} | |||
if (enable) { | |||
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, | |||
R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring); | |||
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, | |||
R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring); | |||
} else { | |||
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, | |||
R600_GS_RING_CONST_BUFFER, NULL); | |||
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, | |||
R600_GS_RING_CONST_BUFFER, NULL); | |||
} | |||
} | |||
} | |||
static bool r600_update_derived_state(struct r600_context *rctx) | |||
{ | |||
struct pipe_context * ctx = (struct pipe_context*)rctx; | |||
bool ps_dirty = false, vs_dirty = false; | |||
bool ps_dirty = false, vs_dirty = false, gs_dirty = false; | |||
bool blend_disable; | |||
if (!rctx->blitter->running) { | |||
@@ -1067,22 +1151,54 @@ static bool r600_update_derived_state(struct r600_context *rctx) | |||
} | |||
} | |||
if (unlikely(rctx->vertex_shader.shader != rctx->vs_shader)) { | |||
update_gs_block_state(rctx, rctx->gs_shader != NULL); | |||
if (rctx->gs_shader) { | |||
r600_shader_select(ctx, rctx->gs_shader, &gs_dirty); | |||
if (unlikely(!rctx->gs_shader->current)) | |||
return false; | |||
if (rctx->b.chip_class >= EVERGREEN && !rctx->shader_stages.geom_enable) { | |||
rctx->shader_stages.geom_enable = true; | |||
rctx->shader_stages.atom.dirty = true; | |||
} | |||
/* gs_shader provides GS and VS (copy shader) */ | |||
if (unlikely(rctx->geometry_shader.shader != rctx->gs_shader->current)) { | |||
update_shader_atom(ctx, &rctx->geometry_shader, rctx->gs_shader->current); | |||
update_shader_atom(ctx, &rctx->vertex_shader, rctx->gs_shader->current->gs_copy_shader); | |||
} | |||
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty); | |||
if (unlikely(!rctx->vs_shader->current)) | |||
return false; | |||
/* vs_shader is used as ES */ | |||
if (unlikely(vs_dirty || rctx->export_shader.shader != rctx->vs_shader->current)) { | |||
update_shader_atom(ctx, &rctx->export_shader, rctx->vs_shader->current); | |||
} | |||
} else { | |||
if (unlikely(rctx->geometry_shader.shader)) { | |||
update_shader_atom(ctx, &rctx->geometry_shader, NULL); | |||
update_shader_atom(ctx, &rctx->export_shader, NULL); | |||
rctx->shader_stages.geom_enable = false; | |||
rctx->shader_stages.atom.dirty = true; | |||
} | |||
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty); | |||
if (unlikely(!rctx->vs_shader->current)) | |||
return false; | |||
rctx->vertex_shader.shader = rctx->vs_shader; | |||
rctx->vertex_shader.atom.dirty = true; | |||
r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo); | |||
if (unlikely(vs_dirty || rctx->vertex_shader.shader != rctx->vs_shader->current)) { | |||
update_shader_atom(ctx, &rctx->vertex_shader, rctx->vs_shader->current); | |||
/* Update clip misc state. */ | |||
if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || | |||
rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) { | |||
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl; | |||
rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write; | |||
rctx->clip_misc_state.atom.dirty = true; | |||
/* Update clip misc state. */ | |||
if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || | |||
rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) { | |||
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl; | |||
rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write; | |||
rctx->clip_misc_state.atom.dirty = true; | |||
} | |||
} | |||
} | |||
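
With a GS bound, the API vertex shader is compiled as ES, the GS fills the geometry slot, and the GS's generated copy shader occupies the hardware VS slot to fan results out of the GSVS ring; without a GS, the VS binds directly. A compact sketch of that wiring under illustrative stand-in types (not the driver's):

#include <stdbool.h>
#include <stddef.h>

struct hw_shader { struct hw_shader *gs_copy_shader; };
struct stage_slot { struct hw_shader *shader; bool dirty; };

void bind_slot(struct stage_slot *s, struct hw_shader *sh)
{
	s->shader = sh;
	s->dirty = true;
}

/* Mirrors the two paths above. */
void wire_stages(struct stage_slot *vtx, struct stage_slot *geom,
		 struct stage_slot *exp,
		 struct hw_shader *vs, struct hw_shader *gs)
{
	if (gs) {
		bind_slot(geom, gs);                /* API GS -> HW GS */
		bind_slot(vtx, gs->gs_copy_shader); /* copy shader -> HW VS */
		bind_slot(exp, vs);                 /* API VS -> HW ES */
	} else {
		bind_slot(geom, NULL);
		bind_slot(exp, NULL);
		bind_slot(vtx, vs);                 /* API VS -> HW VS */
	}
}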
@@ -1090,7 +1206,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) | |||
if (unlikely(!rctx->ps_shader->current)) | |||
return false; | |||
if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader)) { | |||
if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader->current)) { | |||
if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) { | |||
rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs; | |||
@@ -1112,9 +1228,9 @@ static bool r600_update_derived_state(struct r600_context *rctx) | |||
r600_update_db_shader_control(rctx); | |||
} | |||
if (!ps_dirty && rctx->ps_shader && rctx->rasterizer && | |||
if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer && | |||
((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) || | |||
(rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade))) { | |||
(rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) { | |||
if (rctx->b.chip_class >= EVERGREEN) | |||
evergreen_update_ps_state(ctx, rctx->ps_shader->current); | |||
@@ -1122,11 +1238,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) | |||
r600_update_ps_state(ctx, rctx->ps_shader->current); | |||
} | |||
rctx->pixel_shader.shader = rctx->ps_shader; | |||
rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw; | |||
rctx->pixel_shader.atom.dirty = true; | |||
r600_context_add_resource_size(ctx, | |||
(struct pipe_resource *)rctx->ps_shader->current->bo); | |||
update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current); | |||
} | |||
/* on R600 we stuff masks + txq info into one constant buffer */ | |||
@@ -1165,6 +1277,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) | |||
rctx->blend_state.cso, | |||
blend_disable); | |||
} | |||
return true; | |||
} | |||
@@ -1606,11 +1719,14 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) | |||
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) | |||
{ | |||
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; | |||
struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader->current; | |||
struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader; | |||
r600_emit_command_buffer(cs, &shader->command_buffer); | |||
if (!shader) | |||
return; | |||
r600_emit_command_buffer(cs, &shader->command_buffer); | |||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); | |||
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo, RADEON_USAGE_READ)); | |||
} | |||
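
Since update_shader_atom() can now leave a NULL shader in the atom for a disabled stage, the emit path must test before dereferencing; the old code fetched ->current and emitted the command buffer before any check. A minimal sketch of the corrected ordering, with simplified types:

#include <stddef.h>

struct cmd_buf { unsigned num_dw; };
struct pipe_shader { struct cmd_buf command_buffer; };

/* As in the fixed r600_emit_shader(): the NULL guard precedes every
 * dereference and packet emission. */
void emit_shader(struct pipe_shader *shader)
{
	if (!shader)
		return;                 /* disabled stage: emit nothing */
	/* ... emit shader->command_buffer and the buffer relocation ... */
	(void)shader->command_buffer.num_dw;
}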
@@ -2139,6 +2255,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) | |||
{ | |||
rctx->b.b.create_fs_state = r600_create_ps_state; | |||
rctx->b.b.create_vs_state = r600_create_vs_state; | |||
rctx->b.b.create_gs_state = r600_create_gs_state; | |||
rctx->b.b.create_vertex_elements_state = r600_create_vertex_fetch_shader; | |||
rctx->b.b.bind_blend_state = r600_bind_blend_state; | |||
rctx->b.b.bind_depth_stencil_alpha_state = r600_bind_dsa_state; | |||
@@ -2147,6 +2264,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) | |||
rctx->b.b.bind_rasterizer_state = r600_bind_rs_state; | |||
rctx->b.b.bind_vertex_elements_state = r600_bind_vertex_elements; | |||
rctx->b.b.bind_vs_state = r600_bind_vs_state; | |||
rctx->b.b.bind_gs_state = r600_bind_gs_state; | |||
rctx->b.b.delete_blend_state = r600_delete_blend_state; | |||
rctx->b.b.delete_depth_stencil_alpha_state = r600_delete_dsa_state; | |||
rctx->b.b.delete_fs_state = r600_delete_ps_state; | |||
@@ -2154,6 +2272,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) | |||
rctx->b.b.delete_sampler_state = r600_delete_sampler_state; | |||
rctx->b.b.delete_vertex_elements_state = r600_delete_vertex_elements; | |||
rctx->b.b.delete_vs_state = r600_delete_vs_state; | |||
rctx->b.b.delete_gs_state = r600_delete_gs_state; | |||
rctx->b.b.set_blend_color = r600_set_blend_color; | |||
rctx->b.b.set_clip_state = r600_set_clip_state; | |||
rctx->b.b.set_constant_buffer = r600_set_constant_buffer; |
@@ -169,8 +169,10 @@ enum shader_target | |||
{ | |||
TARGET_UNKNOWN, | |||
TARGET_VS, | |||
TARGET_ES, | |||
TARGET_PS, | |||
TARGET_GS, | |||
TARGET_GS_COPY, | |||
TARGET_COMPUTE, | |||
TARGET_FETCH, | |||
@@ -137,7 +137,7 @@ void bc_dump::dump(cf_node& n) { | |||
for (int k = 0; k < 4; ++k) | |||
s << chans[n.bc.sel[k]]; | |||
} else if (n.bc.op_ptr->flags & (CF_STRM | CF_RAT)) { | |||
} else if (n.bc.op_ptr->flags & CF_MEM) { | |||
static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", | |||
"WRITE_IND_ACK"}; | |||
fill_to(s, 18); |
@@ -63,7 +63,7 @@ int bc_finalizer::run() { | |||
// workaround for some problems on r6xx/7xx | |||
// add ALU NOP to each vertex shader | |||
if (!ctx.is_egcm() && sh.target == TARGET_VS) { | |||
if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) { | |||
cf_node *c = sh.create_clause(NST_ALU_CLAUSE); | |||
alu_group_node *g = sh.create_alu_group(); |
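
ES now shares the VS-only paths (this ALU NOP workaround, and init_call_fs below), since an ES is compiled like a VS that writes to the ESGS ring. A hypothetical helper, not part of the patch, that such paired checks could share if more of them accumulate:

#include <stdbool.h>

enum target_sketch { SK_VS, SK_ES, SK_PS, SK_GS };

/* Hypothetical predicate: ES behaves as a VS variant for the
 * fetch-shader call and the r6xx/7xx NOP workaround. */
bool is_vertex_like(enum target_sketch t)
{
	return t == SK_VS || t == SK_ES;
}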
@@ -58,7 +58,10 @@ int bc_parser::decode() { | |||
if (pshader) { | |||
switch (bc->type) { | |||
case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break; | |||
case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break; | |||
case TGSI_PROCESSOR_VERTEX: | |||
t = pshader->vs_as_es ? TARGET_ES : TARGET_VS; | |||
break; | |||
case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break; | |||
case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break; | |||
default: assert(!"unknown shader target"); return -1; break; | |||
} | |||
@@ -134,8 +137,12 @@ int bc_parser::parse_decls() { | |||
} | |||
} | |||
if (sh->target == TARGET_VS) | |||
if (sh->target == TARGET_VS || sh->target == TARGET_ES) | |||
sh->add_input(0, 1, 0x0F); | |||
else if (sh->target == TARGET_GS) { | |||
sh->add_input(0, 1, 0x0F); | |||
sh->add_input(1, 1, 0x0F); | |||
} | |||
bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN | |||
&& sh->target == TARGET_PS; |
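
The backend target is now derived from both the processor type and the new vs_as_es flag. An illustrative mapping with stand-in PROC_*/T_* names in place of the real TGSI_PROCESSOR_* and TARGET_* values:

enum proc { PROC_FRAGMENT, PROC_VERTEX, PROC_GEOMETRY, PROC_COMPUTE };
enum tgt { T_PS, T_VS, T_ES, T_GS, T_COMPUTE, T_UNKNOWN };

/* Follows the decode() change above: a vertex shader that feeds a
 * geometry shader is compiled as ES instead of VS. */
enum tgt select_target(enum proc p, int vs_as_es)
{
	switch (p) {
	case PROC_FRAGMENT: return T_PS;
	case PROC_VERTEX:   return vs_as_es ? T_ES : T_VS;
	case PROC_GEOMETRY: return T_GS;
	case PROC_COMPUTE:  return T_COMPUTE;
	}
	return T_UNKNOWN;
}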
@@ -215,7 +215,7 @@ void shader::init() { | |||
void shader::init_call_fs(cf_node* cf) { | |||
unsigned gpr = 0; | |||
assert(target == TARGET_VS); | |||
assert(target == TARGET_VS || target == TARGET_ES); | |||
for(inputs_vec::const_iterator I = inputs.begin(), | |||
E = inputs.end(); I != E; ++I, ++gpr) { | |||
@@ -433,6 +433,7 @@ std::string shader::get_full_target_name() { | |||
const char* shader::get_shader_target_name() { | |||
switch (target) { | |||
case TARGET_VS: return "VS"; | |||
case TARGET_ES: return "ES"; | |||
case TARGET_PS: return "PS"; | |||
case TARGET_GS: return "GS"; | |||
case TARGET_COMPUTE: return "COMPUTE"; |