Browse Source

freedreno: add adreno 420 support

Very initial support.  Basic stuff working (es2gears, es2tri, and maybe
about half of glmark2).  Expect broken stuff.  Still missing: mem->gmem
(restore), queries, mipmaps (blob segfaults!), hw binning, etc.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
tags/10.5-branchpoint
Rob Clark 11 years ago
parent
commit
61c68b69d7
32 changed files with 3870 additions and 12 deletions
  1. 1
    0
      src/gallium/drivers/freedreno/Makefile.am
  2. 14
    0
      src/gallium/drivers/freedreno/Makefile.sources
  3. 127
    0
      src/gallium/drivers/freedreno/a4xx/fd4_blend.c
  4. 53
    0
      src/gallium/drivers/freedreno/a4xx/fd4_blend.h
  5. 172
    0
      src/gallium/drivers/freedreno/a4xx/fd4_context.c
  6. 102
    0
      src/gallium/drivers/freedreno/a4xx/fd4_context.h
  7. 326
    0
      src/gallium/drivers/freedreno/a4xx/fd4_draw.c
  8. 122
    0
      src/gallium/drivers/freedreno/a4xx/fd4_draw.h
  9. 625
    0
      src/gallium/drivers/freedreno/a4xx/fd4_emit.c
  10. 91
    0
      src/gallium/drivers/freedreno/a4xx/fd4_emit.h
  11. 415
    0
      src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
  12. 36
    0
      src/gallium/drivers/freedreno/a4xx/fd4_gmem.h
  13. 480
    0
      src/gallium/drivers/freedreno/a4xx/fd4_program.c
  14. 46
    0
      src/gallium/drivers/freedreno/a4xx/fd4_program.h
  15. 39
    0
      src/gallium/drivers/freedreno/a4xx/fd4_query.c
  16. 36
    0
      src/gallium/drivers/freedreno/a4xx/fd4_query.h
  17. 94
    0
      src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
  18. 56
    0
      src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
  19. 105
    0
      src/gallium/drivers/freedreno/a4xx/fd4_screen.c
  20. 36
    0
      src/gallium/drivers/freedreno/a4xx/fd4_screen.h
  21. 190
    0
      src/gallium/drivers/freedreno/a4xx/fd4_texture.c
  22. 68
    0
      src/gallium/drivers/freedreno/a4xx/fd4_texture.h
  23. 401
    0
      src/gallium/drivers/freedreno/a4xx/fd4_util.c
  24. 45
    0
      src/gallium/drivers/freedreno/a4xx/fd4_util.h
  25. 105
    0
      src/gallium/drivers/freedreno/a4xx/fd4_zsa.c
  26. 58
    0
      src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
  27. 5
    1
      src/gallium/drivers/freedreno/freedreno_screen.c
  28. 10
    5
      src/gallium/drivers/freedreno/ir3/ir3.c
  29. 1
    1
      src/gallium/drivers/freedreno/ir3/ir3.h
  30. 2
    1
      src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
  31. 6
    2
      src/gallium/drivers/freedreno/ir3/ir3_shader.c
  32. 3
    2
      src/gallium/drivers/freedreno/ir3/ir3_shader.h

+ 1
- 0
src/gallium/drivers/freedreno/Makefile.am View File

@@ -15,6 +15,7 @@ libfreedreno_la_SOURCES = \
$(C_SOURCES) \
$(a2xx_SOURCES) \
$(a3xx_SOURCES) \
$(a4xx_SOURCES) \
$(ir3_SOURCES)

noinst_PROGRAMS = ir3_compiler

+ 14
- 0
src/gallium/drivers/freedreno/Makefile.sources View File

@@ -89,6 +89,20 @@ a3xx_SOURCES := \
a3xx/fd3_zsa.c \
a3xx/fd3_zsa.h

# a4xx (adreno 4xx) backend.  Headers are listed next to their sources,
# matching a3xx_SOURCES, so they are tracked as part of the library sources.
a4xx_SOURCES := \
	a4xx/fd4_blend.c \
	a4xx/fd4_blend.h \
	a4xx/fd4_context.c \
	a4xx/fd4_context.h \
	a4xx/fd4_draw.c \
	a4xx/fd4_draw.h \
	a4xx/fd4_emit.c \
	a4xx/fd4_emit.h \
	a4xx/fd4_gmem.c \
	a4xx/fd4_gmem.h \
	a4xx/fd4_program.c \
	a4xx/fd4_program.h \
	a4xx/fd4_query.c \
	a4xx/fd4_query.h \
	a4xx/fd4_rasterizer.c \
	a4xx/fd4_rasterizer.h \
	a4xx/fd4_screen.c \
	a4xx/fd4_screen.h \
	a4xx/fd4_texture.c \
	a4xx/fd4_texture.h \
	a4xx/fd4_util.c \
	a4xx/fd4_util.h \
	a4xx/fd4_zsa.c \
	a4xx/fd4_zsa.h

ir3_SOURCES := \
ir3/disasm-a3xx.c \
ir3/instr-a3xx.h \

+ 127
- 0
src/gallium/drivers/freedreno/a4xx/fd4_blend.c View File

@@ -0,0 +1,127 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"

#include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_util.h"

/* Translate a gallium PIPE_BLEND_* equation into the corresponding a4xx
 * RB blend opcode.  Unknown values are logged and yield opcode 0.
 */
static enum a4xx_rb_blend_opcode
blend_func(unsigned func)
{
	if (func == PIPE_BLEND_ADD)
		return BLEND_DST_PLUS_SRC;
	if (func == PIPE_BLEND_MIN)
		return BLEND_MIN_DST_SRC;
	if (func == PIPE_BLEND_MAX)
		return BLEND_MAX_DST_SRC;
	if (func == PIPE_BLEND_SUBTRACT)
		return BLEND_SRC_MINUS_DST;
	if (func == PIPE_BLEND_REVERSE_SUBTRACT)
		return BLEND_DST_MINUS_SRC;

	DBG("invalid blend func: %x", func);
	return 0;
}

void *
fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd4_blend_stateobj *so;
// enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
int i;

if (cso->logicop_enable) {
// rop = cso->logicop_func; /* maps 1:1 */

switch (cso->logicop_func) {
case PIPE_LOGICOP_NOR:
case PIPE_LOGICOP_AND_INVERTED:
case PIPE_LOGICOP_AND_REVERSE:
case PIPE_LOGICOP_INVERT:
case PIPE_LOGICOP_XOR:
case PIPE_LOGICOP_NAND:
case PIPE_LOGICOP_AND:
case PIPE_LOGICOP_EQUIV:
case PIPE_LOGICOP_NOOP:
case PIPE_LOGICOP_OR_INVERTED:
case PIPE_LOGICOP_OR_REVERSE:
case PIPE_LOGICOP_OR:
reads_dest = true;
break;
}
}

if (cso->independent_blend_enable) {
DBG("Unsupported! independent blend state");
return NULL;
}

so = CALLOC_STRUCT(fd4_blend_stateobj);
if (!so)
return NULL;

so->base = *cso;

for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt = &cso->rt[i];

so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));

so->rb_mrt[i].control =
0xc00 | /* XXX ROP_CODE ?? */
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);

if (rt->blend_enable)
so->rb_mrt[i].control |=
A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND2;

if (reads_dest)
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;

if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}

return so;
}

+ 53
- 0
src/gallium/drivers/freedreno/a4xx/fd4_blend.h View File

@@ -0,0 +1,53 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_BLEND_H_
#define FD4_BLEND_H_

#include "pipe/p_state.h"
#include "pipe/p_context.h"

/* a4xx blend state object: the gallium blend state plus per-render-target
 * register values precomputed by fd4_blend_state_create().
 */
struct fd4_blend_stateobj {
/* copy of the gallium state; kept as the first member so a
 * pipe_blend_state pointer can be cast back to this struct
 */
struct pipe_blend_state base;
/* one entry per MRT slot: */
struct {
/* built from A4XX_RB_MRT_CONTROL_* bits */
uint32_t control;
/* built from A4XX_RB_MRT_BUF_INFO_* bits (only dither mode is set) */
uint32_t buf_info;
/* built from A4XX_RB_MRT_BLEND_CONTROL_* fields */
uint32_t blend_control;
} rb_mrt[8];
};

/* Downcast a gallium blend state pointer to the a4xx state object that
 * embeds it as its first member.
 */
static INLINE struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend)
{
	struct fd4_blend_stateobj *so = (struct fd4_blend_stateobj *)blend;

	return so;
}

/* Create an a4xx blend state object from a gallium blend CSO.  Returns
 * NULL if independent blend is requested or if allocation fails.
 */
void * fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);

#endif /* FD4_BLEND_H_ */

+ 172
- 0
src/gallium/drivers/freedreno/a4xx/fd4_context.c View File

@@ -0,0 +1,172 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/


#include "fd4_context.h"
#include "fd4_blend.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_gmem.h"
#include "fd4_program.h"
#include "fd4_query.h"
#include "fd4_rasterizer.h"
#include "fd4_texture.h"
#include "fd4_zsa.h"

/* pipe_context::destroy hook: release the a4xx specific resources created
 * in fd4_context_create(), then hand over to the generic teardown.
 */
static void
fd4_context_destroy(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd4_context *a4 = fd4_context(ctx);

	util_dynarray_fini(&a4->rbrc_patches);

	/* private/scratch buffer objects: */
	fd_bo_del(a4->vs_pvt_mem);
	fd_bo_del(a4->fs_pvt_mem);
	fd_bo_del(a4->vsc_size_mem);

	/* vertex element CSOs used for clears and blits: */
	pctx->delete_vertex_elements_state(pctx, a4->solid_vbuf_state.vtx);
	pctx->delete_vertex_elements_state(pctx, a4->blit_vbuf_state.vtx);

	/* drop our references on the internal vertex buffers: */
	pipe_resource_reference(&a4->solid_vbuf, NULL);
	pipe_resource_reference(&a4->blit_texcoord_vbuf, NULL);

	fd_context_destroy(pctx);
}

/* TODO we could combine a few of these small buffers (solid_vbuf,
* blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
* save a tiny bit of memory
*/

/* Allocate the immutable vertex buffer holding the two xyz vertices used
 * by the solid (clear / gmem->mem) draws.
 *
 * Returns the new resource, or NULL if the buffer could not be allocated.
 */
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
	static const float init_shader_const[] = {
			-1.000000, +1.000000, +1.000000,
			+1.000000, -1.000000, +1.000000,
	};
	struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
			PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));

	/* don't upload into a buffer that failed to allocate: */
	if (!prsc)
		return NULL;

	pipe_buffer_write(pctx, prsc, 0,
			sizeof(init_shader_const), init_shader_const);

	return prsc;
}

/* Allocate the 16 byte dynamic vertex buffer that is bound as
 * blit_texcoord_vbuf.  Contents are not initialized here.
 */
static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
	return pipe_buffer_create(pctx->screen,
			PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
}

/* Table mapping gallium PIPE_PRIM_* values to DI_PT_* primitive types;
 * handed to fd_context_init().  Primitive types without an entry here
 * are left zero-initialized.
 */
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
[PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
[PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
};

struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv)
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
struct pipe_context *pctx;

if (!fd4_ctx)
return NULL;

pctx = &fd4_ctx->base.base;

fd4_ctx->base.dev = fd_device_ref(screen->dev);
fd4_ctx->base.screen = fd_screen(pscreen);

pctx->destroy = fd4_context_destroy;
pctx->create_blend_state = fd4_blend_state_create;
pctx->create_rasterizer_state = fd4_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;

fd4_draw_init(pctx);
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);

pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
if (!pctx)
return NULL;

util_dynarray_init(&fd4_ctx->rbrc_patches);

fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM);

fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM);

fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
DRM_FREEDRENO_GEM_TYPE_KMEM);

fd4_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
fd4_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);

/* setup solid_vbuf_state: */
fd4_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
pctx, 1, (struct pipe_vertex_element[]){{
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
}});
fd4_ctx->solid_vbuf_state.vertexbuf.count = 1;
fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->solid_vbuf;

/* setup blit_vbuf_state: */
fd4_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
pctx, 2, (struct pipe_vertex_element[]){{
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32_FLOAT,
}, {
.vertex_buffer_index = 1,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
}});
fd4_ctx->blit_vbuf_state.vertexbuf.count = 2;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->blit_texcoord_vbuf;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd4_ctx->solid_vbuf;

fd4_query_context_init(pctx);

return pctx;
}

+ 102
- 0
src/gallium/drivers/freedreno/a4xx/fd4_context.h View File

@@ -0,0 +1,102 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_CONTEXT_H_
#define FD4_CONTEXT_H_

#include "freedreno_drmif.h"

#include "freedreno_context.h"

#include "ir3_shader.h"

/* a4xx context: the generic freedreno context plus the a4xx specific
 * buffers and vertex state set up by fd4_context_create().
 */
struct fd4_context {
/* generic context; kept as the first member so an fd_context pointer
 * can be cast back to fd4_context (see fd4_context())
 */
struct fd_context base;

/* Keep track of writes to RB_RENDER_CONTROL which need to be patched
 * once we know whether or not to use GMEM, and GMEM tile pitch.
 */
struct util_dynarray rbrc_patches;

/* buffer objects allocated at context creation (0x2000 bytes each);
 * presumably shader private memory for VS and FS -- TODO confirm
 */
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;

/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
 * could combine it with another allocation.
 */
struct fd_bo *vsc_size_mem;

/* vertex buf used for clear/gmem->mem vertices, and mem->gmem
 * vertices:
 */
struct pipe_resource *solid_vbuf;

/* vertex buf used for mem->gmem tex coords:
 */
struct pipe_resource *blit_texcoord_vbuf;

/* vertex state for solid_vbuf:
 * - solid_vbuf / 12 / R32G32B32_FLOAT
 */
struct fd_vertex_state solid_vbuf_state;

/* vertex state for blit_prog:
 * - blit_texcoord_vbuf / 8 / R32G32_FLOAT
 * - solid_vbuf / 12 / R32G32B32_FLOAT
 */
struct fd_vertex_state blit_vbuf_state;

/* true if *any* bits are set in {v,f}saturate_{s,t,r} */
bool vsaturate, fsaturate;

/* bitmask of samplers which need coords clamped for the vertex
 * shader:
 */
unsigned vsaturate_s, vsaturate_t, vsaturate_r;

/* bitmask of samplers which need coords clamped for the frag
 * shader:
 */
unsigned fsaturate_s, fsaturate_t, fsaturate_r;

/* some state changes require a different shader variant. Keep
 * track of this so we know when we need to re-emit shader state
 * due to variant change. See fixup_shader_state()
 */
struct ir3_shader_key last_key;
};

/* Downcast a generic freedreno context to the a4xx context that embeds
 * it as its first member.
 */
static INLINE struct fd4_context *
fd4_context(struct fd_context *ctx)
{
	struct fd4_context *fd4_ctx = (struct fd4_context *)ctx;

	return fd4_ctx;
}

/* Create an a4xx pipe_context for pscreen; returns NULL on failure. */
struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv);

#endif /* FD4_CONTEXT_H_ */

+ 326
- 0
src/gallium/drivers/freedreno/a4xx/fd4_draw.c View File

@@ -0,0 +1,326 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"

#include "freedreno_state.h"
#include "freedreno_resource.h"

#include "fd4_draw.h"
#include "fd4_context.h"
#include "fd4_emit.h"
#include "fd4_program.h"
#include "fd4_util.h"
#include "fd4_zsa.h"


static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
const struct pipe_draw_info *info = emit->info;

fd4_emit_state(ctx, ring, emit);

if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd4_emit_vertex_bufs(ring, emit);

OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */

OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);

fd4_draw_emit(ctx, ring,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info);
}

/* Compare the shader key for this draw against the one used for the
 * previous draw.  If it changed (ie. some state that is "unrelated" from
 * the state-tracker's point of view selects a different shader variant),
 * mark the program state and the affected shader stage(s) dirty, and
 * remember the new key.
 */
static void
fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
{
	struct fd4_context *fd4_ctx = fd4_context(ctx);
	struct ir3_shader_key *prev = &fd4_ctx->last_key;
	bool per_samp, vp_changed, fp_changed;

	if (ir3_shader_key_equal(prev, key))
		return;

	ctx->dirty |= FD_DIRTY_PROG;

	/* the saturate masks only matter if either key uses them: */
	per_samp = prev->has_per_samp || key->has_per_samp;

	vp_changed = per_samp &&
		((prev->vsaturate_s != key->vsaturate_s) ||
		 (prev->vsaturate_t != key->vsaturate_t) ||
		 (prev->vsaturate_r != key->vsaturate_r));

	fp_changed =
		(per_samp &&
		 ((prev->fsaturate_s != key->fsaturate_s) ||
		  (prev->fsaturate_t != key->fsaturate_t) ||
		  (prev->fsaturate_r != key->fsaturate_r))) ||
		(prev->color_two_side != key->color_two_side) ||
		(prev->half_precision != key->half_precision) ||
		(prev->alpha != key->alpha);

	if (vp_changed)
		ctx->prog.dirty |= FD_SHADER_DIRTY_VP;

	if (fp_changed)
		ctx->prog.dirty |= FD_SHADER_DIRTY_FP;

	*prev = *key;
}

static void
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd4_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
.info = info,
.key = {
/* do binning pass first: */
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])),
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
.has_per_samp = fd4_ctx->fsaturate || fd4_ctx->vsaturate,
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
.vsaturate_r = fd4_ctx->vsaturate_r,
.fsaturate_s = fd4_ctx->fsaturate_s,
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
},
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
};
unsigned dirty;

fixup_shader_state(ctx, &emit.key);

dirty = ctx->dirty;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
draw_impl(ctx, ctx->binning_ring, &emit);

/* and now regular (non-binning) pass: */
emit.key.binning_pass = false;
emit.dirty = dirty;
emit.vp = NULL; /* we changed key so need to refetch vp */
draw_impl(ctx, ctx->ring, &emit);
}

/* Clears must cover the whole framebuffer regardless of the currently
 * bound viewport, so program an X/Y viewport transform derived from the
 * framebuffer dimensions (offset and scale of half the size, with Y
 * flipped).
 */
static void
reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
{
	const float xhalf = pfb->width * 0.5f;
	const float yhalf = pfb->height * 0.5f;

	/* four consecutive registers starting at XOFFSET_0: */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 4);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(xhalf));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(xhalf));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(yhalf));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-yhalf));
}

/* fd_context::clear hook.
 *
 * Implements the clear as a draw: state is emitted for the solid program
 * and solid vertex buffer, depth/stencil/color registers are overridden
 * according to 'buffers', and a two-vertex RECTLIST is drawn into
 * ctx->ring.
 *
 * buffers: bitmask of PIPE_CLEAR_* selecting what to clear
 * color:   clear color (packed for cbufs[0], and also uploaded as a
 *          fragment shader constant)
 * depth:   clear depth, programmed as the viewport Z scale
 * stencil: clear stencil value, programmed as stencil ref/mask
 */
static void
fd4_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
unsigned dirty = ctx->dirty;
unsigned ce, i;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = true,
},
};
uint32_t colr = 0;

/* only the format of the first color buffer is used for packing: */
if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
colr = pack_rgba(pfb->cbufs[0]->format, color->f);

/* of the currently dirty state only framebuffer and scissor are
 * emitted; the program is always (re)emitted:
 */
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
emit.dirty = dirty;

OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

/* emit generic state now: */
fd4_emit_state(ctx, ring, &emit);
reset_viewport(ring, pfb);

if (buffers & PIPE_CLEAR_DEPTH) {
/* depth test always passes and writes Z: */
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));

/* the clear depth is passed via the viewport Z scale; flag the
 * viewport dirty since the bound viewport's Z regs are overwritten.
 * NOTE(review): reset_viewport() above overwrites the X/Y viewport
 * regs unconditionally, but FD_DIRTY_VIEWPORT is only set in this
 * branch -- confirm the caller re-dirties viewport state.
 */
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth));
ctx->dirty |= FD_DIRTY_VIEWPORT;
} else {
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
}

if (buffers & PIPE_CLEAR_STENCIL) {
/* front face: always pass and REPLACE with the ref value, which is
 * the requested stencil clear value.
 * NOTE(review): the second dword is built with the front-face
 * macros plus an unexplained 0xff000000, whereas the else branch
 * uses the _BF macros for it -- confirm this is intended.
 */
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) |
A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
0xff000000 | // XXX ???
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
} else {
/* stencil not cleared: zero ref/masks, stencil enable bit not set */
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));

OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
}

/* 'ce' is the color component write mask used for all MRTs below:
 * all four components when clearing color, none otherwise.
 */
if (buffers & PIPE_CLEAR_COLOR) {
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
ce = 0xf;
} else {
ce = 0x0;
}

/* same control/blend setup (src * ONE + dst * ZERO) for all 8 MRTs: */
for (i = 0; i < 8; i++) {
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));

OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}

fd4_emit_vertex_bufs(ring, &emit);

OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0); /* XXX GRAS_ALPHA_CONTROL */

OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, 0x00000000);

/* the same packed color value is written to all four dwords: */
OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */

/* until fastclear works: */
fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);

OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */

OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */

/* the draw is bracketed by CP_UNKNOWN_1A set to 1 and then back to 0
 * (purpose of this packet is not known):
 */
OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
OUT_RING(ring, 0x00000001);

/* two auto-indexed vertices drawn as a rect list: */
fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);

OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);

OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}

/* Install the a4xx draw and clear implementations on the context. */
void
fd4_draw_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->clear = fd4_clear;
	ctx->draw_vbo = fd4_draw_vbo;
}

+ 122
- 0
src/gallium/drivers/freedreno/a4xx/fd4_draw.h View File

@@ -0,0 +1,122 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_DRAW_H_
#define FD4_DRAW_H_

#include "pipe/p_context.h"

#include "freedreno_draw.h"

/* Install the a4xx draw_vbo and clear hooks on the context. */
void fd4_draw_init(struct pipe_context *pctx);

/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx..
 *
 * Packs the first dword of the a4xx draw packet:
 *   bits 0..   primitive type
 *   bits 6..   source select
 *   bits 8..   visibility cull mode
 *   bit  11    low bit of the index size
 *   bits 13..  remaining (upper) bits of the index size
 */
static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
		enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
		enum pc_di_vis_cull_mode vis_cull_mode)
{
	uint32_t dword = 0;

	dword |= prim_type << 0;
	dword |= source_select << 6;
	dword |= vis_cull_mode << 8;
	/* the index size field is split in two: */
	dword |= (index_size & 1) << 11;
	dword |= (index_size >> 1) << 13;

	return dword;
}

/* Emit a CP_DRAW_INDX_OFFSET packet, bracketed by debug markers.
 *
 * With vismode == USE_VISIBILITY the visibility field of the first dword
 * is left as zero and the dword is recorded in ctx->draw_patches for
 * later patching; otherwise vismode is encoded directly.  The index
 * buffer dwords are only emitted when idx_bo is non-NULL.
 */
static inline void
fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
		enum pc_di_primtype primtype,
		enum pc_di_vis_cull_mode vismode,
		enum pc_di_src_sel src_sel, uint32_t count,
		enum pc_di_index_size idx_type,
		uint32_t idx_size, uint32_t idx_offset,
		struct fd_bo *idx_bo)
{
	/* packet is 3 dwords, plus 3 more when an index buffer is used */
	const uint32_t ndwords = idx_bo ? 6 : 3;

	/* for debug after a lock up, write a unique counter value
	 * to scratch7 for each draw, to make it easier to match up
	 * register dumps to cmdstream.  The combination of IB
	 * (scratch6) and DRAW is enough to "triangulate" the
	 * particular draw that caused lockup.
	 */
	emit_marker(ring, 7);

	OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, ndwords);
	if (vismode != USE_VISIBILITY) {
		OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
	} else {
		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
				&ctx->draw_patches);
	}
	OUT_RING(ring, 0x1);            /* XXX */
	OUT_RING(ring, count);          /* NumIndices */

	if (idx_bo) {
		OUT_RING(ring, 0x0);        /* XXX */
		OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
		OUT_RING (ring, idx_size);
	}

	emit_marker(ring, 7);

	fd_reset_wfi(ctx);
}

static inline void
fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;

if (info->indexed) {
assert(!idx->user_buffer);

idx_bo = fd_resource(idx->buffer)->bo;
idx_type = size2indextype(idx->index_size);
idx_size = idx->index_size * info->count;
idx_offset = idx->offset + (info->start * idx->index_size);
src_sel = DI_SRC_SEL_DMA;
} else {
idx_bo = NULL;
idx_type = INDEX_SIZE_IGN;
idx_size = 0;
idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX;
}

fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
info->count, idx_type, idx_size, idx_offset, idx_bo);
}

#endif /* FD4_DRAW_H_ */

+ 625
- 0
src/gallium/drivers/freedreno/a4xx/fd4_emit.c View File

@@ -0,0 +1,625 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"

#include "freedreno_resource.h"

#include "fd4_emit.h"
#include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_program.h"
#include "fd4_rasterizer.h"
#include "fd4_texture.h"
#include "fd4_util.h"
#include "fd4_zsa.h"

/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
fd4_emit_constant(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;

if (prsc) {
sz = 0;
src = 0x2; // TODO ??
} else {
sz = sizedwords;
src = SS_DIRECT;
}

OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
OUT_RELOC(ring, bo, offset,
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
} else {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, dwords[i]);
}
}

/* Emit the user constant buffers bound in 'constbuf', followed by the
 * immediates of 'shader', for the state block 'sb'.
 *
 * 'shader' may be NULL: fd4_emit_state() passes NULL when the shader stage
 * itself is not dirty.  In that case no immediates are emitted and, since
 * the shader's const layout is not known, the user constants are emitted
 * without being truncated at the first immediate.
 */
static void
emit_constants(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		struct fd_constbuf_stateobj *constbuf,
		struct ir3_shader_variant *shader)
{
	uint32_t enabled_mask = constbuf->enabled_mask;
	/* limit (in dwords) for user consts; ~0 means "no known limit": */
	uint32_t max_user_dwords = ~0u;
	uint32_t base = 0;

	// XXX TODO only emit dirty consts.. but we need to keep track if
	// they are clobbered by a clear, gmem2mem, or mem2gmem..
	constbuf->dirty_mask = enabled_mask;

	/* in particular, with binning shader we may end up with unused
	 * consts, ie. we could end up w/ constlen that is smaller
	 * than first_immediate.  In that case truncate the user consts
	 * early to avoid HLSQ lockup caused by writing too many consts
	 *
	 * Only possible when we actually have the shader variant; it
	 * must not be dereferenced when the caller passed NULL.
	 */
	if (shader) {
		uint32_t first_immediate =
				MIN2(shader->first_immediate, shader->constlen);
		max_user_dwords = 4 * first_immediate;
	}

	/* emit user constants: */
	while (enabled_mask) {
		unsigned index = ffs(enabled_mask) - 1;
		struct pipe_constant_buffer *cb = &constbuf->cb[index];
		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */

		// I expect that size should be a multiple of vec4's:
		assert(size == align(size, 4));

		/* gallium could leave const buffers bound above what the
		 * current shader uses.. don't let that confuse us.
		 */
		if (base >= max_user_dwords)
			break;

		if (constbuf->dirty_mask & (1u << index)) {
			/* and even if the start of the const buffer is before
			 * first_immediate, the end may not be:
			 */
			size = MIN2(size, max_user_dwords - base);
			fd4_emit_constant(ring, sb, base,
					cb->buffer_offset, size,
					cb->user_buffer, cb->buffer);
			constbuf->dirty_mask &= ~(1u << index);
		}

		base += size;
		enabled_mask &= ~(1u << index);
	}

	/* emit shader immediates: */
	if (shader) {
		int size = shader->immediates_count;
		base = shader->first_immediate;

		/* truncate size to avoid writing constants that shader
		 * does not use:
		 */
		size = MIN2(size + base, shader->constlen) - base;

		/* convert out of vec4: */
		base *= 4;
		size *= 4;

		if (size > 0) {
			fd4_emit_constant(ring, sb, base,
				0, size, shader->immediates[0].val, NULL);
		}
	}
}

/* Emit sampler and texture state for the state block 'sb'.
 *
 * Two CP_LOAD_STATE packets are written: one with two dwords per sampler
 * (plus two trailing zero dwords), and one with eight dwords per texture.
 * Slots without a bound sampler or sampler view are filled from
 * zero-initialized dummy objects so that the packet layout stays intact.
 */
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
		enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
	unsigned i;

	if (tex->num_samplers > 0) {
		/* output sampler state: */
		OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 + (2 * tex->num_samplers));
		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
		for (i = 0; i < tex->num_samplers; i++) {
			static const struct fd4_sampler_stateobj dummy_sampler = {};
			const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
					fd4_sampler_stateobj(tex->samplers[i]) :
					&dummy_sampler;
			OUT_RING(ring, sampler->texsamp0);
			OUT_RING(ring, sampler->texsamp1);
		}
		/* maybe an a420.0 (or a4xx.0) workaround?? or just driver bug? */
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, 0x00000000);
	}

	if (tex->num_textures > 0) {
		/* emit texture state: */
		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures));
		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
		for (i = 0; i < tex->num_textures; i++) {
			static const struct fd4_pipe_sampler_view dummy_view = {};
			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
					fd4_pipe_sampler_view(tex->textures[i]) :
					&dummy_view;
			struct fd_resource *rsc = view->tex_resource;

			OUT_RING(ring, view->texconst0);
			OUT_RING(ring, view->texconst1);
			OUT_RING(ring, view->texconst2);
			OUT_RING(ring, view->texconst3);
			if (rsc) {
				struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
				OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
			} else {
				/* unbound slot (dummy view has no resource): there is
				 * no buffer to reference, so emit a zero address
				 * rather than dereferencing a NULL resource.
				 */
				OUT_RING(ring, 0x00000000);
			}
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
		}
	}
}

/* emit texture state for mem->gmem restore operation.. eventually it would
* be good to get rid of this and use normal CSO/etc state for more of these
* special cases..
*
* Not implemented yet: the body is empty, so nothing is written to 'ring'
* and 'psurf' is unused.
*/
void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
{
/* TODO */
}


/* Emit vertex fetch/decode state for the vertex shader variant of 'emit'.
 *
 * For every vertex shader input (up to the last one with a non-zero
 * component mask) that is actually used, one VFD_FETCH and one
 * VFD_DECODE_INSTR entry is written; the entries are packed densely
 * (index j) even when unused inputs are skipped.  Finally VFD_CONTROL_0..4
 * are written with the total component count and the number of entries.
 *
 * Nothing is emitted when the vertex state has no elements.
 */
void
fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
uint32_t i, j, last = 0;
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);

/* hw doesn't like to be configured for zero vbo's, it seems: */
if (vtx->vtx->num_elements == 0)
return;

/* find the last input that the shader actually consumes: */
for (i = 0; i < n; i++)
if (vp->inputs[i].compmask)
last = i;

/* NOTE(review): if n is 0 (shader has no inputs) 'last' stays 0 and the
 * loop below still inspects vp->inputs[0] -- confirm that is intended.
 */
for (i = 0, j = 0; i <= last; i++) {
if (vp->inputs[i].compmask) {
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
&vtx->vertexbuf.vb[elem->vertex_buffer_index];
struct fd_resource *rsc = fd_resource(vb->buffer);
enum pipe_format pfmt = elem->src_format;
enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
/* every entry except the last one is flagged SWITCHNEXT: */
bool switchnext = (i != last);
uint32_t fs = util_format_get_blocksize(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
/* bytes remaining in the buffer object past 'off': */
uint32_t size = fd_bo_size(rsc->bo) - off;
debug_assert(fmt != ~0);

OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
OUT_RELOC(ring, rsc->bo, off, 0, 0);
OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
OUT_RING(ring, 0x00000001);

OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));

total_in += vp->inputs[i].ncomp;
j++;
}
}

/* j is now the number of fetch/decode entries written above: */
OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
0xa0000 | /* XXX */
A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
A4XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
A4XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
OUT_RING(ring, 0x0000fc00); /* XXX VFD_CONTROL_3 */
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
}

/* Emit the state groups selected by emit->dirty into 'ring'.
 *
 * Each group of registers is only written when one of the dirty bits it
 * depends on is set; PC_PRIM_VTX_CNTL is the exception and is written
 * whenever emit->info is set.  At the end the handled bits are cleared
 * from ctx->dirty.  The VERTTEX/FRAGTEX bits are removed from the local
 * copy when the corresponding shader has no samplers, so in that case they
 * stay set in ctx->dirty.
 */
void
fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
uint32_t dirty = emit->dirty;

emit_marker(ring, 5);

if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;

/* I suppose if we needed to (which I don't *think* we need
* to), we could emit this for binning pass too. But we
* would need to keep a different patch-list for binning
* vs render pass.
*/

OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
/* recorded in rbrc_patches so the value can be or'd into later: */
OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
}

if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;

OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, zsa->gras_alpha_control);

OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, zsa->rb_stencil_control);

/* front and back-face stencil reference values: */
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, zsa->rb_stencilrefmask |
A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
OUT_RING(ring, zsa->rb_stencilrefmask_bf |
A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}

if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_depth_control;
/* fragment shaders that write position or use kill set
 * EARLY_Z_DISABLE:
 */
if (fp->writes_pos) {
val |= A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
if (fp->has_kill) {
val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, val);
}

if (dirty & FD_DIRTY_RASTERIZER) {
struct fd4_rasterizer_stateobj *rasterizer =
fd4_rasterizer_stateobj(ctx->rasterizer);

OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, rasterizer->gras_su_mode_control |
A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
OUT_RING(ring, rasterizer->gras_su_point_minmax);
OUT_RING(ring, rasterizer->gras_su_point_size);

OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
}

if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}

/* NOTE: since primitive_restart is not actually part of any
* state object, we need to make sure that we always emit
* PRIM_VTX_CNTL.. either that or be more clever and detect
* when it changes.
*/
if (emit->info) {
uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
->pc_prim_vtx_cntl;

val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
val |= COND(fp->total_in > 0, A4XX_PC_PRIM_VTX_CNTL_VAROUT);

OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
OUT_RING(ring, val);
OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
}

if (dirty & FD_DIRTY_SCISSOR) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

/* bottom-right is written as max - 1: */
OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));

/* grow ctx->max_scissor to also cover this scissor: */
ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
}

if (dirty & FD_DIRTY_VIEWPORT) {
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}

if (dirty & FD_DIRTY_PROG)
fd4_program_emit(ring, emit);

if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
/* evil hack to deal sanely with clear path: */
(emit->prog == &ctx->prog)) {
fd_wfi(ctx, ring);
/* the shader variant is only handed over when that stage is dirty;
 * otherwise NULL is passed as the 'shader' argument:
 */
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
(emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
if (!emit->key.binning_pass) {
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
(emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
}
}

if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;

/* all 8 MRT slots are written: */
for (i = 0; i < 8; i++) {
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, blend->rb_mrt[i].control);

OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, blend->rb_mrt[i].blend_control);
}
}

if (dirty & FD_DIRTY_VERTTEX) {
if (vp->has_samp)
emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
else
/* not emitted, so keep the bit set in ctx->dirty: */
dirty &= ~FD_DIRTY_VERTTEX;
}

if (dirty & FD_DIRTY_FRAGTEX) {
if (fp->has_samp)
emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
else
/* not emitted, so keep the bit set in ctx->dirty: */
dirty &= ~FD_DIRTY_FRAGTEX;
}

ctx->dirty &= ~dirty;
}

/* emit setup at begin of new cmdstream buffer (don't rely on previous
* state, there could have been a context switch between ioctls):
*
* Writes a fixed set of registers with constant values into ctx->ring,
* disables all CP_SET_DRAW_STATE groups, points the VS/FS private memory
* registers at the context's vs_pvt_mem/fs_pvt_mem buffers, programs
* default (non-MSAA, rendering pass) rasterizer/RB state and finally sets
* ctx->needs_rb_fbd.  Many of the registers and values are not understood
* yet (named UNKNOWN_xxxx).
*/
void
fd4_emit_restore(struct fd_context *ctx)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;

OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
OUT_RING(ring, 0x00000001);

OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
OUT_RING(ring, 0x00000006);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
OUT_RING(ring, 0x0000003a);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
OUT_RING(ring, 0x00000001);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
OUT_RING(ring, 0x00000007);

OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000012);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
OUT_RING(ring, 0x00000006);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
OUT_RING(ring, 0x00040000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00001000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F3, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F4, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F5, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F6, 1);
OUT_RING(ring, 0x3c007fff);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
OUT_RING(ring, 0x3f800000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
OUT_RING(ring, 0x0000001d);

OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
OUT_RING(ring, 0x00000001);

OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
OUT_RING(ring, 0x00000000);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_2381, 1);
OUT_RING(ring, 0x00000010);

OUT_PKT0(ring, REG_A4XX_UNKNOWN_23A0, 1);
OUT_RING(ring, 0x00000010);

/* we don't use this yet.. probably best to disable.. */
OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
CP_SET_DRAW_STATE_0_GROUP_ID(0));
OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));

/* private memory for the vertex and fragment shader stages; the
 * second dword of each packet is the address of the context's buffer:
 */
OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */
OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */

OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_PARAM */
OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */

/* default to the normal rendering pass with MSAA disabled: */
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));

OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));

OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));

OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));

OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf));

OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE);

/* flag checked by fd4_emit_tile_prep() before it writes
 * RB_FRAME_BUFFER_DIMENSION:
 */
ctx->needs_rb_fbd = true;
}

+ 91
- 0
src/gallium/drivers/freedreno/a4xx/fd4_emit.h View File

@@ -0,0 +1,91 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_EMIT_H
#define FD4_EMIT_H

#include "pipe/p_context.h"

#include "freedreno_context.h"
#include "fd4_util.h"
#include "fd4_program.h"
#include "ir3_shader.h"

struct fd_ringbuffer;
enum adreno_state_block;

void fd4_emit_constant(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);

void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface *psurf);

/* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit {
/* vertex state consumed by fd4_emit_vertex_bufs(): */
const struct fd_vertex_state *vtx;
/* program whose vp/fp shader state objects the variants come from: */
const struct fd_program_stateobj *prog;
/* draw info; may be left NULL, fd4_emit_state() checks it before use: */
const struct pipe_draw_info *info;
/* shader key used to look up the vp/fp variants: */
struct ir3_shader_key key;
/* dirty bits that fd4_emit_state() should emit: */
uint32_t dirty;
bool rasterflat;

/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
/* TODO: other shader stages.. */
};

/* Return the vertex shader variant for emit->key, looking it up on the
 * first call and caching it in emit->vp for later calls.
 */
static inline struct ir3_shader_variant *
fd4_emit_get_vp(struct fd4_emit *emit)
{
	struct fd4_shader_stateobj *stateobj;

	/* already looked up on an earlier call: */
	if (emit->vp)
		return emit->vp;

	stateobj = emit->prog->vp;
	emit->vp = ir3_shader_variant(stateobj->shader, emit->key);

	return emit->vp;
}

/* Return the fragment shader variant for emit->key, looking it up on the
 * first call and caching it in emit->fp for later calls.
 */
static inline struct ir3_shader_variant *
fd4_emit_get_fp(struct fd4_emit *emit)
{
	struct fd4_shader_stateobj *stateobj;

	/* already looked up on an earlier call: */
	if (emit->fp)
		return emit->fp;

	stateobj = emit->prog->fp;
	emit->fp = ir3_shader_variant(stateobj->shader, emit->key);

	return emit->fp;
}

void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);

void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit);

void fd4_emit_restore(struct fd_context *ctx);

#endif /* FD4_EMIT_H */

+ 415
- 0
src/gallium/drivers/freedreno/a4xx/fd4_gmem.c View File

@@ -0,0 +1,415 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "freedreno_draw.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"

#include "fd4_gmem.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_program.h"
#include "fd4_util.h"
#include "fd4_zsa.h"

/* Shader key used for the driver-internal draws in this file (it is the
 * key handed to struct fd4_emit for the gmem->mem resolve).
 */
static const struct ir3_shader_key key = {
// XXX should set this based on render target format! We don't
// want half_precision if float32 render target!!!
.half_precision = true,
};

/* Emit RB_MRT_BUF_INFO/base/stride for all 8 render target slots.
 *
 * nr_bufs/bufs: the bound color surfaces; entries may be NULL
 * bases:        optional per-slot gmem base, only used when bin_w != 0
 * bin_w:        bin width when rendering to gmem, or 0 when rendering
 *               directly to the surfaces in memory
 *
 * Slots without a surface (beyond nr_bufs, or a NULL entry) are written
 * with zero format, stride and base.
 */
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
{
	unsigned i;

	for (i = 0; i < 8; i++) {
		enum a4xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		struct fd_resource *rsc = NULL;
		struct fd_resource_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t base = 0;
		uint32_t layer_offset = 0;

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];

			rsc = fd_resource(psurf->texture);
			slice = &rsc->slices[psurf->u.tex.level];
			format = fd4_pipe2color(psurf->format);
			swap = fd4_pipe2swap(psurf->format);

			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			layer_offset = slice->size0 * psurf->u.tex.first_layer;

			if (bin_w) {
				stride = bin_w * rsc->cpp;

				if (bases) {
					base = bases[i];
				}
			} else {
				stride = slice->pitch * rsc->cpp;
			}
		}

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
		OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				0x80 | /* XXX not on gmem2mem?? tile-mode? */
				A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
		/* rsc is NULL both for slots past nr_bufs and for NULL entries
		 * inside the array; neither has a buffer to relocate against,
		 * so the reloc is only emitted with a real resource:
		 */
		if (bin_w || !rsc) {
			OUT_RING(ring, base);
		} else {
			OUT_RELOCW(ring, rsc->bo,
					slice->offset + layer_offset, 0, -1);
		}
		OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
	}
}

static uint32_t
depth_base(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
uint32_t cpp = 4;
if (pfb->cbufs[0]) {
struct fd_resource *rsc =
fd_resource(pfb->cbufs[0]->texture);
cpp = rsc->cpp;
}
return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
}

/* transfer from gmem to system memory (ie. normal RAM) */

/* Resolve one surface from gmem (starting at gmem offset 'base') to its
 * backing buffer in memory, using an RB copy followed by a rectlist draw.
 *
 * The destination address includes the offset of the surface's layer, so
 * that it matches the address emit_mrt() uses for the same surface when
 * rendering directly to memory.
 */
static void
emit_gmem2mem_surf(struct fd_context *ctx,
		uint32_t base, struct pipe_surface *psurf)
{
	struct fd_ringbuffer *ring = ctx->ring;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
	/* same layer addressing as emit_mrt(): */
	uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer;

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
	OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
			A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
			A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
	OUT_RELOCW(ring, rsc->bo,
			slice->offset + layer_offset, 0, 0);   /* RB_COPY_DEST_BASE */
	OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
	OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
			A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
			A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
			A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
			A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));

	/* the copy is triggered by drawing a 2-vertex rectlist: */
	fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}

/* Emit the gmem -> memory resolve.
 *
 * Programs depth/stencil tests to FUNC_NEVER, a viewport and window
 * scissor covering the whole framebuffer, and switches GRAS_SC_CONTROL to
 * RB_RESOLVE_PASS.  It then emits the "solid" program and vertex buffer,
 * resolves depth/stencil (from depth_base()) and/or color (from gmem
 * offset 0) depending on ctx->resolve, and finally switches back to
 * RB_RENDERING_PASS.  The 'tile' argument is not used.
 */
static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = key,
};

OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));

OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));

OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, 0xff000000 |
A4XX_RB_STENCILREFMASK_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_RING(ring, 0xff000000 |
A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));

OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

fd_wfi(ctx, ring);

OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */

/* viewport spanning the whole framebuffer (y scale is negated): */
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
0xa); /* XXX */

/* switch to resolve pass for the copies below: */
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x00000002);

OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */

fd4_program_emit(ring, &emit);
fd4_emit_vertex_bufs(ring, &emit);

/* depth/stencil lives in gmem after the color buffer, at depth_base(): */
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
uint32_t base = depth_base(ctx);
emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
}

/* only the first color buffer is resolved, from gmem offset 0: */
if (ctx->resolve & FD_BUFFER_COLOR) {
emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
}

/* back to the normal rendering pass: */
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}

/* transfer from system memory to gmem */

/* Not implemented yet: the body is empty, so nothing is emitted and both
 * arguments are unused.
 */
static void
fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
/* TODO */
}

/* Fill in the vis-cull mode of every draw packet recorded in
 * ctx->draw_patches (those were emitted with the field left blank), then
 * empty the patch list.
 */
static void
patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
{
	/* only the vis-cull field is non-zero in this dword: */
	const uint32_t vis_bits = DRAW4(0, 0, 0, vismode);
	const unsigned count = fd_patch_num_elements(&ctx->draw_patches);
	unsigned n;

	for (n = 0; n < count; n++) {
		struct fd_cs_patch *entry = fd_patch_element(&ctx->draw_patches, n);
		*entry->cs = entry->val | vis_bits;
	}

	util_dynarray_resize(&ctx->draw_patches, 0);
}

/* OR 'val' into every RB_RENDER_CONTROL value recorded in the a4xx
 * context's rbrc_patches list, then empty the list.
 */
static void
patch_rbrc(struct fd_context *ctx, uint32_t val)
{
	struct fd4_context *a4xx = fd4_context(ctx);
	const unsigned count = fd_patch_num_elements(&a4xx->rbrc_patches);
	unsigned n;

	for (n = 0; n < count; n++) {
		struct fd_cs_patch *entry = fd_patch_element(&a4xx->rbrc_patches, n);
		*entry->cs = entry->val | val;
	}

	util_dynarray_resize(&a4xx->rbrc_patches, 0);
}

/* Emit the VSC (visibility stream) setup: the size buffer address, then
 * for each of the 8 pipes its x/y/w/h configuration, the address of its
 * data buffer and that buffer's length.  A pipe's data buffer is allocated
 * on first use.
 */
static void
update_vsc_pipe(struct fd_context *ctx)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;

OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */

OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
}

OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
/* lazily allocate the pipe's data buffer (0x40000 bytes).
 * NOTE(review): the result of fd_bo_new() is not checked here.
 */
if (!pipe->bo) {
pipe->bo = fd_bo_new(ctx->dev, 0x40000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */
}

OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
/* the programmed length is 32 bytes less than the buffer size: */
OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
}
}

/*
 * Called before the first tile: emits the restore state, programs the bin
 * size into VSC_BIN_SIZE and RB_MODE_CONTROL, sets up the VSC pipes and
 * resolves the deferred draw / rbrc patches.
 */
static void
fd4_emit_tile_init(struct fd_context *ctx)
{
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;

	fd4_emit_restore(ctx);

	OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
	OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
			A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000); /* XXX */

	update_vsc_pipe(ctx);
	patch_draws(ctx, IGNORE_VISIBILITY);

	/* XXX or BINNING_PASS.. but maybe we can emit only from gmem */
	patch_rbrc(ctx, 0);
}

/*
 * Called before mem2gmem: programs the depth buffer registers (base, format
 * and pitches) and, if flagged via ctx->needs_rb_fbd, the frame buffer
 * dimensions.  Without a zsbuf only the depth base is set and both pitch
 * words are written as zero.
 */
static void
fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd_ringbuffer *ring = ctx->ring;
	uint32_t depth_info;

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
	depth_info = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));

	if (!pfb->zsbuf) {
		OUT_RING(ring, depth_info);
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, 0x00000000);
	} else {
		depth_info |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(
				fd_pipe2depth(pfb->zsbuf->format));
		OUT_RING(ring, depth_info);
		OUT_RING(ring, A4XX_RB_DEPTH_PITCH(gmem->bin_w));
		OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(gmem->bin_w));

		OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
		OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
				fd_pipe2depth(pfb->zsbuf->format)));
	}

	/* frame buffer dimensions are only re-emitted when requested */
	if (!ctx->needs_rb_fbd)
		return;

	fd_wfi(ctx, ring);
	OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
	ctx->needs_rb_fbd = false;
}

/*
 * Called before the IB to the rendering commands: selects the current bin,
 * emits the MRT setup for the gmem bin width and programs the bin offset and
 * screen scissor to the tile rectangle.
 */
static void
fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;

	/* inclusive tile rectangle */
	uint32_t left = tile->xoff;
	uint32_t top = tile->yoff;
	uint32_t right = left + tile->bin_w - 1;
	uint32_t bottom = top + tile->bin_h - 1;

	OUT_PKT3(ring, CP_SET_BIN, 3);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, CP_SET_BIN_1_X1(left) | CP_SET_BIN_1_Y1(top));
	OUT_RING(ring, CP_SET_BIN_2_X2(right) | CP_SET_BIN_2_Y2(bottom));

	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);

	/* setup scissor/offset for current tile: */
	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
	OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(left) |
			A4XX_RB_BIN_OFFSET_Y(top));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(left) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(top));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(right) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bottom));
}

/*
 * Hook the a4xx tile emit callbacks into the freedreno context that backs
 * 'pctx'.
 */
void
fd4_gmem_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	/* once per batch */
	ctx->emit_tile_init = fd4_emit_tile_init;

	/* per tile */
	ctx->emit_tile_prep = fd4_emit_tile_prep;
	ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;

	/* transfers between system memory and gmem */
	ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
	ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
}

+ 36
- 0
src/gallium/drivers/freedreno/a4xx/fd4_gmem.h View File

@@ -0,0 +1,36 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_GMEM_H_
#define FD4_GMEM_H_

#include "pipe/p_context.h"

/* Install the a4xx emit_tile_* callbacks on the context behind 'pctx'. */
void fd4_gmem_init(struct pipe_context *pctx);

#endif /* FD4_GMEM_H_ */

+ 480
- 0
src/gallium/drivers/freedreno/a4xx/fd4_program.c View File

@@ -0,0 +1,480 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"

#include "freedreno_program.h"

#include "fd4_program.h"
#include "fd4_emit.h"
#include "fd4_texture.h"
#include "fd4_util.h"

/*
 * Destroy the ir3 shader owned by 'so' and then release the state object
 * itself.
 */
static void
delete_shader_stateobj(struct fd4_shader_stateobj *so)
{
	struct ir3_shader *shader = so->shader;

	ir3_shader_destroy(shader);
	free(so);
}

/*
 * Allocate a shader state object and create the ir3 shader of the given
 * type from the tokens in 'cso'.
 *
 * Returns the new state object, or NULL if the allocation fails.
 */
static struct fd4_shader_stateobj *
create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
		enum shader_t type)
{
	struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);

	/* the allocation can fail; do not write through a NULL pointer */
	if (!so)
		return NULL;

	so->shader = ir3_shader_create(pctx, cso->tokens, type);
	return so;
}

/* pipe_context::create_fs_state hook: build a fragment shader state object. */
static void *
fd4_fp_state_create(struct pipe_context *pctx,
		const struct pipe_shader_state *cso)
{
	struct fd4_shader_stateobj *fp =
		create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);

	return fp;
}

/* pipe_context::delete_fs_state hook: free a fragment shader state object. */
static void
fd4_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	/* hwcso is the fd4_shader_stateobj handed out by fd4_fp_state_create() */
	delete_shader_stateobj(hwcso);
}

/* pipe_context::create_vs_state hook: build a vertex shader state object. */
static void *
fd4_vp_state_create(struct pipe_context *pctx,
		const struct pipe_shader_state *cso)
{
	struct fd4_shader_stateobj *vp =
		create_shader_stateobj(pctx, cso, SHADER_VERTEX);

	return vp;
}

/* pipe_context::delete_vs_state hook: free a vertex shader state object. */
static void
fd4_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	/* hwcso is the fd4_shader_stateobj handed out by fd4_vp_state_create() */
	delete_shader_stateobj(hwcso);
}

static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
const struct ir3_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;

if (so->type == SHADER_VERTEX) {
sb = SB_VERT_SHADER;
} else {
sb = SB_FRAG_SHADER;
}

if (fd_mesa_debug & FD_DBG_DIRECT) {
sz = si->sizedwords;
src = SS_DIRECT;
bin = fd_bo_map(so->bo);
} else {
sz = 0;
src = 2; // enums different on a4xx..
bin = NULL;
}

OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
if (bin) {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
} else {
OUT_RELOC(ring, so->bo, 0,
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, bin[i]);
}
}

/*
 * Per-shader-stage layout, filled in by setup_stages() and read by
 * fd4_program_emit() when programming the HLSQ/SP registers.
 */
struct stage {
/* shader variant bound to this stage, or NULL if the stage is unused */
const struct ir3_shader_variant *v;
/* points at v->info when v is set, NULL otherwise */
const struct ir3_info *i;
/* const sizes are in units of 4 * vec4 */
uint8_t constoff;
uint8_t constlen;
/* instr sizes are in units of 16 instructions */
uint8_t instroff;
uint8_t instrlen;
};

/*
 * Indices into the 'struct stage' array used by setup_stages() and
 * fd4_program_emit(); MAX_STAGES is the array size.
 */
enum {
VS = 0,
FS = 1,
HS = 2,
DS = 3,
GS = 4,
MAX_STAGES
};

static void
setup_stages(struct fd4_emit *emit, struct stage *s)
{
unsigned i;

s[VS].v = fd4_emit_get_vp(emit);

if (emit->key.binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fp = {};
s[FS].v = &binning_fp;
} else {
s[FS].v = fd4_emit_get_fp(emit);
}

s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */

for (i = 0; i < MAX_STAGES; i++) {
if (s[i].v) {
s[i].i = &s[i].v->info;
/* constlen is in units of 4 * vec4: */
s[i].constlen = align(s[i].v->constlen, 4) / 4;
/* instrlen is already in units of 16 instr.. although
* probably we should ditch that and not make the compiler
* care about instruction group size of a3xx vs a4xx
*/
s[i].instrlen = s[i].v->instrlen;
} else {
s[i].i = NULL;
s[i].constlen = 0;
s[i].instrlen = 0;
}
}

/* NOTE: at least for gles2, blob partitions VS at bottom of const
* space and FS taking entire remaining space. We probably don't
* need to do that the same way, but for now mimic what the blob
* does to make it easier to diff against register values from blob
*/
s[VS].constlen = 66;
s[FS].constlen = 128 - s[VS].constlen;
s[VS].instroff = 0;
s[VS].constoff = 0;
s[FS].instroff = 64 - s[FS].instrlen;
s[FS].constoff = s[VS].constlen;
s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
}

/*
 * Emit the complete program state for the VS/FS pair selected by 'emit'
 * into 'ring': HLSQ and SP control registers, the VS output -> FS input
 * linkage, FS output/MRT registers, VPC attribute and interpolation state,
 * and finally the shader instructions themselves (via emit_shader()).
 *
 * In the binning pass the FS stage is the zeroed dummy variant chosen by
 * setup_stages(); the FS object start address is written as 0 and no FS
 * instructions are emitted.
 */
void
fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
struct stage s[MAX_STAGES];
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
int constmode;
int i, j, k;

setup_stages(emit, s);

/* blob seems to always use constmode currently: */
constmode = 1;

/* look up the registers holding the special VS/FS outputs */
pos_regid = ir3_find_output_regid(s[VS].v,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
posz_regid = ir3_find_output_regid(s[FS].v,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(s[VS].v,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
color_regid = ir3_find_output_regid(s[FS].v,
ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));

/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/

OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
OUT_RING(ring, 0x00000003);

OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 4);
OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
* flush some caches? I think we only need to set those
* bits if we have updated const or shader..
*/
A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
0xfcfc0000 | /* XXX */
A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
COND(s[FS].v->frag_coord, A4XX_HLSQ_CONTROL_1_REG_ZWCOORD));
OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid));

/* one control word per stage (VS, FS, HS, DS, GS), using the layout
* computed by setup_stages()
*/
OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));

OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, 0x140010 | /* XXX */
COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));

OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
OUT_RING(ring, 0x1c3); /* XXX SP_INSTR_CACHE_CTRL */

OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */

OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4));

/* VS output linkage: each SP_VS_OUT_REG describes up to two FS varyings
* (A and B), giving the VS output register and the component mask the
* FS reads.  j walks the FS inputs via ir3_next_varying().
*/
for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
uint32_t reg = 0;

OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);

j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count) {
k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
}

j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count) {
k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
}

OUT_RING(ring, reg);
}

/* each SP_VS_VPC_DST_REG holds the inloc of up to four FS varyings */
for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) {
uint32_t reg = 0;

OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);

j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc);

OUT_RING(ring, reg);
}

OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */

OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */

OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
0x80000000 | /* XXX */
COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING));

OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
/* the binning pass has no real FS, so there is no buffer to point at */
if (emit->key.binning_pass)
OUT_RING(ring, 0x00000000);
else
OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */

OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));

OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));

OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));

OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL2, 1);
OUT_RING(ring, A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES(0) |
COND(s[FS].v->total_in > 0, A4XX_RB_MSAA_CONTROL2_VARYING));

/* FS depth output is only enabled when the FS writes position */
OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
if (s[FS].v->writes_pos) {
OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
} else {
OUT_RING(ring, 0x00000001);
}

/* only MRT 0 is set up from the shader; the other seven get regid 0 */
OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
// XXX do we need to patch? or update when RT format changes.. maybe
// move this to emit??
A4XX_SP_FS_MRT_REG_MRTFORMAT(RB4_R8G8B8A8_UNORM) | // XXX patch?
COND(s[FS].v->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));

if (emit->key.binning_pass) {
/* binning: no varyings, so VPC_ATTR has no attribute count and the
* second word (written as VPC_PACK in the other branch) is zero
*/
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
0x40000000 | /* XXX */
COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
OUT_RING(ring, 0x00000000);
} else {
/* NOTE(review): flatshade[] is filled in by the loop below but is never
* written to the ring anywhere in this function -- confirm whether a
* flat-shade register write is missing or the array is dead code.
*/
uint32_t vinterp[8] = {0}, flatshade[2] = {0};

/* figure out VARYING_INTERP / FLAT_SHAD register values: */
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
uint32_t interp = s[FS].v->inputs[j].interpolate;
if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
* instead.. rather than -8 everywhere else..
*/
uint32_t loc = s[FS].v->inputs[j].inloc - 8;

/* currently assuming varyings aligned to 4 (not
* packed):
*/
debug_assert((loc % 4) == 0);

/* mark all four components: 2 bits per component in vinterp,
* 1 bit per component in flatshade
*/
for (i = 0; i < 4; i++, loc++) {
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
flatshade[loc / 32] |= 1 << (loc % 32);
}
}
}

OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
A4XX_VPC_ATTR_THRDASSIGN(1) |
COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
0x40000000 | /* XXX */
COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));

OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
for (i = 0; i < 8; i++)
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */

OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (i = 0; i < 8; i++)
OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
}

/* finally load the shader instructions themselves */
emit_shader(ring, s[VS].v);

if (!emit->key.binning_pass)
emit_shader(ring, s[FS].v);
}

/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
static void
fix_blit_fp(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
struct fd4_shader_stateobj *so = ctx->blit_prog.fp;

so->shader->vpsrepl[0] = 0x99999999;
so->shader->vpsrepl[1] = 0x99999999;
so->shader->vpsrepl[2] = 0x99999999;
so->shader->vpsrepl[3] = 0x99999999;
}

/*
 * Install the a4xx shader state create/delete hooks on 'pctx', run the
 * common freedreno program init and then patch up the blit fragment shader.
 */
void
fd4_prog_init(struct pipe_context *pctx)
{
	/* vertex shader CSO hooks */
	pctx->create_vs_state = fd4_vp_state_create;
	pctx->delete_vs_state = fd4_vp_state_delete;

	/* fragment shader CSO hooks */
	pctx->create_fs_state = fd4_fp_state_create;
	pctx->delete_fs_state = fd4_fp_state_delete;

	/* hooks are installed first; fix_blit_fp() reads ctx->blit_prog.fp */
	fd_prog_init(pctx);
	fix_blit_fp(pctx);
}

+ 46
- 0
src/gallium/drivers/freedreno/a4xx/fd4_program.h View File

@@ -0,0 +1,46 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_PROGRAM_H_
#define FD4_PROGRAM_H_

#include "pipe/p_context.h"
#include "freedreno_context.h"
#include "ir3_shader.h"

/* a4xx shader state object (CSO): a thin wrapper around one ir3 shader. */
struct fd4_shader_stateobj {
/* the ir3 shader backing this state object */
struct ir3_shader *shader;
};

struct fd4_emit;

/* Emit the program (shader) state selected by 'emit' into 'ring'. */
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);

/* Install the a4xx vertex/fragment shader state hooks on 'pctx'. */
void fd4_prog_init(struct pipe_context *pctx);

#endif /* FD4_PROGRAM_H_ */

+ 39
- 0
src/gallium/drivers/freedreno/a4xx/fd4_query.c View File

@@ -0,0 +1,39 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"

#include "fd4_query.h"
#include "fd4_util.h"

/*
 * Per-context query setup for a4xx.  Not implemented yet: the body is empty
 * and 'pctx' is left untouched.
 */
void fd4_query_context_init(struct pipe_context *pctx)
{
/* TODO */
}

+ 36
- 0
src/gallium/drivers/freedreno/a4xx/fd4_query.h View File

@@ -0,0 +1,36 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_QUERY_H_
#define FD4_QUERY_H_

#include "pipe/p_context.h"

/* Per-context query setup for a4xx (implementation is still a TODO). */
void fd4_query_context_init(struct pipe_context *pctx);

#endif /* FD4_QUERY_H_ */

+ 94
- 0
src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c View File

@@ -0,0 +1,94 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/


#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"

#include "fd4_rasterizer.h"
#include "fd4_context.h"
#include "fd4_util.h"

void *
fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd4_rasterizer_stateobj *so;
float psize_min, psize_max;

so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
if (!so)
return NULL;

so->base = *cso;

if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 8192;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}

/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax =
A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
so->gras_su_poly_offset_scale =
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);

so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);

if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;

if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;

return so;
}

+ 56
- 0
src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h View File

@@ -0,0 +1,56 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_RASTERIZER_H_
#define FD4_RASTERIZER_H_

#include "pipe/p_state.h"
#include "pipe/p_context.h"

/*
 * a4xx rasterizer state object.  'base' must stay the first member: the
 * fd4_rasterizer_stateobj() helper casts a pipe_rasterizer_state pointer
 * straight to this type.  The uint32_t fields hold register values computed
 * once in fd4_rasterizer_state_create().
 */
struct fd4_rasterizer_stateobj {
/* copy of the gallium rasterizer state this object was created from */
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;

uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
};

/*
 * Downcast a gallium rasterizer state pointer to the a4xx state object that
 * embeds it as its first member.
 */
static INLINE struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
	struct fd4_rasterizer_stateobj *so =
		(struct fd4_rasterizer_stateobj *)rast;

	return so;
}

/*
 * Create the a4xx rasterizer state object for 'cso'.  Returns a
 * struct fd4_rasterizer_stateobj pointer, or NULL if allocation fails.
 */
void * fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);

#endif /* FD4_RASTERIZER_H_ */

+ 105
- 0
src/gallium/drivers/freedreno/a4xx/fd4_screen.c View File

@@ -0,0 +1,105 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_screen.h"
#include "util/u_format.h"

#include "fd4_screen.h"
#include "fd4_context.h"
#include "fd4_util.h"

/* pipe_screen::is_format_supported hook for a4xx.
 *
 * Collects into 'supported' every binding bit of 'usage' that the format
 * conversion tables can satisfy, and reports success only when no
 * requested bit is missing.  'pscreen' is not used.
 */
static boolean
fd4_screen_is_format_supported(struct pipe_screen *pscreen,
		enum pipe_format format,
		enum pipe_texture_target target,
		unsigned sample_count,
		unsigned usage)
{
	/* bindings that need the format to be usable as a color buffer: */
	const unsigned rt_binds = PIPE_BIND_RENDER_TARGET |
			PIPE_BIND_DISPLAY_TARGET |
			PIPE_BIND_SCANOUT |
			PIPE_BIND_SHARED;
	/* bindings that are accepted for any format: */
	const unsigned xfer_binds = PIPE_BIND_TRANSFER_READ |
			PIPE_BIND_TRANSFER_WRITE;
	unsigned supported = 0;

	/* reject outright: out-of-range target, multisampling, or a format
	 * the util layer does not accept for this usage
	 */
	if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
			(sample_count > 1) || /* TODO add MSAA */
			!util_format_is_supported(format, usage)) {
		DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
				util_format_name(format), target, sample_count, usage);
		return FALSE;
	}

	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
			(fd4_pipe2vtx(format) != ~0))
		supported |= PIPE_BIND_VERTEX_BUFFER;

	if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
			(fd4_pipe2tex(format) != ~0))
		supported |= PIPE_BIND_SAMPLER_VIEW;

	/* render targets must also be sampleable: */
	if ((usage & rt_binds) &&
			(fd4_pipe2color(format) != ~0) &&
			(fd4_pipe2tex(format) != ~0))
		supported |= usage & rt_binds;

	/* so must depth/stencil buffers: */
	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
			(fd_pipe2depth(format) != ~0) &&
			(fd4_pipe2tex(format) != ~0))
		supported |= PIPE_BIND_DEPTH_STENCIL;

	if ((usage & PIPE_BIND_INDEX_BUFFER) &&
			(fd_pipe2index(format) != ~0))
		supported |= PIPE_BIND_INDEX_BUFFER;

	supported |= usage & xfer_binds;

	if (supported != usage) {
		DBG("not supported: format=%s, target=%d, sample_count=%d, "
				"usage=%x, retval=%x", util_format_name(format),
				target, sample_count, usage, supported);
	}

	return supported == usage;
}

/* Plug the a4xx-specific entry points (format query and context
 * creation) into the screen vtable.
 */
void
fd4_screen_init(struct pipe_screen *pscreen)
{
	pscreen->is_format_supported = fd4_screen_is_format_supported;
	pscreen->context_create = fd4_context_create;
}

+ 36
- 0
src/gallium/drivers/freedreno/a4xx/fd4_screen.h View File

@@ -0,0 +1,36 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_SCREEN_H_
#define FD4_SCREEN_H_

#include "pipe/p_screen.h"

/* Installs the a4xx context_create and is_format_supported hooks on pscreen. */
void fd4_screen_init(struct pipe_screen *pscreen);

#endif /* FD4_SCREEN_H_ */

+ 190
- 0
src/gallium/drivers/freedreno/a4xx/fd4_texture.c View File

@@ -0,0 +1,190 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "fd4_texture.h"
#include "fd4_util.h"

/* TODO do we need to emulate clamp-to-edge like a3xx? */
static enum a4xx_tex_clamp
tex_clamp(unsigned wrap)
{
/* hardware probably supports more, but we can't coax all the
* wrap/clamp modes out of the GLESv2 blob driver.
*
* TODO once we have basics working, go back and just try
* different values and see what happens
*/
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A4XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A4XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
// TODO
// return A4XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
// TODO
// return A4XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A4XX_TEX_MIRROR_REPEAT;
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}

/* Translate a gallium PIPE_TEX_FILTER_x value into the a4xx filter enum.
 * Anything other than nearest/linear is logged and mapped to 0.
 */
static enum a4xx_tex_filter
tex_filter(unsigned filter)
{
	if (filter == PIPE_TEX_FILTER_NEAREST)
		return A4XX_TEX_NEAREST;

	if (filter == PIPE_TEX_FILTER_LINEAR)
		return A4XX_TEX_LINEAR;

	DBG("invalid filter: %u", filter);
	return 0;
}

/* pipe_context::create_sampler_state hook.
 *
 * Allocates a sampler state object, keeps a copy of the gallium CSO in it
 * and pre-computes the two TEX_SAMP words from the CSO.  Returns NULL if
 * the allocation fails.  'pctx' is not used.
 */
static void *
fd4_sampler_state_create(struct pipe_context *pctx,
		const struct pipe_sampler_state *cso)
{
	struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
	uint32_t samp1 = 0x00000000;

	if (!so)
		return NULL;

	so->base = *cso;

	/* filtering and per-axis wrap modes: */
	so->texsamp0 =
		A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
		A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
		A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
		A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
		A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));

	/* the LOD range is only programmed when mip filtering is in use: */
	if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
		samp1 =
			A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
			A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
	}

	if (cso->compare_mode)
		samp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */

	so->texsamp1 = samp1;

	return so;
}

/* Map a gallium texture target to the a4xx texture type.  Array targets
 * share the type of their non-array counterpart.  An unknown target
 * asserts; in builds where the assert is compiled out it is treated as 1D.
 */
static enum a4xx_tex_type
tex_type(unsigned target)
{
	switch (target) {
	case PIPE_TEXTURE_RECT:
	case PIPE_TEXTURE_2D:
	case PIPE_TEXTURE_2D_ARRAY:
		return A4XX_TEX_2D;
	case PIPE_TEXTURE_3D:
		return A4XX_TEX_3D;
	case PIPE_TEXTURE_CUBE:
	case PIPE_TEXTURE_CUBE_ARRAY:
		return A4XX_TEX_CUBE;
	case PIPE_BUFFER:
	case PIPE_TEXTURE_1D:
	case PIPE_TEXTURE_1D_ARRAY:
		return A4XX_TEX_1D;
	default:
		assert(0);
		return A4XX_TEX_1D;
	}
}

/* pipe_context::create_sampler_view hook.
 *
 * Allocates a sampler view for 'prsc', takes a reference on the resource
 * and pre-computes the four TEX_CONST words.  Returns NULL if the
 * allocation fails.
 */
static struct pipe_sampler_view *
fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
		const struct pipe_sampler_view *cso)
{
	struct fd4_pipe_sampler_view *view = CALLOC_STRUCT(fd4_pipe_sampler_view);
	struct fd_resource *rsc = fd_resource(prsc);
	unsigned first_lvl = cso->u.tex.first_level;

	if (!view)
		return NULL;

	/* start from the template, then fix up the fields owned by the view: */
	view->base = *cso;
	pipe_reference(NULL, &prsc->reference);
	view->base.texture = prsc;
	view->base.reference.count = 1;
	view->base.context = pctx;

	view->tex_resource = rsc;

	view->texconst0 =
		A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
		A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
		fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
				cso->swizzle_b, cso->swizzle_a);

	/* NOTE(review): width/height are the level-0 size while the pitch
	 * below is taken from first_level -- confirm this is intended once
	 * mipmaps are supported.
	 */
	view->texconst1 =
		A4XX_TEX_CONST_1_WIDTH(prsc->width0) |
		A4XX_TEX_CONST_1_HEIGHT(prsc->height0);

	/* pitch is converted to bytes by multiplying with cpp: */
	view->texconst2 =
		A4XX_TEX_CONST_2_PITCH(rsc->slices[first_lvl].pitch * rsc->cpp);

	/* only layered/3d targets carry a layer size: */
	if ((prsc->target == PIPE_TEXTURE_1D_ARRAY) ||
			(prsc->target == PIPE_TEXTURE_2D_ARRAY) ||
			(prsc->target == PIPE_TEXTURE_3D)) {
		view->texconst3 =
			A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
	} else {
		view->texconst3 = 0x00000000;
	}

	return &view->base;
}

/* Hook up the texture/sampler related context entry points: the a4xx
 * create functions from this file plus the shared fd_sampler_states_bind.
 */
void
fd4_texture_init(struct pipe_context *pctx)
{
	pctx->create_sampler_view = fd4_sampler_view_create;
	pctx->create_sampler_state = fd4_sampler_state_create;
	pctx->bind_sampler_states = fd_sampler_states_bind;
}

+ 68
- 0
src/gallium/drivers/freedreno/a4xx/fd4_texture.h View File

@@ -0,0 +1,68 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_TEXTURE_H_
#define FD4_TEXTURE_H_

#include "pipe/p_context.h"

#include "freedreno_texture.h"
#include "freedreno_resource.h"

#include "fd4_context.h"
#include "fd4_util.h"

/* a4xx sampler state: the gallium sampler CSO plus the two pre-computed
 * TEX_SAMP words filled in by fd4_sampler_state_create().
 */
struct fd4_sampler_stateobj {
/* first member, so a pipe_sampler_state pointer can be cast to this type */
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
};

/* Downcast from the embedded gallium sampler CSO to the a4xx sampler
 * state object ('base' is its first member).
 */
static INLINE struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
	struct fd4_sampler_stateobj *so = (struct fd4_sampler_stateobj *)samp;

	return so;
}

/* a4xx sampler view: the gallium sampler view, the freedreno resource it
 * samples from, and the four pre-computed TEX_CONST words filled in by
 * fd4_sampler_view_create().
 */
struct fd4_pipe_sampler_view {
/* first member, so a pipe_sampler_view pointer can be cast to this type */
struct pipe_sampler_view base;
struct fd_resource *tex_resource;
uint32_t texconst0, texconst1, texconst2, texconst3;
};

/* Downcast from the embedded gallium sampler view to the a4xx sampler
 * view that contains it ('base' is its first member).
 */
static INLINE struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
	struct fd4_pipe_sampler_view *view =
		(struct fd4_pipe_sampler_view *)pview;

	return view;
}

/* NOTE(review): fd4_texture.c does not appear to define
 * fd4_get_const_idx(); confirm it is defined elsewhere, or drop this
 * declaration.
 */
unsigned fd4_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);

/* Installs the a4xx sampler-state and sampler-view hooks on pctx. */
void fd4_texture_init(struct pipe_context *pctx);

#endif /* FD4_TEXTURE_H_ */

+ 401
- 0
src/gallium/drivers/freedreno/a4xx/fd4_util.c View File

@@ -0,0 +1,401 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#include "pipe/p_defines.h"
#include "util/u_format.h"

#include "fd4_util.h"

/* Convert a pipe format to the a4xx vertex-fetch format.
 *
 * Returns ~0 for formats that have no vertex-fetch equivalent here.  The
 * cases are grouped by the total size of one element.
 */
enum a4xx_vtx_fmt
fd4_pipe2vtx(enum pipe_format format)
{
	switch (format) {
	/* ---- 8 bits per element ---- */
	case PIPE_FORMAT_R8_UNORM:
		return VFMT4_NORM_UBYTE_8;
	case PIPE_FORMAT_R8_SNORM:
		return VFMT4_NORM_BYTE_8;
	case PIPE_FORMAT_R8_UINT:
	case PIPE_FORMAT_R8_USCALED:
		return VFMT4_UBYTE_8;
	case PIPE_FORMAT_R8_SINT:
	case PIPE_FORMAT_R8_SSCALED:
		return VFMT4_BYTE_8;

	/* ---- 16 bits per element ---- */
	case PIPE_FORMAT_R16_UNORM:
	case PIPE_FORMAT_Z16_UNORM:
		return VFMT4_NORM_USHORT_16;
	case PIPE_FORMAT_R16_SNORM:
		return VFMT4_NORM_SHORT_16;
	case PIPE_FORMAT_R16_UINT:
	case PIPE_FORMAT_R16_USCALED:
		return VFMT4_USHORT_16;
	case PIPE_FORMAT_R16_SINT:
	case PIPE_FORMAT_R16_SSCALED:
		return VFMT4_SHORT_16;
	case PIPE_FORMAT_R16_FLOAT:
		return VFMT4_FLOAT_16;
	case PIPE_FORMAT_R8G8_UNORM:
		return VFMT4_NORM_UBYTE_8_8;
	case PIPE_FORMAT_R8G8_SNORM:
		return VFMT4_NORM_BYTE_8_8;
	case PIPE_FORMAT_R8G8_UINT:
	case PIPE_FORMAT_R8G8_USCALED:
		return VFMT4_UBYTE_8_8;
	case PIPE_FORMAT_R8G8_SINT:
	case PIPE_FORMAT_R8G8_SSCALED:
		return VFMT4_BYTE_8_8;

	/* ---- 24 bits per element ---- */
	case PIPE_FORMAT_R8G8B8_UNORM:
		return VFMT4_NORM_UBYTE_8_8_8;
	case PIPE_FORMAT_R8G8B8_SNORM:
		return VFMT4_NORM_BYTE_8_8_8;
	case PIPE_FORMAT_R8G8B8_UINT:
	case PIPE_FORMAT_R8G8B8_USCALED:
		return VFMT4_UBYTE_8_8_8;
	case PIPE_FORMAT_R8G8B8_SINT:
	case PIPE_FORMAT_R8G8B8_SSCALED:
		return VFMT4_BYTE_8_8_8;

	/* ---- 32 bits per element ---- */
	case PIPE_FORMAT_A8B8G8R8_UNORM:
	case PIPE_FORMAT_A8R8G8B8_UNORM:
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_R8G8B8A8_UNORM:
		return VFMT4_NORM_UBYTE_8_8_8_8;
	case PIPE_FORMAT_R8G8B8A8_SNORM:
		return VFMT4_NORM_BYTE_8_8_8_8;
	case PIPE_FORMAT_R8G8B8A8_UINT:
	case PIPE_FORMAT_R8G8B8A8_USCALED:
		return VFMT4_UBYTE_8_8_8_8;
	case PIPE_FORMAT_R8G8B8A8_SINT:
	case PIPE_FORMAT_R8G8B8A8_SSCALED:
		return VFMT4_BYTE_8_8_8_8;
	case PIPE_FORMAT_R16G16_SSCALED:
	case PIPE_FORMAT_R16G16_SINT:
		return VFMT4_SHORT_16_16;
	case PIPE_FORMAT_R16G16_FLOAT:
		return VFMT4_FLOAT_16_16;
	case PIPE_FORMAT_R16G16_UINT:
	case PIPE_FORMAT_R16G16_USCALED:
		return VFMT4_USHORT_16_16;
	case PIPE_FORMAT_R16G16_UNORM:
		return VFMT4_NORM_USHORT_16_16;
	case PIPE_FORMAT_R16G16_SNORM:
		return VFMT4_NORM_SHORT_16_16;
	case PIPE_FORMAT_R10G10B10A2_UNORM:
		return VFMT4_NORM_UINT_10_10_10_2;
	case PIPE_FORMAT_R10G10B10A2_SNORM:
		return VFMT4_NORM_INT_10_10_10_2;
	case PIPE_FORMAT_R10G10B10A2_UINT:
	case PIPE_FORMAT_R10G10B10A2_USCALED:
		return VFMT4_UINT_10_10_10_2;
	case PIPE_FORMAT_R10G10B10A2_SSCALED:
		return VFMT4_INT_10_10_10_2;
	case PIPE_FORMAT_R32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT:
		return VFMT4_FLOAT_32;
	case PIPE_FORMAT_R32_FIXED:
		return VFMT4_FIXED_32;

	/* ---- 48 bits per element ---- */
	case PIPE_FORMAT_R16G16B16_FLOAT:
		return VFMT4_FLOAT_16_16_16;
	case PIPE_FORMAT_R16G16B16_SINT:
	case PIPE_FORMAT_R16G16B16_SSCALED:
		return VFMT4_SHORT_16_16_16;
	case PIPE_FORMAT_R16G16B16_UINT:
	case PIPE_FORMAT_R16G16B16_USCALED:
		return VFMT4_USHORT_16_16_16;
	case PIPE_FORMAT_R16G16B16_SNORM:
		return VFMT4_NORM_SHORT_16_16_16;
	case PIPE_FORMAT_R16G16B16_UNORM:
		return VFMT4_NORM_USHORT_16_16_16;

	/* ---- 64 bits per element ---- */
	case PIPE_FORMAT_R16G16B16A16_UNORM:
		return VFMT4_NORM_USHORT_16_16_16_16;
	case PIPE_FORMAT_R16G16B16A16_SNORM:
		return VFMT4_NORM_SHORT_16_16_16_16;
	case PIPE_FORMAT_R16G16B16A16_UINT:
	case PIPE_FORMAT_R16G16B16A16_USCALED:
		return VFMT4_USHORT_16_16_16_16;
	case PIPE_FORMAT_R16G16B16A16_SINT:
	case PIPE_FORMAT_R16G16B16A16_SSCALED:
		return VFMT4_SHORT_16_16_16_16;
	case PIPE_FORMAT_R16G16B16A16_FLOAT:
		return VFMT4_FLOAT_16_16_16_16;
	case PIPE_FORMAT_R32G32_FLOAT:
		return VFMT4_FLOAT_32_32;
	case PIPE_FORMAT_R32G32_FIXED:
		return VFMT4_FIXED_32_32;

	/* ---- 96 bits per element ---- */
	case PIPE_FORMAT_R32G32B32_FLOAT:
		return VFMT4_FLOAT_32_32_32;
	case PIPE_FORMAT_R32G32B32_FIXED:
		return VFMT4_FIXED_32_32_32;

	/* ---- 128 bits per element ---- */
	case PIPE_FORMAT_R32G32B32A32_FLOAT:
		return VFMT4_FLOAT_32_32_32_32;
	case PIPE_FORMAT_R32G32B32A32_FIXED:
		return VFMT4_FIXED_32_32_32_32;

	/* TODO probably need gles3 blob drivers to find the 32bit int
	 * formats.  Not handled yet:
	 *
	 *   PIPE_FORMAT_R32G32B32A32_{SNORM,UNORM,SINT,UINT}
	 *   PIPE_FORMAT_{R,A,L,I}32_{UINT,SINT}
	 *   PIPE_FORMAT_R32G32_{SINT,UINT}
	 *   PIPE_FORMAT_L32A32_{UINT,SINT}
	 */

	default:
		return ~0;
	}
}

/* Convert a pipe format to the a4xx texture sampler format.
 *
 * Returns ~0 for formats the sampler path does not handle yet.
 */
enum a4xx_tex_fmt
fd4_pipe2tex(enum pipe_format format)
{
	switch (format) {
	/* single 8-bit channel: */
	case PIPE_FORMAT_L8_UNORM:
	case PIPE_FORMAT_A8_UNORM:
	case PIPE_FORMAT_I8_UNORM:
		return TFMT4_NORM_UINT_8;

	/* four 8-bit channels, linear and sRGB variants alike; packed
	 * depth+stencil is sampled with the same layout:
	 */
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_B8G8R8X8_UNORM:
	case PIPE_FORMAT_R8G8B8A8_UNORM:
	case PIPE_FORMAT_R8G8B8X8_UNORM:
	case PIPE_FORMAT_B8G8R8A8_SRGB:
	case PIPE_FORMAT_B8G8R8X8_SRGB:
	case PIPE_FORMAT_R8G8B8A8_SRGB:
	case PIPE_FORMAT_R8G8B8X8_SRGB:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		return TFMT4_NORM_UINT_8_8_8_8;

	case PIPE_FORMAT_Z24X8_UNORM:
		return TFMT4_NORM_UINT_X8Z24;

	/* TODO: PIPE_FORMAT_Z16_UNORM (candidate: TFMT4_NORM_UINT_8_8) is
	 * not enabled yet.
	 */

	case PIPE_FORMAT_R16G16B16A16_FLOAT:
	case PIPE_FORMAT_R16G16B16X16_FLOAT:
		return TFMT4_FLOAT_16_16_16_16;

	case PIPE_FORMAT_R32G32B32A32_FLOAT:
	case PIPE_FORMAT_R32G32B32X32_FLOAT:
		return TFMT4_FLOAT_32_32_32_32;

	// TODO add more..

	default:
		return ~0;
	}
}

/* Convert a pipe format to the MRT / copy-destination format used when
 * the surface is a render target.
 *
 * Returns ~0 for formats that cannot be rendered to yet.
 */
enum a4xx_color_fmt
fd4_pipe2color(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_B8G8R8X8_UNORM:
	case PIPE_FORMAT_R8G8B8A8_UNORM:
	/* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		return RB4_R8G8B8A8_UNORM;

	case PIPE_FORMAT_Z16_UNORM:
		return RB4_Z16_UNORM;

	case PIPE_FORMAT_R8_UNORM:
	case PIPE_FORMAT_L8_UNORM:
	case PIPE_FORMAT_A8_UNORM:
		return RB4_A8_UNORM;

	/* TODO: the float render targets are not enabled yet:
	 *
	 *   PIPE_FORMAT_R16G16B16{A,X}16_FLOAT -> RB4_R16G16B16A16_FLOAT
	 *   PIPE_FORMAT_R32G32B32{A,X}32_FLOAT -> RB4_R32G32B32A32_FLOAT
	 */

	// TODO add more..

	default:
		return ~0;
	}
}

/* Pick the format to sample with when restoring a buffer into gmem.
 *
 * Depth/stencil restore is a special case: the texture sampler is used to
 * blit into the depth/stencil buffer, *not* into a color buffer, so
 * fd4_tex_swiz() -- which assumes a normal render target -- would do the
 * wrong thing.  Those formats are therefore restored as B8G8R8A8; every
 * other format is returned unchanged.
 */
enum pipe_format
fd4_gmem_restore_format(enum pipe_format format)
{
	if ((format == PIPE_FORMAT_Z24X8_UNORM) ||
			(format == PIPE_FORMAT_Z24_UNORM_S8_UINT) ||
			(format == PIPE_FORMAT_Z16_UNORM))
		return PIPE_FORMAT_B8G8R8A8_UNORM;

	return format;
}

/* TODO share w/ a3xx?? */
enum a3xx_color_swap
fd4_pipe2swap(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_SRGB:
case PIPE_FORMAT_B8G8R8X8_SRGB:
return WXYZ;

case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_A8R8G8B8_SRGB:
case PIPE_FORMAT_X8R8G8B8_SRGB:
return ZYXW;

case PIPE_FORMAT_A8B8G8R8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_X8B8G8R8_SRGB:
return XYZW;

case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
default:
return WZYX;
}
}

/* Translate one PIPE_SWIZZLE_x selector into the a4xx swizzle enum.
 * Unrecognised selectors are treated like PIPE_SWIZZLE_RED.
 */
static inline enum a4xx_tex_swiz
tex_swiz(unsigned swiz)
{
	switch (swiz) {
	case PIPE_SWIZZLE_GREEN:
		return A4XX_TEX_Y;
	case PIPE_SWIZZLE_BLUE:
		return A4XX_TEX_Z;
	case PIPE_SWIZZLE_ALPHA:
		return A4XX_TEX_W;
	case PIPE_SWIZZLE_ZERO:
		return A4XX_TEX_ZERO;
	case PIPE_SWIZZLE_ONE:
		return A4XX_TEX_ONE;
	case PIPE_SWIZZLE_RED:
	default:
		return A4XX_TEX_X;
	}
}

/* Build the SWIZ_X/Y/Z/W fields of TEX_CONST_0.
 *
 * The swizzle requested by the sampler view is combined with the
 * format's own swizzle (taken from its util_format_description) using
 * util_format_compose_swizzles(), and the result is encoded per channel.
 */
uint32_t
fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
		unsigned swizzle_b, unsigned swizzle_a)
{
	const struct util_format_description *desc =
		util_format_description(format);
	unsigned char view_swiz[4];
	unsigned char composed[4];

	view_swiz[0] = swizzle_r;
	view_swiz[1] = swizzle_g;
	view_swiz[2] = swizzle_b;
	view_swiz[3] = swizzle_a;

	util_format_compose_swizzles(desc->swizzle, view_swiz, composed);

	return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(composed[0])) |
			A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(composed[1])) |
			A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(composed[2])) |
			A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(composed[3]));
}

+ 45
- 0
src/gallium/drivers/freedreno/a4xx/fd4_util.h View File

@@ -0,0 +1,45 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_UTIL_H_
#define FD4_UTIL_H_

#include "freedreno_util.h"

#include "a4xx.xml.h"

/* pipe format -> vertex fetch format, or ~0 if there is no mapping */
enum a4xx_vtx_fmt fd4_pipe2vtx(enum pipe_format format);
/* pipe format -> texture sampler format, or ~0 if there is no mapping */
enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
/* pipe format -> render-target (MRT / copy dest) format, or ~0 if there is no mapping */
enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format);
/* format to sample with on gmem restore: depth/stencil formats become
 * B8G8R8A8_UNORM, everything else is returned unchanged
 */
enum pipe_format fd4_gmem_restore_format(enum pipe_format format);
/* pipe format -> component swap; defaults to WZYX */
enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);

/* returns the TEX_CONST_0 swizzle fields for a format combined with the
 * four per-channel view swizzles
 */
uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);

#endif /* FD4_UTIL_H_ */

+ 105
- 0
src/gallium/drivers/freedreno/a4xx/fd4_zsa.c View File

@@ -0,0 +1,105 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/


#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"

#include "fd4_zsa.h"
#include "fd4_context.h"
#include "fd4_util.h"

/* Create the a4xx depth/stencil/alpha state object for 'cso'.
 *
 * Keeps a copy of the gallium CSO and pre-computes the register words
 * derived from it.  Returns NULL if the allocation fails.  'pctx' is not
 * used.
 */
void *
fd4_zsa_state_create(struct pipe_context *pctx,
		const struct pipe_depth_stencil_alpha_state *cso)
{
	struct fd4_zsa_stateobj *so = CALLOC_STRUCT(fd4_zsa_stateobj);
	const struct pipe_stencil_state *front;
	const struct pipe_stencil_state *back;

	if (!so)
		return NULL;

	so->base = *cso;
	front = &cso->stencil[0];
	back = &cso->stencil[1];

	so->rb_render_control = 0x8; /* XXX */

	/* ---- depth ---- */
	so->rb_depth_control |=
		A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */

	if (cso->depth.enabled) {
		so->rb_depth_control |=
			A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
			A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
	}

	if (cso->depth.writemask)
		so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;

	/* ---- stencil, front face ---- */
	if (front->enabled) {
		so->rb_stencil_control |=
			A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
			A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
			A4XX_RB_STENCIL_CONTROL_FUNC(front->func) | /* maps 1:1 */
			A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(front->fail_op)) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(front->zpass_op)) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(front->zfail_op));
		so->rb_stencilrefmask |=
			0xff000000 | /* ??? */
			A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(front->writemask) |
			A4XX_RB_STENCILREFMASK_STENCILMASK(front->valuemask);
	}

	/* ---- stencil, back face: only looked at when the front face is
	 * enabled as well ----
	 */
	if (front->enabled && back->enabled) {
		so->rb_stencil_control |=
			A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(back->func) | /* maps 1:1 */
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(back->fail_op)) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(back->zpass_op)) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(back->zfail_op));
		so->rb_stencilrefmask_bf |=
			0xff000000 | /* ??? */
			A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(back->writemask) |
			A4XX_RB_STENCILREFMASK_BF_STENCILMASK(back->valuemask);
	}

	/* ---- alpha test: enabling it also sets EARLY_Z_DISABLE ---- */
	if (cso->alpha.enabled) {
		so->gras_alpha_control =
			A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
		so->rb_alpha_control =
			A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
			A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
		so->rb_depth_control |=
			A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
	}

	return so;
}

+ 58
- 0
src/gallium/drivers/freedreno/a4xx/fd4_zsa.h View File

@@ -0,0 +1,58 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/

#ifndef FD4_ZSA_H_
#define FD4_ZSA_H_


#include "pipe/p_state.h"
#include "pipe/p_context.h"

#include "freedreno_util.h"

/* a4xx depth/stencil/alpha state object: the gallium CSO plus the
 * pre-computed register words, each field named after the register it
 * holds the value for.
 */
struct fd4_zsa_stateobj {
/* first member, so fd4_zsa_stateobj() can cast a CSO pointer to this type */
struct pipe_depth_stencil_alpha_state base;
uint32_t gras_alpha_control;
uint32_t rb_alpha_control;
uint32_t rb_render_control;
uint32_t rb_depth_control;
uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};

/* Downcast from the embedded gallium depth/stencil/alpha CSO to the a4xx
 * state object that contains it ('base' is its first member).
 */
static INLINE struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
	struct fd4_zsa_stateobj *so = (struct fd4_zsa_stateobj *)zsa;

	return so;
}

/* Creates a depth/stencil/alpha state object from the gallium CSO 'cso'.
 * NOTE(review): presumably returns a struct fd4_zsa_stateobj, or NULL on
 * allocation failure -- confirm against fd4_zsa.c.
 */
void * fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);

#endif /* FD4_ZSA_H_ */

+ 5
- 1
src/gallium/drivers/freedreno/freedreno_screen.c View File

@@ -52,6 +52,7 @@

#include "a2xx/fd2_screen.h"
#include "a3xx/fd3_screen.h"
#include "a4xx/fd4_screen.h"

/* XXX this should go away */
#include "state_tracker/drm_driver.h"
@@ -514,7 +515,7 @@ fd_screen_create(struct fd_device *dev)
* before enabling:
*
* If you have a different adreno version, feel free to add it to one
* of the two cases below and see what happens. And if it works, please
* of the cases below and see what happens. And if it works, please
* send a patch ;-)
*/
switch (screen->gpu_id) {
@@ -525,6 +526,9 @@ fd_screen_create(struct fd_device *dev)
case 330:
fd3_screen_init(pscreen);
break;
case 420:
fd4_screen_init(pscreen);
break;
default:
debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
goto fail;

+ 10
- 5
src/gallium/drivers/freedreno/ir3/ir3.c View File

@@ -540,7 +540,8 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr,
emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
};

void * ir3_assemble(struct ir3 *shader, struct ir3_info *info)
void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
uint32_t gpu_id)
{
uint32_t *ptr, *dwords;
uint32_t i;
@@ -550,11 +551,15 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info)
info->max_const = -1;
info->instrs_count = 0;

/* need a integer number of instruction "groups" (sets of four
* instructions), so pad out w/ NOPs if needed:
* (each instruction is 64bits)
/* need a integer number of instruction "groups" (sets of 16
* instructions on a4xx or sets of 4 instructions on a3xx),
* so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
*/
info->sizedwords = 2 * align(shader->instrs_count, 4);
if (gpu_id >= 400) {
info->sizedwords = 2 * align(shader->instrs_count, 16);
} else {
info->sizedwords = 2 * align(shader->instrs_count, 4);
}

ptr = dwords = calloc(4, info->sizedwords);


+ 1
- 1
src/gallium/drivers/freedreno/ir3/ir3.h View File

@@ -264,7 +264,7 @@ struct ir3_block {
struct ir3 * ir3_create(void);
void ir3_destroy(struct ir3 *shader);
void * ir3_assemble(struct ir3 *shader,
struct ir3_info *info);
struct ir3_info *info, uint32_t gpu_id);
void * ir3_alloc(struct ir3 *shader, int sz);

struct ir3_block * ir3_block_create(struct ir3 *shader,

+ 2
- 1
src/gallium/drivers/freedreno/ir3/ir3_cmdline.c View File

@@ -49,7 +49,8 @@ static void dump_info(struct ir3_shader_variant *so, const char *str)
const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";

// for debug, dump some before/after info:
bin = ir3_assemble(so->ir, &info);
// TODO make gpu_id configurable on cmdline
bin = ir3_assemble(so->ir, &info, 320);
if (fd_mesa_debug & FD_DBG_DISASM) {
struct ir3_block *block = so->ir->block;
struct ir3_register *reg;

+ 6
- 2
src/gallium/drivers/freedreno/ir3/ir3_shader.c View File

@@ -56,7 +56,7 @@ assemble_variant(struct ir3_shader_variant *v)
struct fd_context *ctx = fd_context(v->shader->pctx);
uint32_t sz, *bin;

bin = ir3_assemble(v->ir, &v->info);
bin = ir3_assemble(v->ir, &v->info, ctx->screen->gpu_id);
sz = v->info.sizedwords * 4;

v->bo = fd_bo_new(ctx->dev, sz,
@@ -67,7 +67,11 @@ assemble_variant(struct ir3_shader_variant *v)

free(bin);

v->instrlen = v->info.sizedwords / 8;
if (ctx->screen->gpu_id >= 400) {
v->instrlen = v->info.sizedwords / (2 * 16);
} else {
v->instrlen = v->info.sizedwords / (2 * 4);
}

/* NOTE: if relative addressing is used, we set constlen in
* the compiler (to worst-case value) since we don't know in

+ 3
- 2
src/gallium/drivers/freedreno/ir3/ir3_shader.h View File

@@ -111,7 +111,8 @@ struct ir3_shader_variant {
struct ir3 *ir;

/* the instructions length is in units of instruction groups
* (4 instructions, 8 dwords):
* (4 instructions for a3xx, 16 instructions for a4xx.. each
* instruction is 2 dwords):
*/
unsigned instrlen;

@@ -203,7 +204,7 @@ struct ir3_shader {
/* so far, only used for blit_prog shader.. values for
* VPC_VARYING_PS_REPL[i].MODE
*/
uint32_t vpsrepl[4];
uint32_t vpsrepl[8];
};



Loading…
Cancel
Save