소스 검색

Merge commit 'origin/master' into gallium-sampler-view

Conflicts:
	src/gallium/drivers/nv30/nv30_context.h
	src/gallium/drivers/nv30/nv30_state.c
	src/gallium/drivers/nv40/nv40_context.h
	src/gallium/drivers/nv40/nv40_state.c
	src/gallium/drivers/r300/r300_emit.c
tags/mesa-7.9-rc1
Keith Whitwell 15 년 전
부모
커밋
42910ebe7b
100개의 변경된 파일4566개의 추가작업 그리고 9541개의 파일을 삭제
  1. 2
    0
      configs/autoconf.in
  2. 4
    2
      configure.ac
  3. 3
    3
      progs/egl/Makefile
  4. 2
    2
      progs/xdemos/Makefile
  5. 5
    1
      src/gallium/auxiliary/gallivm/lp_bld_sample.c
  6. 65
    0
      src/gallium/drivers/llvmpipe/lp_flush.c
  7. 12
    0
      src/gallium/drivers/llvmpipe/lp_flush.h
  8. 8
    6
      src/gallium/drivers/llvmpipe/lp_rast.c
  9. 24
    46
      src/gallium/drivers/llvmpipe/lp_scene.c
  10. 0
    2
      src/gallium/drivers/llvmpipe/lp_scene.h
  11. 15
    0
      src/gallium/drivers/llvmpipe/lp_surface.c
  12. 106
    62
      src/gallium/drivers/llvmpipe/lp_texture.c
  13. 22
    0
      src/gallium/drivers/llvmpipe/lp_texture.h
  14. 1
    2
      src/gallium/drivers/nouveau/Makefile
  15. 1
    1
      src/gallium/drivers/nouveau/nouveau_util.h
  16. 1
    4
      src/gallium/drivers/nouveau/nouveau_winsys.h
  17. 0
    29
      src/gallium/drivers/nv30/Makefile
  18. 0
    14
      src/gallium/drivers/nv30/nv30_clear.c
  19. 0
    88
      src/gallium/drivers/nv30/nv30_context.c
  20. 0
    224
      src/gallium/drivers/nv30/nv30_context.h
  21. 0
    61
      src/gallium/drivers/nv30/nv30_draw.c
  22. 0
    905
      src/gallium/drivers/nv30/nv30_fragprog.c
  23. 0
    240
      src/gallium/drivers/nv30/nv30_miptree.c
  24. 0
    127
      src/gallium/drivers/nv30/nv30_query.c
  25. 0
    490
      src/gallium/drivers/nv30/nv30_shader.h
  26. 0
    782
      src/gallium/drivers/nv30/nv30_state.c
  27. 0
    88
      src/gallium/drivers/nv30/nv30_state.h
  28. 0
    41
      src/gallium/drivers/nv30/nv30_state_blend.c
  29. 0
    122
      src/gallium/drivers/nv30/nv30_state_emit.c
  30. 0
    173
      src/gallium/drivers/nv30/nv30_state_fb.c
  31. 0
    17
      src/gallium/drivers/nv30/nv30_state_rasterizer.c
  32. 0
    36
      src/gallium/drivers/nv30/nv30_state_scissor.c
  33. 0
    40
      src/gallium/drivers/nv30/nv30_state_stipple.c
  34. 0
    42
      src/gallium/drivers/nv30/nv30_state_viewport.c
  35. 0
    41
      src/gallium/drivers/nv30/nv30_state_zsa.c
  36. 0
    842
      src/gallium/drivers/nv30/nv30_vertprog.c
  37. 0
    29
      src/gallium/drivers/nv40/Makefile
  38. 0
    88
      src/gallium/drivers/nv40/nv40_context.c
  39. 0
    246
      src/gallium/drivers/nv40/nv40_context.h
  40. 0
    360
      src/gallium/drivers/nv40/nv40_draw.c
  41. 0
    127
      src/gallium/drivers/nv40/nv40_query.c
  42. 0
    319
      src/gallium/drivers/nv40/nv40_screen.c
  43. 0
    37
      src/gallium/drivers/nv40/nv40_screen.h
  44. 0
    556
      src/gallium/drivers/nv40/nv40_shader.h
  45. 0
    797
      src/gallium/drivers/nv40/nv40_state.c
  46. 0
    41
      src/gallium/drivers/nv40/nv40_state_blend.c
  47. 0
    189
      src/gallium/drivers/nv40/nv40_state_emit.c
  48. 0
    175
      src/gallium/drivers/nv40/nv40_state_fb.c
  49. 0
    17
      src/gallium/drivers/nv40/nv40_state_rasterizer.c
  50. 0
    36
      src/gallium/drivers/nv40/nv40_state_scissor.c
  51. 0
    39
      src/gallium/drivers/nv40/nv40_state_stipple.c
  52. 0
    38
      src/gallium/drivers/nv40/nv40_state_viewport.c
  53. 0
    41
      src/gallium/drivers/nv40/nv40_state_zsa.c
  54. 0
    64
      src/gallium/drivers/nv40/nv40_surface.c
  55. 0
    181
      src/gallium/drivers/nv40/nv40_transfer.c
  56. 0
    565
      src/gallium/drivers/nv40/nv40_vbo.c
  57. 1
    0
      src/gallium/drivers/nv50/nv50_vbo.c
  58. 32
    0
      src/gallium/drivers/nvfx/Makefile
  59. 0
    1
      src/gallium/drivers/nvfx/nv04_surface_2d.c
  60. 0
    0
      src/gallium/drivers/nvfx/nv04_surface_2d.h
  61. 36
    50
      src/gallium/drivers/nvfx/nv30_fragtex.c
  62. 169
    0
      src/gallium/drivers/nvfx/nv30_vertprog.h
  63. 65
    64
      src/gallium/drivers/nvfx/nv40_fragtex.c
  64. 177
    0
      src/gallium/drivers/nvfx/nv40_vertprog.h
  65. 3
    3
      src/gallium/drivers/nvfx/nvfx_clear.c
  66. 90
    0
      src/gallium/drivers/nvfx/nvfx_context.c
  67. 265
    0
      src/gallium/drivers/nvfx/nvfx_context.h
  68. 350
    0
      src/gallium/drivers/nvfx/nvfx_draw.c
  69. 207
    241
      src/gallium/drivers/nvfx/nvfx_fragprog.c
  70. 49
    0
      src/gallium/drivers/nvfx/nvfx_fragtex.c
  71. 36
    25
      src/gallium/drivers/nvfx/nvfx_miptree.c
  72. 127
    0
      src/gallium/drivers/nvfx/nvfx_query.c
  73. 192
    120
      src/gallium/drivers/nvfx/nvfx_screen.c
  74. 12
    11
      src/gallium/drivers/nvfx/nvfx_screen.h
  75. 429
    0
      src/gallium/drivers/nvfx/nvfx_shader.h
  76. 652
    0
      src/gallium/drivers/nvfx/nvfx_state.c
  77. 13
    21
      src/gallium/drivers/nvfx/nvfx_state.h
  78. 41
    0
      src/gallium/drivers/nvfx/nvfx_state_blend.c
  79. 179
    0
      src/gallium/drivers/nvfx/nvfx_state_emit.c
  80. 234
    0
      src/gallium/drivers/nvfx/nvfx_state_fb.c
  81. 17
    0
      src/gallium/drivers/nvfx/nvfx_state_rasterizer.c
  82. 36
    0
      src/gallium/drivers/nvfx/nvfx_state_scissor.c
  83. 40
    0
      src/gallium/drivers/nvfx/nvfx_state_stipple.c
  84. 51
    0
      src/gallium/drivers/nvfx/nvfx_state_viewport.c
  85. 41
    0
      src/gallium/drivers/nvfx/nvfx_state_zsa.c
  86. 15
    15
      src/gallium/drivers/nvfx/nvfx_surface.c
  87. 133
    0
      src/gallium/drivers/nvfx/nvfx_tex.h
  88. 27
    27
      src/gallium/drivers/nvfx/nvfx_transfer.c
  89. 114
    106
      src/gallium/drivers/nvfx/nvfx_vbo.c
  90. 321
    320
      src/gallium/drivers/nvfx/nvfx_vertprog.c
  91. 1
    0
      src/gallium/drivers/r300/Makefile
  92. 28
    10
      src/gallium/drivers/r300/r300_context.c
  93. 7
    2
      src/gallium/drivers/r300/r300_context.h
  94. 17
    5
      src/gallium/drivers/r300/r300_cs.h
  95. 1
    0
      src/gallium/drivers/r300/r300_debug.c
  96. 41
    34
      src/gallium/drivers/r300/r300_emit.c
  97. 1
    2
      src/gallium/drivers/r300/r300_emit.h
  98. 4
    0
      src/gallium/drivers/r300/r300_reg.h
  99. 41
    6
      src/gallium/drivers/r300/r300_render.c
  100. 0
    0
      src/gallium/drivers/r300/r300_screen.c

+ 2
- 0
configs/autoconf.in 파일 보기

@@ -24,6 +24,8 @@ RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LDFLAGS = @RADEON_LDFLAGS@
INTEL_LIBS = @INTEL_LIBS@
INTEL_CFLAGS = @INTEL_CFLAGS@
X_LIBS = @X_LIBS@
X_CFLAGS = @X_CFLAGS@

# Assembler
MESA_ASM_SOURCES = @MESA_ASM_SOURCES@

+ 4
- 2
configure.ac 파일 보기

@@ -547,7 +547,9 @@ else
x11_pkgconfig=no
fi
dnl Use the autoconf macro if no pkg-config files
if test "$x11_pkgconfig" = no; then
if test "$x11_pkgconfig" = yes; then
PKG_CHECK_MODULES([X], [x11])
else
AC_PATH_XTRA
fi

@@ -1358,7 +1360,7 @@ AC_ARG_ENABLE([gallium-nouveau],
[enable_gallium_nouveau=no])
if test "x$enable_gallium_nouveau" = xyes; then
GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS nouveau"
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nv30 nv40 nv50"
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
fi

dnl

+ 3
- 3
progs/egl/Makefile 파일 보기

@@ -57,13 +57,13 @@ peglgears: peglgears.o $(HEADERS) $(LIB_DEP)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(LIBDRM_LIB) -lm

xeglgears: xeglgears.o $(HEADERS) $(LIB_DEP)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm -L$(libdir) -lX11
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm $(X_LIBS)

xeglthreads: xeglthreads.o $(HEADERS) $(LIB_DEP)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm -L$(libdir) -lX11
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm $(X_LIBS)

xegl_tri: xegl_tri.o $(HEADERS) $(LIB_DEP)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm -L$(libdir) -lX11
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm $(X_LIBS)

clean:
-rm -f *.o *~

+ 2
- 2
progs/xdemos/Makefile 파일 보기

@@ -9,9 +9,9 @@ INCDIR = $(TOP)/include
LIB_DEP = $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)

# Add X11 and pthread libs to satisfy GNU gold.
APP_LIB_DEPS += -lX11 -lpthread
APP_LIB_DEPS += $(X_LIBS) -lpthread

LIBS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(libdir) $(APP_LIB_DEPS)
LIBS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) $(APP_LIB_DEPS)

PROGS = \
corender \

+ 5
- 1
src/gallium/auxiliary/gallivm/lp_bld_sample.c 파일 보기

@@ -84,8 +84,12 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->wrap_t = sampler->wrap_t;
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
state->min_mip_filter = sampler->min_mip_filter;
state->mag_img_filter = sampler->mag_img_filter;
if (texture->last_level) {
state->min_mip_filter = sampler->min_mip_filter;
} else {
state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}

state->compare_mode = sampler->compare_mode;
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {

+ 65
- 0
src/gallium/drivers/llvmpipe/lp_flush.c 파일 보기

@@ -92,3 +92,68 @@ llvmpipe_flush( struct pipe_context *pipe,
#endif
}


/**
 * Flush context if necessary.
 *
 * Asks the context whether the given texture face/level is still referenced
 * by pending work and, if that reference conflicts with the intended access,
 * flushes the context.
 *
 * \param pipe          context whose pending work may reference the texture
 * \param texture       texture about to be accessed
 * \param face          cube face being accessed
 * \param level         mipmap level being accessed
 * \param flush_flags   extra PIPE_FLUSH_x flags to pass to the flush; the
 *                      render/texture cache flags are added here as needed
 * \param read_only     TRUE if the caller will only read the texture, in
 *                      which case pending reads do not force a flush
 * \param cpu_access    TRUE if the caller accesses the texture with the CPU,
 *                      in which case the flush is also waited upon
 * \param do_not_flush  TRUE if the caller must not trigger a flush
 *
 * \return FALSE if a flush was required but do_not_flush forbade it,
 *         TRUE otherwise.
 *
 * TODO: move this logic to an auxiliary library?
 *
 * FIXME: We must implement DISCARD/DONTBLOCK/UNSYNCHRONIZED/etc for
 * textures to avoid blocking.
 */
boolean
llvmpipe_flush_texture(struct pipe_context *pipe,
                       struct pipe_texture *texture,
                       unsigned face,
                       unsigned level,
                       unsigned flush_flags,
                       boolean read_only,
                       boolean cpu_access,
                       boolean do_not_flush)
{
   unsigned referenced;

   referenced = pipe->is_texture_referenced(pipe, texture, face, level);

   if ((referenced & PIPE_REFERENCED_FOR_WRITE) ||
       ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) {

      if (do_not_flush)
         return FALSE;

      /*
       * TODO: The semantics of these flush flags are too obtuse. They should
       * disappear and the pipe driver should just ensure that all visible
       * side-effects happen when they need to happen.
       */
      if (referenced & PIPE_REFERENCED_FOR_WRITE)
         flush_flags |= PIPE_FLUSH_RENDER_CACHE;

      if (referenced & PIPE_REFERENCED_FOR_READ)
         flush_flags |= PIPE_FLUSH_TEXTURE_CACHE;

      if (cpu_access) {
         /*
          * Flush and wait.
          */

         struct pipe_fence_handle *fence = NULL;

         pipe->flush(pipe, flush_flags, &fence);

         /*
          * Wait on (and then release) the fence the flush handed back.
          * The check must be on that fence: testing a separate, always-NULL
          * variable would skip the wait and leak the fence reference.
          */
         if (fence) {
            pipe->screen->fence_finish(pipe->screen, fence, 0);
            pipe->screen->fence_reference(pipe->screen, &fence, NULL);
         }
      } else {
         /*
          * Just flush.
          */

         pipe->flush(pipe, flush_flags, NULL);
      }
   }

   return TRUE;
}

+ 12
- 0
src/gallium/drivers/llvmpipe/lp_flush.h 파일 보기

@@ -28,10 +28,22 @@
#ifndef LP_FLUSH_H
#define LP_FLUSH_H

#include "pipe/p_compiler.h"

/*
 * Forward declarations: only pointers to these types appear below, so the
 * full definitions are not needed here.  struct pipe_texture must be
 * declared before the prototype that uses it, otherwise the tag would get
 * prototype scope and be incompatible with the real type.
 */
struct pipe_context;
struct pipe_fence_handle;
struct pipe_texture;

/** Flush the context; see the definition in lp_flush.c. */
void llvmpipe_flush(struct pipe_context *pipe, unsigned flags,
                    struct pipe_fence_handle **fence);

/**
 * Flush the context if the given texture face/level is referenced by it in
 * a way that conflicts with the intended access.
 */
boolean
llvmpipe_flush_texture(struct pipe_context *pipe,
                       struct pipe_texture *texture,
                       unsigned face,
                       unsigned level,
                       unsigned flush_flags,
                       boolean read_only,
                       boolean cpu_access,
                       boolean do_not_flush);

#endif

+ 8
- 6
src/gallium/drivers/llvmpipe/lp_rast.c 파일 보기

@@ -62,18 +62,20 @@ lp_rast_begin( struct lp_rasterizer *rast,
rast->state.write_color = write_color;
for (i = 0; i < rast->state.nr_cbufs; i++) {
struct pipe_surface *cbuf = scene->fb.cbufs[i];
rast->cbuf[i].map = scene->cbuf_map[i];
rast->cbuf[i].format = scene->cbuf_transfer[i]->texture->format;
rast->cbuf[i].width = scene->cbuf_transfer[i]->width;
rast->cbuf[i].height = scene->cbuf_transfer[i]->height;
rast->cbuf[i].stride = scene->cbuf_transfer[i]->stride;
rast->cbuf[i].format = cbuf->texture->format;
rast->cbuf[i].width = cbuf->width;
rast->cbuf[i].height = cbuf->height;
rast->cbuf[i].stride = llvmpipe_texture_stride(cbuf->texture, cbuf->level);
}

if (write_zstencil) {
struct pipe_surface *zsbuf = scene->fb.zsbuf;
rast->zsbuf.map = scene->zsbuf_map;
rast->zsbuf.stride = scene->zsbuf_transfer->stride;
rast->zsbuf.stride = llvmpipe_texture_stride(zsbuf->texture, zsbuf->level);
rast->zsbuf.blocksize =
util_format_get_blocksize(scene->zsbuf_transfer->texture->format);
util_format_get_blocksize(zsbuf->texture->format);
}

lp_scene_bin_iter_begin( scene );

+ 24
- 46
src/gallium/drivers/llvmpipe/lp_scene.c 파일 보기

@@ -397,7 +397,6 @@ end:
static boolean
lp_scene_map_buffers( struct lp_scene *scene )
{
struct pipe_context *pipe = scene->pipe;
struct pipe_surface *cbuf, *zsbuf;
int i;

@@ -409,20 +408,10 @@ lp_scene_map_buffers( struct lp_scene *scene )
for (i = 0; i < scene->fb.nr_cbufs; i++) {
cbuf = scene->fb.cbufs[i];
if (cbuf) {
scene->cbuf_transfer[i] = pipe->get_tex_transfer(pipe,
cbuf->texture,
cbuf->face,
cbuf->level,
cbuf->zslice,
PIPE_TRANSFER_READ_WRITE,
0, 0,
cbuf->width,
cbuf->height);
if (!scene->cbuf_transfer[i])
goto fail;

scene->cbuf_map[i] = pipe->transfer_map(pipe,
scene->cbuf_transfer[i]);
scene->cbuf_map[i] = llvmpipe_texture_map(cbuf->texture,
cbuf->face,
cbuf->level,
cbuf->zslice);
if (!scene->cbuf_map[i])
goto fail;
}
@@ -432,20 +421,10 @@ lp_scene_map_buffers( struct lp_scene *scene )
*/
zsbuf = scene->fb.zsbuf;
if (zsbuf) {
scene->zsbuf_transfer = pipe->get_tex_transfer(pipe,
zsbuf->texture,
zsbuf->face,
zsbuf->level,
zsbuf->zslice,
PIPE_TRANSFER_READ_WRITE,
0, 0,
zsbuf->width,
zsbuf->height);
if (!scene->zsbuf_transfer)
goto fail;

scene->zsbuf_map = pipe->transfer_map(pipe,
scene->zsbuf_transfer);
scene->zsbuf_map = llvmpipe_texture_map(zsbuf->texture,
zsbuf->face,
zsbuf->level,
zsbuf->zslice);
if (!scene->zsbuf_map)
goto fail;
}
@@ -469,28 +448,27 @@ fail:
static void
lp_scene_unmap_buffers( struct lp_scene *scene )
{
struct pipe_context *pipe = scene->pipe;
unsigned i;

for (i = 0; i < scene->fb.nr_cbufs; i++) {
if (scene->cbuf_map[i])
pipe->transfer_unmap(pipe, scene->cbuf_transfer[i]);
if (scene->cbuf_transfer[i])
pipe->tex_transfer_destroy(pipe, scene->cbuf_transfer[i]);
scene->cbuf_transfer[i] = NULL;
scene->cbuf_map[i] = NULL;
if (scene->cbuf_map[i]) {
struct pipe_surface *cbuf = scene->fb.cbufs[i];
llvmpipe_texture_unmap(cbuf->texture,
cbuf->face,
cbuf->level,
cbuf->zslice);
scene->cbuf_map[i] = NULL;
}
}

if (scene->zsbuf_map)
pipe->transfer_unmap(pipe, scene->zsbuf_transfer);
if (scene->zsbuf_transfer)
pipe->tex_transfer_destroy(pipe, scene->zsbuf_transfer);
scene->zsbuf_transfer = NULL;
scene->zsbuf_map = NULL;
if (scene->zsbuf_map) {
struct pipe_surface *zsbuf = scene->fb.zsbuf;
llvmpipe_texture_unmap(zsbuf->texture,
zsbuf->face,
zsbuf->level,
zsbuf->zslice);
scene->zsbuf_map = NULL;
}

util_unreference_framebuffer_state( &scene->fb );
}

+ 0
- 2
src/gallium/drivers/llvmpipe/lp_scene.h 파일 보기

@@ -114,8 +114,6 @@ struct texture_ref {
*/
struct lp_scene {
struct pipe_context *pipe;
struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS];
struct pipe_transfer *zsbuf_transfer;

/* Scene's buffers are mapped at the time the scene is enqueued:
*/

+ 15
- 0
src/gallium/drivers/llvmpipe/lp_surface.c 파일 보기

@@ -27,6 +27,7 @@

#include "util/u_rect.h"
#include "lp_context.h"
#include "lp_flush.h"
#include "lp_surface.h"


@@ -36,6 +37,20 @@ lp_surface_copy(struct pipe_context *pipe,
struct pipe_surface *src, unsigned srcx, unsigned srcy,
unsigned width, unsigned height)
{
llvmpipe_flush_texture(pipe,
dest->texture, dest->face, dest->level,
0, /* flush_flags */
FALSE, /* read_only */
FALSE, /* cpu_access */
FALSE); /* do_not_flush */

llvmpipe_flush_texture(pipe,
src->texture, src->face, src->level,
0, /* flush_flags */
TRUE, /* read_only */
FALSE, /* cpu_access */
FALSE); /* do_not_flush */

util_surface_copy(pipe, FALSE,
dest, destx, desty,
src, srcx, srcy,

+ 106
- 62
src/gallium/drivers/llvmpipe/lp_texture.c 파일 보기

@@ -40,6 +40,7 @@

#include "lp_context.h"
#include "lp_screen.h"
#include "lp_flush.h"
#include "lp_texture.h"
#include "lp_tile_size.h"
#include "state_tracker/sw_winsys.h"
@@ -163,6 +164,92 @@ llvmpipe_texture_destroy(struct pipe_texture *pt)
}


/**
 * Map a texture and return a pointer to the requested face/level/zslice.
 * No synchronization is performed.
 *
 * Display targets are mapped through the winsys; regular textures resolve
 * to an offset into the texture's own storage.
 */
void *
llvmpipe_texture_map(struct pipe_texture *texture,
                     unsigned face,
                     unsigned level,
                     unsigned zslice)
{
   struct llvmpipe_texture *lp_tex = llvmpipe_texture(texture);
   unsigned byte_offset;

   if (lp_tex->dt) {
      /* display target */
      struct sw_winsys *ws = llvmpipe_screen(texture->screen)->winsys;

      assert(face == 0);
      assert(level == 0);
      assert(zslice == 0);

      /* FIXME: keep map count? */
      return ws->displaytarget_map(ws, lp_tex->dt,
                                   PIPE_BUFFER_USAGE_CPU_READ_WRITE);
   }

   /* regular texture */
   assert(level < LP_MAX_TEXTURE_2D_LEVELS);

   byte_offset = lp_tex->level_offset[level];

   /* XXX shouldn't that rather be
      tex_height = align(u_minify(texture->height0, level), 2)
      to account for alignment done in llvmpipe_texture_layout ?
      */
   switch (texture->target) {
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_3D: {
      /* Cube maps are indexed by face, 3D textures by depth slice. */
      const unsigned slice =
         (texture->target == PIPE_TEXTURE_CUBE) ? face : zslice;
      const unsigned rows =
         util_format_get_nblocksy(texture->format,
                                  u_minify(texture->height0, level));

      byte_offset += slice * rows * lp_tex->stride[level];
      break;
   }
   default:
      assert(face == 0);
      assert(zslice == 0);
      break;
   }

   return (uint8_t *)lp_tex->data + byte_offset;
}


/**
 * Unmap a texture previously mapped with llvmpipe_texture_map().
 * No synchronization is performed.
 *
 * Only display targets need any work here; for regular textures this is a
 * no-op.
 */
void
llvmpipe_texture_unmap(struct pipe_texture *texture,
                       unsigned face,
                       unsigned level,
                       unsigned zslice)
{
   struct llvmpipe_texture *lp_tex = llvmpipe_texture(texture);
   struct sw_winsys *ws;

   if (!lp_tex->dt)
      return;

   /* display target */
   ws = llvmpipe_screen(texture->screen)->winsys;

   assert(face == 0);
   assert(level == 0);
   assert(zslice == 0);

   ws->displaytarget_unmap(ws, lp_tex->dt);
}


static struct pipe_surface *
llvmpipe_get_tex_surface(struct pipe_screen *screen,
struct pipe_texture *pt,
@@ -181,7 +268,6 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
ps->format = pt->format;
ps->width = u_minify(pt->width0, level);
ps->height = u_minify(pt->height0, level);
ps->offset = lpt->level_offset[level];
ps->usage = usage;

/* Because we are llvmpipe, anything that the state tracker
@@ -207,23 +293,6 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
ps->face = face;
ps->level = level;
ps->zslice = zslice;

/* XXX shouldn't that rather be
tex_height = align(ps->height, 2);
to account for alignment done in llvmpipe_texture_layout ?
*/
if (pt->target == PIPE_TEXTURE_CUBE) {
unsigned tex_height = ps->height;
ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
}
else if (pt->target == PIPE_TEXTURE_3D) {
unsigned tex_height = ps->height;
ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
}
else {
assert(face == 0);
assert(zslice == 0);
}
}
return ps;
}
@@ -269,24 +338,6 @@ llvmpipe_get_tex_transfer(struct pipe_context *pipe,
pt->level = level;
pt->zslice = zslice;

lpt->offset = lptex->level_offset[level];

/* XXX shouldn't that rather be
tex_height = align(u_minify(texture->height0, level), 2)
to account for alignment done in llvmpipe_texture_layout ?
*/
if (texture->target == PIPE_TEXTURE_CUBE) {
unsigned tex_height = u_minify(texture->height0, level);
lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride;
}
else if (texture->target == PIPE_TEXTURE_3D) {
unsigned tex_height = u_minify(texture->height0, level);
lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride;
}
else {
assert(face == 0);
assert(zslice == 0);
}
return pt;
}
return NULL;
@@ -312,7 +363,7 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
ubyte *map, *xfer_map;
ubyte *map;
struct llvmpipe_texture *lpt;
enum pipe_format format;

@@ -320,34 +371,34 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
lpt = llvmpipe_texture(transfer->texture);
format = lpt->base.format;

if (lpt->dt) {
/* display target */
struct sw_winsys *winsys = screen->winsys;
/*
* Transfers, like other pipe operations, must happen in order, so flush the
* context if necessary.
*/
llvmpipe_flush_texture(pipe,
transfer->texture, transfer->face, transfer->level,
0, /* flush_flags */
!(transfer->usage & PIPE_TRANSFER_WRITE), /* read_only */
TRUE, /* cpu_access */
FALSE); /* do_not_flush */

map = winsys->displaytarget_map(winsys, lpt->dt,
pipe_transfer_buffer_flags(transfer));
if (map == NULL)
return NULL;
}
else {
/* regular texture */
map = lpt->data;
}
map = llvmpipe_texture_map(transfer->texture,
transfer->face, transfer->level, transfer->zslice);

/* May want to different things here depending on read/write nature
* of the map:
*/
if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) {
if (transfer->usage & PIPE_TRANSFER_WRITE) {
/* Do something to notify sharing contexts of a texture change.
*/
screen->timestamp++;
}
xfer_map = map + llvmpipe_transfer(transfer)->offset +
map +=
transfer->y / util_format_get_blockheight(format) * transfer->stride +
transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
/*printf("map = %p xfer map = %p\n", map, xfer_map);*/
return xfer_map;
return map;
}


@@ -355,17 +406,10 @@ static void
llvmpipe_transfer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
struct llvmpipe_screen *lp_screen = llvmpipe_screen(pipe->screen);
struct llvmpipe_texture *lpt;

assert(transfer->texture);
lpt = llvmpipe_texture(transfer->texture);

if (lpt->dt) {
/* display target */
struct sw_winsys *winsys = lp_screen->winsys;
winsys->displaytarget_unmap(winsys, lpt->dt);
}
llvmpipe_texture_unmap(transfer->texture,
transfer->face, transfer->level, transfer->zslice);
}



+ 22
- 0
src/gallium/drivers/llvmpipe/lp_texture.h 파일 보기

@@ -95,6 +95,28 @@ llvmpipe_transfer(struct pipe_transfer *pt)
}


static INLINE unsigned
llvmpipe_texture_stride(struct pipe_texture *texture,
unsigned level)
{
struct llvmpipe_texture *lpt = llvmpipe_texture(texture);
assert(level < LP_MAX_TEXTURE_2D_LEVELS);
return lpt->stride[level];
}


/**
 * Map a texture face/level/zslice, without any synchronization.
 * Returns a pointer to the mapped data.
 */
void *
llvmpipe_texture_map(struct pipe_texture *texture,
unsigned face,
unsigned level,
unsigned zslice);

/**
 * Unmap a texture face/level/zslice, without any synchronization.
 */
void
llvmpipe_texture_unmap(struct pipe_texture *texture,
unsigned face,
unsigned level,
unsigned zslice);

extern void
llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen);


+ 1
- 2
src/gallium/drivers/nouveau/Makefile 파일 보기

@@ -4,7 +4,6 @@ include $(TOP)/configs/current
LIBNAME = nouveau

C_SOURCES = nouveau_screen.c \
nouveau_context.c \
nv04_surface_2d.c
nouveau_context.c

include ../../Makefile.template

+ 1
- 1
src/gallium/drivers/nouveau/nouveau_util.h 파일 보기

@@ -33,7 +33,7 @@ nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
max = max - (max % 3);
break;
case PIPE_PRIM_QUADS:
max = max & 3;
max = max & ~3;
break;
case PIPE_PRIM_LINE_LOOP:
case PIPE_PRIM_LINE_STRIP:

+ 1
- 4
src/gallium/drivers/nouveau/nouveau_winsys.h 파일 보기

@@ -27,10 +27,7 @@
#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19)

extern struct pipe_screen *
nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *);

extern struct pipe_screen *
nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);

extern struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);

+ 0
- 29
src/gallium/drivers/nv30/Makefile 파일 보기

@@ -1,29 +0,0 @@
TOP = ../../../..
include $(TOP)/configs/current

LIBNAME = nv30

C_SOURCES = \
nv30_clear.c \
nv30_context.c \
nv30_draw.c \
nv30_fragprog.c \
nv30_fragtex.c \
nv30_miptree.c \
nv30_query.c \
nv30_screen.c \
nv30_state.c \
nv30_state_blend.c \
nv30_state_emit.c \
nv30_state_fb.c \
nv30_state_rasterizer.c \
nv30_state_scissor.c \
nv30_state_stipple.c \
nv30_state_viewport.c \
nv30_state_zsa.c \
nv30_surface.c \
nv30_transfer.c \
nv30_vbo.c \
nv30_vertprog.c

include ../../Makefile.template

+ 0
- 14
src/gallium/drivers/nv30/nv30_clear.c 파일 보기

@@ -1,14 +0,0 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_clear.h"

#include "nv30_context.h"

void
nv30_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil)
{
util_clear(pipe, &nv30_context(pipe)->framebuffer, buffers, rgba, depth,
stencil);
}

+ 0
- 88
src/gallium/drivers/nv30/nv30_context.c 파일 보기

@@ -1,88 +0,0 @@
#include "draw/draw_context.h"
#include "pipe/p_defines.h"

#include "nv30_context.h"
#include "nv30_screen.h"

static void
nv30_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_screen *screen = nv30->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;

if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
BEGIN_RING(chan, rankine, 0x1fd8, 1);
OUT_RING (chan, 2);
BEGIN_RING(chan, rankine, 0x1fd8, 1);
OUT_RING (chan, 1);
}

FIRE_RING(chan);
if (fence)
*fence = NULL;
}

/**
 * pipe_context::destroy hook: drops every hardware state object still held
 * by the context, destroys the draw module context if one was created, and
 * frees the context itself.
 */
static void
nv30_destroy(struct pipe_context *pipe)
{
	struct nv30_context *ctx = nv30_context(pipe);
	unsigned idx;

	for (idx = 0; idx < NV30_STATE_MAX; idx++) {
		struct nouveau_stateobj **slot = &ctx->state.hw[idx];

		if (!*slot)
			continue;
		so_ref(NULL, slot);
	}

	if (ctx->draw)
		draw_destroy(ctx->draw);

	FREE(ctx);
}

struct pipe_context *
nv30_create(struct pipe_screen *pscreen, void *priv)
{
struct nv30_screen *screen = nv30_screen(pscreen);
struct pipe_winsys *ws = pscreen->winsys;
struct nv30_context *nv30;
struct nouveau_winsys *nvws = screen->nvws;

nv30 = CALLOC(1, sizeof(struct nv30_context));
if (!nv30)
return NULL;
nv30->screen = screen;

nv30->nvws = nvws;

nv30->pipe.winsys = ws;
nv30->pipe.screen = pscreen;
nv30->pipe.priv = priv;
nv30->pipe.destroy = nv30_destroy;
nv30->pipe.draw_arrays = nv30_draw_arrays;
nv30->pipe.draw_elements = nv30_draw_elements;
nv30->pipe.clear = nv30_clear;
nv30->pipe.flush = nv30_flush;

nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;

screen->base.channel->user_private = nv30;
screen->base.channel->flush_notify = nv30_state_flush_notify;

nv30_init_query_functions(nv30);
nv30_init_surface_functions(nv30);
nv30_init_state_functions(nv30);
nv30_init_transfer_functions(nv30);

/* Create, configure, and install fallback swtnl path */
nv30->draw = draw_create();
draw_wide_point_threshold(nv30->draw, 9999999.0);
draw_wide_line_threshold(nv30->draw, 9999999.0);
draw_enable_line_stipple(nv30->draw, FALSE);
draw_enable_point_sprites(nv30->draw, FALSE);
draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30));

return &nv30->pipe;
}

+ 0
- 224
src/gallium/drivers/nv30/nv30_context.h 파일 보기

@@ -1,224 +0,0 @@
#ifndef __NV30_CONTEXT_H__
#define __NV30_CONTEXT_H__

#include <stdio.h>

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_compiler.h"

#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_inlines.h"

#include "draw/draw_vertex.h"

#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
#include "nouveau/nouveau_stateobj.h"

#include "nv30_state.h"

#define NOUVEAU_ERR(fmt, args...) \
fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
#define NOUVEAU_MSG(fmt, args...) \
fprintf(stderr, "nouveau: "fmt, ##args);

/*
 * Slot numbers for the per-context array of hardware state objects
 * (struct nv30_state::hw is sized by NV30_STATE_MAX).  There is one slot
 * per state group, with sixteen fragment texture slots and four vertex
 * texture slots.
 */
enum nv30_state_index {
NV30_STATE_FB = 0,
NV30_STATE_VIEWPORT = 1,
NV30_STATE_BLEND = 2,
NV30_STATE_RAST = 3,
NV30_STATE_ZSA = 4,
NV30_STATE_BCOL = 5,
NV30_STATE_CLIP = 6,
NV30_STATE_SCISSOR = 7,
NV30_STATE_STIPPLE = 8,
NV30_STATE_FRAGPROG = 9,
NV30_STATE_VERTPROG = 10,
NV30_STATE_FRAGTEX0 = 11,
NV30_STATE_FRAGTEX1 = 12,
NV30_STATE_FRAGTEX2 = 13,
NV30_STATE_FRAGTEX3 = 14,
NV30_STATE_FRAGTEX4 = 15,
NV30_STATE_FRAGTEX5 = 16,
NV30_STATE_FRAGTEX6 = 17,
NV30_STATE_FRAGTEX7 = 18,
NV30_STATE_FRAGTEX8 = 19,
NV30_STATE_FRAGTEX9 = 20,
NV30_STATE_FRAGTEX10 = 21,
NV30_STATE_FRAGTEX11 = 22,
NV30_STATE_FRAGTEX12 = 23,
NV30_STATE_FRAGTEX13 = 24,
NV30_STATE_FRAGTEX14 = 25,
NV30_STATE_FRAGTEX15 = 26,
NV30_STATE_VERTTEX0 = 27,
NV30_STATE_VERTTEX1 = 28,
NV30_STATE_VERTTEX2 = 29,
NV30_STATE_VERTTEX3 = 30,
NV30_STATE_VTXBUF = 31,
NV30_STATE_VTXFMT = 32,
NV30_STATE_VTXATTR = 33,
NV30_STATE_SR = 34,
/* Number of slots; not a valid index. */
NV30_STATE_MAX = 35
};

#include "nv30_screen.h"

#define NV30_NEW_BLEND (1 << 0)
#define NV30_NEW_RAST (1 << 1)
#define NV30_NEW_ZSA (1 << 2)
#define NV30_NEW_SAMPLER (1 << 3)
#define NV30_NEW_FB (1 << 4)
#define NV30_NEW_STIPPLE (1 << 5)
#define NV30_NEW_SCISSOR (1 << 6)
#define NV30_NEW_VIEWPORT (1 << 7)
#define NV30_NEW_BCOL (1 << 8)
#define NV30_NEW_VERTPROG (1 << 9)
#define NV30_NEW_FRAGPROG (1 << 10)
#define NV30_NEW_ARRAYS (1 << 11)
#define NV30_NEW_UCP (1 << 12)
#define NV30_NEW_SR (1 << 13)

/*
 * Rasterizer state: a copy of the Gallium rasterizer state together with a
 * nouveau state object (NOTE(review): presumably the pre-built hardware
 * commands for it -- confirm in nv30_state.c).
 */
struct nv30_rasterizer_state {
struct pipe_rasterizer_state pipe;
struct nouveau_stateobj *so;
};

/*
 * Depth/stencil/alpha state: a copy of the Gallium state together with a
 * nouveau state object (NOTE(review): presumably the pre-built hardware
 * commands for it -- confirm in nv30_state.c).
 */
struct nv30_zsa_state {
struct pipe_depth_stencil_alpha_state pipe;
struct nouveau_stateobj *so;
};

/*
 * Blend state: a copy of the Gallium blend state together with a nouveau
 * state object (NOTE(review): presumably the pre-built hardware commands
 * for it -- confirm in nv30_state.c).
 */
struct nv30_blend_state {
struct pipe_blend_state pipe;
struct nouveau_stateobj *so;
};


/*
 * Hardware-side state tracked per context.
 */
struct nv30_state {
unsigned scissor_enabled;
unsigned stipple_enabled;
unsigned fp_samplers;

/* NOTE(review): presumably a bitmask over nv30_state_index slots -- confirm. */
uint64_t dirty;
/* One state object per nv30_state_index slot; may be NULL. */
struct nouveau_stateobj *hw[NV30_STATE_MAX];
};

/*
 * Vertex element state: up to 16 Gallium vertex elements and a count
 * (NOTE(review): presumably the number of valid entries in pipe[] --
 * confirm).
 */
struct nv30_vtxelt_state {
struct pipe_vertex_element pipe[16];
unsigned num_elements;
};

/*
 * nv30 driver context.
 *
 * The generic pipe_context is the first member, which is what allows
 * nv30_context() to convert a pipe_context pointer with a plain cast.
 */
struct nv30_context {
struct pipe_context pipe;

struct nouveau_winsys *nvws;
struct nv30_screen *screen;

/* Draw module context used for the fallback swtnl path. */
struct draw_context *draw;

/* HW state derived from pipe states */
struct nv30_state state;

/* Context state */
/* NOTE(review): presumably a bitmask of NV30_NEW_* flags -- confirm. */
unsigned dirty;
struct pipe_scissor_state scissor;
unsigned stipple[32];
struct nv30_vertex_program *vertprog;
struct nv30_fragment_program *fragprog;
struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
unsigned constbuf_nr[PIPE_SHADER_TYPES];
struct nv30_rasterizer_state *rasterizer;
struct nv30_zsa_state *zsa;
struct nv30_blend_state *blend;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state framebuffer;
struct pipe_buffer *idxbuf;
unsigned idxbuf_format;
struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
unsigned nr_textures;
unsigned dirty_samplers;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
struct nv30_vtxelt_state *vtxelt;
};

/*
 * Convert a generic pipe_context pointer to the nv30 context that embeds
 * it.  This is a plain cast, valid because the pipe_context is the first
 * member of struct nv30_context.
 */
static INLINE struct nv30_context *
nv30_context(struct pipe_context *pipe)
{
	struct nv30_context *nv30 = (struct nv30_context *)pipe;

	return nv30;
}

/*
 * Descriptor for one state group: a validation callback plus two "dirty"
 * values.
 * NOTE(review): presumably dirty.pipe holds the NV30_NEW_* flags that
 * trigger validation and dirty.hw the nv30_state_index slot it updates --
 * confirm against nv30_state_emit.c.
 */
struct nv30_state_entry {
boolean (*validate)(struct nv30_context *nv30);
struct {
unsigned pipe;
unsigned hw;
} dirty;
};

extern void nv30_init_state_functions(struct nv30_context *nv30);
extern void nv30_init_surface_functions(struct nv30_context *nv30);
extern void nv30_init_query_functions(struct nv30_context *nv30);
extern void nv30_init_transfer_functions(struct nv30_context *nv30);

extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen);

/* nv30_draw.c */
extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30);

/* nv30_vertprog.c */
extern void nv30_vertprog_destroy(struct nv30_context *,
struct nv30_vertex_program *);

/* nv30_fragprog.c */
extern void nv30_fragprog_destroy(struct nv30_context *,
struct nv30_fragment_program *);

/* nv30_fragtex.c */
extern void nv30_fragtex_bind(struct nv30_context *);

/* nv30_state.c and friends */
extern boolean nv30_state_validate(struct nv30_context *nv30);
extern void nv30_state_emit(struct nv30_context *nv30);
extern void nv30_state_flush_notify(struct nouveau_channel *chan);
extern struct nv30_state_entry nv30_state_rasterizer;
extern struct nv30_state_entry nv30_state_scissor;
extern struct nv30_state_entry nv30_state_stipple;
extern struct nv30_state_entry nv30_state_fragprog;
extern struct nv30_state_entry nv30_state_vertprog;
extern struct nv30_state_entry nv30_state_blend;
extern struct nv30_state_entry nv30_state_blend_colour;
extern struct nv30_state_entry nv30_state_zsa;
extern struct nv30_state_entry nv30_state_viewport;
extern struct nv30_state_entry nv30_state_framebuffer;
extern struct nv30_state_entry nv30_state_fragtex;
extern struct nv30_state_entry nv30_state_vbo;
extern struct nv30_state_entry nv30_state_sr;

/* nv30_vbo.c */
extern void nv30_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
extern void nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
unsigned count);

/* nv30_clear.c */
extern void nv30_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil);

/* nv30_context.c */
struct pipe_context *
nv30_create(struct pipe_screen *pscreen, void *priv);

#endif

+ 0
- 61
src/gallium/drivers/nv30/nv30_draw.c 파일 보기

@@ -1,61 +0,0 @@
#include "draw/draw_pipe.h"

#include "nv30_context.h"

/*
 * Draw-module stage owned by the nv30 driver.  `draw` is the first member,
 * and nv30_draw_render_stage() hands out a pointer to it.
 */
struct nv30_draw_stage {
struct draw_stage draw;
/* Driver context this stage was created for. */
struct nv30_context *nv30;
};

/* Point callback of the render stage: not implemented, only logs. */
static void
nv30_draw_point(struct draw_stage *draw, struct prim_header *prim)
{
	(void)draw;
	(void)prim;

	NOUVEAU_ERR("\n");
}

/* Line callback of the render stage: not implemented, only logs. */
static void
nv30_draw_line(struct draw_stage *draw, struct prim_header *prim)
{
	(void)draw;
	(void)prim;

	NOUVEAU_ERR("\n");
}

/* Triangle callback of the render stage: not implemented, only logs. */
static void
nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim)
{
	(void)draw;
	(void)prim;

	NOUVEAU_ERR("\n");
}

/* Flush callback of the render stage: intentionally does nothing. */
static void
nv30_draw_flush(struct draw_stage *draw, unsigned flags)
{
	(void)draw;
	(void)flags;
}

/* Stipple-counter reset callback: not implemented, only logs. */
static void
nv30_draw_reset_stipple_counter(struct draw_stage *draw)
{
	(void)draw;

	NOUVEAU_ERR("\n");
}

/*
 * Destroy callback of the render stage.
 *
 * `draw` is the first member of the struct nv30_draw_stage allocated in
 * nv30_draw_render_stage(), so freeing it releases the whole stage.
 */
static void
nv30_draw_destroy(struct draw_stage *draw)
{
FREE(draw);
}

struct draw_stage *
nv30_draw_render_stage(struct nv30_context *nv30)
{
struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage);

nv30draw->nv30 = nv30;
nv30draw->draw.draw = nv30->draw;
nv30draw->draw.point = nv30_draw_point;
nv30draw->draw.line = nv30_draw_line;
nv30draw->draw.tri = nv30_draw_tri;
nv30draw->draw.flush = nv30_draw_flush;
nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter;
nv30draw->draw.destroy = nv30_draw_destroy;

return &nv30draw->draw;
}


+ 0
- 905
src/gallium/drivers/nv30/nv30_fragprog.c 파일 보기

@@ -1,905 +0,0 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

#include "nv30_context.h"

/* Component selectors used when building source swizzles. */
#define SWZ_X 0
#define SWZ_Y 1
#define SWZ_Z 2
#define SWZ_W 3
/* Per-component destination write-mask bits. */
#define MASK_X 1
#define MASK_Y 2
#define MASK_Z 4
#define MASK_W 8
#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
/*
 * Defaults defined before nv30_shader.h is included.
 * NOTE(review): presumably consumed by the helpers in that header -- confirm.
 */
#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
#define DEF_CTEST NV30_FP_OP_COND_TR
#include "nv30_shader.h"

/* Short-hands for the nv30_sr_* register modifiers. */
#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
#define neg(s) nv30_sr_neg((s))
/* NOTE(review): shadows the C library abs() inside this file. */
#define abs(s) nv30_sr_abs((s))
#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)

/* Capacity of the compile-time constant and immediate tables below. */
#define MAX_CONSTS 128
#define MAX_IMM 32
/* Scratch state used while translating one TGSI fragment shader. */
struct nv30_fpc {
/* Program being filled in. */
struct nv30_fragment_program *fp;

/* TGSI input index -> hardware input source, set from the declarations. */
uint attrib_map[PIPE_MAX_SHADER_INPUTS];

/* Highest hardware temp index referenced by the shader so far. */
int high_temp;
/* Scratch temps handed out by temp() for the current instruction. */
int temp_temp_count;
/* One more than the highest temp index written (see emit_dst()). */
int num_regs;

/* TGSI output indices declared with POSITION / COLOR semantics. */
uint depth_id;
uint colour_id;

/* Word offset in fp->insn of the instruction currently being emitted. */
unsigned inst_offset;

/* pipe is the index passed to constant(); -1 means vals holds the data. */
struct {
int pipe;
float vals[4];
} consts[MAX_CONSTS];
int nr_consts;

/* Registers created for TGSI immediates, in declaration order. */
struct nv30_sreg imm[MAX_IMM];
unsigned nr_imm;
};

/*
 * Hand out the next scratch temporary for the current instruction.
 * Scratch registers are numbered directly above fpc->high_temp.
 */
static INLINE struct nv30_sreg
temp(struct nv30_fpc *fpc)
{
	int slot = fpc->high_temp + 1 + fpc->temp_temp_count;

	fpc->temp_temp_count++;
	return nv30_sr(NV30SR_TEMP, slot);
}

/*
 * Reserve the next entry of fpc->consts and return a CONST register that
 * refers to it.  `pipe` is stored in the entry; when it is -1 the four
 * floats in `vals` are copied in as well, otherwise `vals` is not read.
 */
static INLINE struct nv30_sreg
constant(struct nv30_fpc *fpc, int pipe, float vals[4])
{
	const int slot = fpc->nr_consts;

	if (slot == MAX_CONSTS)
		assert(0);
	fpc->nr_consts = slot + 1;

	fpc->consts[slot].pipe = pipe;
	if (pipe == -1) {
		memcpy(fpc->consts[slot].vals, vals, 4 * sizeof(float));
	}

	return nv30_sr(NV30SR_CONST, slot);
}

/*
 * Emit helpers that paste the opcode name onto NV30_FP_OP_OPCODE_.
 *
 * tex() ignores its s1/s2 arguments and always passes an identifier named
 * `none`, which therefore has to exist in the scope of the caller.
 */
#define arith(cc,s,o,d,m,s0,s1,s2) \
nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
(d), (m), (s0), (s1), (s2))
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
(d), (m), (s0), none, none)

/*
 * Extend the program's instruction buffer by `size` 32-bit words.
 *
 * The buffer may move, so pointers into fp->insn taken before this call
 * must be re-fetched afterwards.
 *
 * NOTE(review): the realloc() result is not checked; on failure fp->insn
 * becomes NULL and the previous buffer is leaked.
 */
static void
grow_insns(struct nv30_fpc *fpc, int size)
{
struct nv30_fragment_program *fp = fpc->fp;

fp->insn_len += size;
fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
}

/*
 * Encode source operand `src` into the instruction at fpc->inst_offset.
 *
 * `pos` selects the source slot; the encoded register word is OR-ed into
 * hw[pos + 1].  A CONST source grows the program by four extra words after
 * the instruction to hold the constant value inline.
 */
static void
emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
{
struct nv30_fragment_program *fp = fpc->fp;
uint32_t *hw = &fp->insn[fpc->inst_offset];
uint32_t sr = 0;

switch (src.type) {
case NV30SR_INPUT:
sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
/* The input selector lives in the first instruction word. */
hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
break;
case NV30SR_OUTPUT:
sr |= NV30_FP_REG_SRC_HALF;
/* fall-through */
case NV30SR_TEMP:
sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
break;
case NV30SR_CONST:
grow_insns(fpc, 4);
/* grow_insns() may have moved the buffer; refresh the pointer. */
hw = &fp->insn[fpc->inst_offset];
if (fpc->consts[src.index].pipe >= 0) {
struct nv30_fragment_program_data *fpd;

/*
 * Record where the value lives so it can be patched in later;
 * the slot is zeroed for now.
 * NOTE(review): realloc() result is not checked.
 */
fp->consts = realloc(fp->consts, ++fp->nr_consts *
sizeof(*fpd));
fpd = &fp->consts[fp->nr_consts - 1];
fpd->offset = fpc->inst_offset + 4;
fpd->index = fpc->consts[src.index].pipe;
memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
} else {
/* Value known at compile time: store it inline right away. */
memcpy(&fp->insn[fpc->inst_offset + 4],
fpc->consts[src.index].vals,
sizeof(uint32_t) * 4);
}

sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
break;
case NV30SR_NONE:
sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
break;
default:
assert(0);
}

if (src.negate)
sr |= NV30_FP_REG_NEGATE;

/* Absolute-value flags sit in word 1, one bit per source from bit 29. */
if (src.abs)
hw[1] |= (1 << (29 + pos));

sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
(src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
(src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
(src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));

hw[pos + 1] |= sr;
}

/*
 * Encode destination register `dst` into the first word of the instruction
 * at fpc->inst_offset, updating register bookkeeping as a side effect.
 */
static void
emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
{
struct nv30_fragment_program *fp = fpc->fp;
uint32_t *hw = &fp->insn[fpc->inst_offset];

switch (dst.type) {
case NV30SR_TEMP:
/* Track the number of temps the program writes. */
if (fpc->num_regs < (dst.index + 1))
fpc->num_regs = dst.index + 1;
break;
case NV30SR_OUTPUT:
if (dst.index == 1) {
/* NOTE(review): 0xe presumably enables depth output -- confirm. */
fp->fp_control |= 0xe;
} else {
hw[0] |= NV30_FP_OP_OUT_REG_HALF;
}
break;
case NV30SR_NONE:
/* NOTE(review): bit 30 presumably means "no destination" -- confirm. */
hw[0] |= (1 << 30);
break;
default:
assert(0);
}

hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
}

/*
 * Append one four-word instruction to the program and encode it.
 *
 * sat  - non-zero to set the saturate flag on the result
 * op   - hardware opcode (an NV30_FP_OP_OPCODE_* value)
 * dst  - destination register, including condition-code settings
 * mask - destination write mask (MASK_* bits)
 * s0-2 - source operands
 *
 * fpc->inst_offset is left pointing at the new instruction.
 */
static void
nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
struct nv30_sreg dst, int mask,
struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
{
struct nv30_fragment_program *fp = fpc->fp;
uint32_t *hw;

fpc->inst_offset = fp->insn_len;
grow_insns(fpc, 4);
hw = &fp->insn[fpc->inst_offset];
memset(hw, 0, sizeof(uint32_t) * 4);

if (op == NV30_FP_OP_OPCODE_KIL)
fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);

if (sat)
hw[0] |= NV30_FP_OP_OUT_SAT;

if (dst.cc_update)
hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
(dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
(dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
(dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));

/*
 * emit_src() may grow (and move) the instruction buffer for constant
 * operands, so `hw` must not be used past this point.
 */
emit_dst(fpc, dst);
emit_src(fpc, 0, s0);
emit_src(fpc, 1, s1);
emit_src(fpc, 2, s2);
}

/*
 * Emit a texture instruction: an ordinary instruction with the texture
 * unit encoded in its first word.  The unit is also recorded in the
 * program's sampler bitmask.
 */
static void
nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
	    struct nv30_sreg dst, int mask,
	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
{
	struct nv30_fragment_program *prog = fpc->fp;

	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);

	/* Index the buffer only after the emit: it may have been moved. */
	prog->samplers |= (1 << unit);
	prog->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
}

static INLINE struct nv30_sreg
tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
struct nv30_sreg src;

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
src = nv30_sr(NV30SR_INPUT,
fpc->attrib_map[fsrc->Register.Index]);
break;
case TGSI_FILE_CONSTANT:
src = constant(fpc, fsrc->Register.Index, NULL);
break;
case TGSI_FILE_IMMEDIATE:
assert(fsrc->Register.Index < fpc->nr_imm);
src = fpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
if (fpc->high_temp < src.index)
fpc->high_temp = src.index;
break;
/* This is clearly insane, but gallium hands us shaders like this.
* Luckily fragprog results are just temp regs..
*/
case TGSI_FILE_OUTPUT:
if (fsrc->Register.Index == fpc->colour_id)
return nv30_sr(NV30SR_OUTPUT, 0);
else
return nv30_sr(NV30SR_OUTPUT, 1);
break;
default:
NOUVEAU_ERR("bad src file\n");
break;
}

src.abs = fsrc->Register.Absolute;
src.negate = fsrc->Register.Negate;
src.swz[0] = fsrc->Register.SwizzleX;
src.swz[1] = fsrc->Register.SwizzleY;
src.swz[2] = fsrc->Register.SwizzleZ;
src.swz[3] = fsrc->Register.SwizzleW;
return src;
}

/*
 * Translate a TGSI destination operand into an nv30 register.
 *
 * The colour output maps to OUTPUT 0 and any other output to OUTPUT 1.
 * Temporaries are shifted up by one and tracked in fpc->high_temp.  The
 * NULL file, and any unsupported file (after logging), yield NONE.
 */
static INLINE struct nv30_sreg
tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst)
{
	if (fdst->Register.File == TGSI_FILE_OUTPUT) {
		int is_colour = (fdst->Register.Index == fpc->colour_id);

		return nv30_sr(NV30SR_OUTPUT, is_colour ? 0 : 1);
	}

	if (fdst->Register.File == TGSI_FILE_TEMPORARY) {
		int reg = fdst->Register.Index + 1;

		if (fpc->high_temp < reg)
			fpc->high_temp = reg;
		return nv30_sr(NV30SR_TEMP, reg);
	}

	if (fdst->Register.File != TGSI_FILE_NULL)
		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);

	return nv30_sr(NV30SR_NONE, 0);
}

/* Convert a TGSI write mask into the MASK_* bits used by this file. */
static INLINE int
tgsi_mask(uint tgsi)
{
	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
}

/*
 * Check whether every swizzle component of `fsrc` is natively supported.
 *
 * Returns TRUE when all four components select X, Y, Z or W; `*src` is
 * then left untouched.  Otherwise a scratch temp is stored in `*src`, the
 * supported components are copied into it with a MOV, and FALSE is
 * returned.
 *
 * The switch only accepts the four plain swizzles (anything else asserts),
 * so in practice the mask always ends up as MASK_ALL.
 *
 * NOTE(review): tgsi_src() is called unconditionally for the `tgsi`
 * initialiser, so a CONSTANT operand reserves a constant-table entry here
 * even when the result is not used.
 */
static boolean
src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
struct nv30_sreg *src)
{
const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
uint mask = 0;
uint c;

for (c = 0; c < 4; c++) {
switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
case TGSI_SWIZZLE_X:
case TGSI_SWIZZLE_Y:
case TGSI_SWIZZLE_Z:
case TGSI_SWIZZLE_W:
mask |= (1 << c);
break;
default:
assert(0);
}
}

if (mask == MASK_ALL)
return TRUE;

*src = temp(fpc);

if (mask)
arith(fpc, 0, MOV, *src, mask, tgsi, none, none);

return FALSE;
}

/*
 * Translate one TGSI instruction into hardware instructions.
 *
 * Source operands are resolved first; extra INPUT or CONSTANT/IMMEDIATE
 * operands beyond the first distinct index of each kind are copied into
 * scratch temps.  The opcode is then mapped onto one or more arith()/tex()
 * emits.
 *
 * Returns TRUE on success (TGSI END is accepted and emits nothing), FALSE
 * for an unsupported source file or opcode.
 */
static boolean
nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
const struct tgsi_full_instruction *finst)
{
/* `none` is also referenced by name inside the tex() macro. */
const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
struct nv30_sreg src[3], dst, tmp;
int mask, sat, unit = 0;
/* Index of the input / constant already used directly, or -1. */
int ai = -1, ci = -1;
int i;

if (finst->Instruction.Opcode == TGSI_OPCODE_END)
return TRUE;

/*
 * Resolve temporaries first: tgsi_src() raises fpc->high_temp, and temp()
 * numbers scratch registers above that value.
 */
fpc->temp_temp_count = 0;
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;

fsrc = &finst->Src[i];
if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(fpc, fsrc);
}
}

for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;

fsrc = &finst->Src[i];

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
/* FALSE means src[i] was replaced by a scratch temp. */
if (!src_native_swz(fpc, fsrc, &src[i]))
continue;
break;
default:
break;
}

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
if (ai == -1 || ai == fsrc->Register.Index) {
ai = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
/* A different input is already in use: go through a temp. */
NOUVEAU_MSG("extra src attr %d\n",
fsrc->Register.Index);
src[i] = temp(fpc);
arith(fpc, 0, MOV, src[i], MASK_ALL,
tgsi_src(fpc, fsrc), none, none);
}
break;
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
/* Constants and immediates share the single `ci` slot. */
if (ci == -1 || ci == fsrc->Register.Index) {
ci = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
arith(fpc, 0, MOV, src[i], MASK_ALL,
tgsi_src(fpc, fsrc), none, none);
}
break;
case TGSI_FILE_TEMPORARY:
/* handled above */
break;
case TGSI_FILE_SAMPLER:
unit = fsrc->Register.Index;
break;
case TGSI_FILE_OUTPUT:
/* NOTE(review): src[i] is not assigned for OUTPUT sources here. */
break;
default:
NOUVEAU_ERR("bad src file\n");
return FALSE;
}
}

dst = tgsi_dst(fpc, &finst->Dst[0]);
mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);

switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
break;
case TGSI_OPCODE_ADD:
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
/*
 * Set the condition codes from src[0], then issue two conditional moves.
 * NOTE(review): the tests use NV30_VP_INST_COND_* names in fragment
 * program code -- confirm they match the NV30_FP_OP_COND_* encoding.
 */
tmp = nv30_sr(NV30SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
dst.cc_test = NV30_VP_INST_COND_GE;
arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV30_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
case TGSI_OPCODE_COS:
arith(fpc, sat, COS, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_DP3:
arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_DP4:
arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_DPH:
/* DP3 into a scratch temp, then add src[1].w. */
tmp = temp(fpc);
arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
swz(src[1], W, W, W, W), none);
break;
case TGSI_OPCODE_DST:
arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_EX2:
arith(fpc, sat, EX2, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_FLR:
arith(fpc, sat, FLR, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_FRC:
arith(fpc, sat, FRC, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_KILP:
arith(fpc, 0, KIL, none, 0, none, none, none);
break;
case TGSI_OPCODE_KIL:
/* Set the condition codes from src[0], then KIL on the LT test. */
dst = nv30_sr(NV30SR_NONE, 0);
dst.cc_update = 1;
arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
arith(fpc, 0, KIL, dst, 0, none, none, none);
break;
case TGSI_OPCODE_LG2:
arith(fpc, sat, LG2, dst, mask, src[0], none, none);
break;
// case TGSI_OPCODE_LIT:
case TGSI_OPCODE_LRP:
arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
break;
case TGSI_OPCODE_MAD:
arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
break;
case TGSI_OPCODE_MAX:
arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_MIN:
arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_MOV:
arith(fpc, sat, MOV, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_MUL:
arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_POW:
arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_RCP:
arith(fpc, sat, RCP, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_RET:
assert(0);
break;
case TGSI_OPCODE_RFL:
arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_RSQ:
arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
/* avoid overwriting the source */
/* The two branches differ only in which of COS / SIN is emitted first. */
if(src[0].swz[SWZ_X] != SWZ_X)
{
if (mask & MASK_X) {
arith(fpc, sat, COS, dst, MASK_X,
swz(src[0], X, X, X, X), none, none);
}
if (mask & MASK_Y) {
arith(fpc, sat, SIN, dst, MASK_Y,
swz(src[0], X, X, X, X), none, none);
}
}
else
{
if (mask & MASK_Y) {
arith(fpc, sat, SIN, dst, MASK_Y,
swz(src[0], X, X, X, X), none, none);
}
if (mask & MASK_X) {
arith(fpc, sat, COS, dst, MASK_X,
swz(src[0], X, X, X, X), none, none);
}
}
break;
case TGSI_OPCODE_SIN:
arith(fpc, sat, SIN, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_SGE:
arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_SGT:
arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_SLT:
arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_SUB:
arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
break;
case TGSI_OPCODE_TEX:
tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_TXB:
tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_TXP:
tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_XPD:
/* Cross product as MUL into a scratch temp followed by MAD; W is not written. */
tmp = temp(fpc);
arith(fpc, 0, MUL, tmp, mask,
swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
neg(tmp));
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
return FALSE;
}

return TRUE;
}

/*
 * Record which hardware input source a declared TGSI input maps to.
 *
 * The mapping is stored in fpc->attrib_map at the declaration's first
 * register index.  Returns FALSE (after logging) for a semantic the
 * translator does not handle: colour index above 1, generic index above 7,
 * or any other semantic name.
 */
static boolean
nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
				const struct tgsi_full_declaration *fdec)
{
	int hw_input;

	switch (fdec->Semantic.Name) {
	case TGSI_SEMANTIC_POSITION:
		hw_input = NV30_FP_OP_INPUT_SRC_POSITION;
		break;
	case TGSI_SEMANTIC_FOG:
		hw_input = NV30_FP_OP_INPUT_SRC_FOGC;
		break;
	case TGSI_SEMANTIC_COLOR:
		switch (fdec->Semantic.Index) {
		case 0:
			hw_input = NV30_FP_OP_INPUT_SRC_COL0;
			break;
		case 1:
			hw_input = NV30_FP_OP_INPUT_SRC_COL1;
			break;
		default:
			NOUVEAU_ERR("bad colour semantic index\n");
			return FALSE;
		}
		break;
	case TGSI_SEMANTIC_GENERIC:
		if (!(fdec->Semantic.Index <= 7)) {
			NOUVEAU_ERR("bad generic semantic index\n");
			return FALSE;
		}
		hw_input = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index);
		break;
	default:
		NOUVEAU_ERR("bad input semantic\n");
		return FALSE;
	}

	fpc->attrib_map[fdec->Range.First] = hw_input;
	return TRUE;
}

/*
 * Remember which TGSI output registers carry depth (POSITION semantic)
 * and colour.  Any other output semantic is logged and rejected.
 */
static boolean
nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
				const struct tgsi_full_declaration *fdec)
{
	if (fdec->Semantic.Name == TGSI_SEMANTIC_POSITION) {
		fpc->depth_id = fdec->Range.First;
		return TRUE;
	}

	if (fdec->Semantic.Name == TGSI_SEMANTIC_COLOR) {
		fpc->colour_id = fdec->Range.First;
		return TRUE;
	}

	NOUVEAU_ERR("bad output semantic\n");
	return FALSE;
}

static boolean
nv30_fragprog_prepare(struct nv30_fpc *fpc)
{
struct tgsi_parse_context p;
/*int high_temp = -1, i;*/

tgsi_parse_init(&p, fpc->fp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
const union tgsi_full_token *tok = &p.FullToken;

tgsi_parse_token(&p);
switch(tok->Token.Type) {
case TGSI_TOKEN_TYPE_DECLARATION:
{
const struct tgsi_full_declaration *fdec;
fdec = &p.FullToken.FullDeclaration;
switch (fdec->Declaration.File) {
case TGSI_FILE_INPUT:
if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
goto out_err;
break;
case TGSI_FILE_OUTPUT:
if (!nv30_fragprog_parse_decl_output(fpc, fdec))
goto out_err;
break;
/*case TGSI_FILE_TEMPORARY:
if (fdec->Range.Last > high_temp) {
high_temp =
fdec->Range.Last;
}
break;*/
default:
break;
}
}
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
struct tgsi_full_immediate *imm;
float vals[4];

imm = &p.FullToken.FullImmediate;
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);

vals[0] = imm->u[0].Float;
vals[1] = imm->u[1].Float;
vals[2] = imm->u[2].Float;
vals[3] = imm->u[3].Float;
fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
}
break;
default:
break;
}
}
tgsi_parse_free(&p);

/*if (++high_temp) {
fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
for (i = 0; i < high_temp; i++)
fpc->r_temp[i] = temp(fpc);
fpc->r_temps_discard = 0;
}*/

return TRUE;

out_err:
/*if (fpc->r_temp)
FREE(fpc->r_temp);*/
tgsi_parse_free(&p);
return FALSE;
}

static void
nv30_fragprog_translate(struct nv30_context *nv30,
struct nv30_fragment_program *fp)
{
struct tgsi_parse_context parse;
struct nv30_fpc *fpc = NULL;

tgsi_dump(fp->pipe.tokens,0);

fpc = CALLOC(1, sizeof(struct nv30_fpc));
if (!fpc)
return;
fpc->fp = fp;
fpc->high_temp = -1;
fpc->num_regs = 2;

if (!nv30_fragprog_prepare(fpc)) {
FREE(fpc);
return;
}

tgsi_parse_init(&parse, fp->pipe.tokens);

while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);

switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
{
const struct tgsi_full_instruction *finst;

finst = &parse.FullToken.FullInstruction;
if (!nv30_fragprog_parse_instruction(fpc, finst))
goto out_err;
}
break;
default:
break;
}
}

fp->fp_control |= (fpc->num_regs-1)/2;
fp->fp_reg_control = (1<<16)|0x4;

/* Terminate final instruction */
fp->insn[fpc->inst_offset] |= 0x00000001;

/* Append NOP + END instruction, may or may not be necessary. */
fpc->inst_offset = fp->insn_len;
grow_insns(fpc, 4);
fp->insn[fpc->inst_offset + 0] = 0x00000001;
fp->insn[fpc->inst_offset + 1] = 0x00000000;
fp->insn[fpc->inst_offset + 2] = 0x00000000;
fp->insn[fpc->inst_offset + 3] = 0x00000000;

fp->translated = TRUE;
fp->on_hw = FALSE;
out_err:
tgsi_parse_free(&parse);
FREE(fpc);
}

/*
 * Copy the encoded program words into the program's buffer.
 *
 * On a little-endian host the words are copied as they are; otherwise the
 * two 16-bit halves of every word are swapped on the way.
 */
static void
nv30_fragprog_upload(struct nv30_context *nv30,
		     struct nv30_fragment_program *fp)
{
	struct pipe_screen *pscreen = nv30->pipe.screen;
	const uint32_t probe = 1;
	uint32_t *dst;
	int w;

	dst = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);

#if 0
	for (w = 0; w < fp->insn_len; w++) {
		fflush(stdout); fflush(stderr);
		NOUVEAU_ERR("%d 0x%08x\n", w, fp->insn[w]);
		fflush(stdout); fflush(stderr);
	}
#endif

	/* The first byte of `probe` is non-zero only on little-endian hosts. */
	if (*(const uint8_t *)&probe) {
		for (w = 0; w < fp->insn_len; w++)
			dst[w] = fp->insn[w];
	} else {
		/* Weird swapping for big-endian chips */
		for (w = 0; w < fp->insn_len; w++) {
			uint32_t lo = fp->insn[w] & 0xffff;
			uint32_t hi = (fp->insn[w] >> 16) & 0xffff;

			dst[w] = (lo << 16) | hi;
		}
	}

	pipe_buffer_unmap(pscreen, fp->buffer);
}

/*
 * Bring the bound fragment program up to date.
 *
 * On first use the program is translated, uploaded to a new buffer and a
 * state object describing it is built.  On every call the constants the
 * program refers to are re-read from the fragment constant buffer and
 * patched into the instruction stream, re-uploading if any value changed.
 *
 * Returns TRUE when the FRAGPROG hardware state slot was (re)assigned,
 * FALSE when translation failed or nothing changed.
 */
static boolean
nv30_fragprog_validate(struct nv30_context *nv30)
{
struct nv30_fragment_program *fp = nv30->fragprog;
struct pipe_buffer *constbuf =
nv30->constbuf[PIPE_SHADER_FRAGMENT];
struct pipe_screen *pscreen = nv30->pipe.screen;
struct nouveau_stateobj *so;
boolean new_consts = FALSE;
int i;

/* Already translated: only the constants may need refreshing. */
if (fp->translated)
goto update_constants;

/*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
nv30_fragprog_translate(nv30, fp);
if (!fp->translated) {
/*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
return FALSE;
}

/* NOTE(review): the buffer_create() result is not checked for NULL. */
fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
nv30_fragprog_upload(nv30, fp);

so = so_new(4, 4, 1);
so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
so_data (so, fp->fp_control);
so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
so_data (so, fp->fp_reg_control);
so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
so_data (so, fp->samplers);
/* Hand the object to fp->so, then drop the local pointer. */
so_ref(so, &fp->so);
so_ref(NULL, &so);

update_constants:
if (fp->nr_consts) {
float *map;

map = pipe_buffer_map(pscreen, constbuf,
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < fp->nr_consts; i++) {
struct nv30_fragment_program_data *fpd = &fp->consts[i];
/* Inline slot in the program and its four-float source value. */
uint32_t *p = &fp->insn[fpd->offset];
uint32_t *cb = (uint32_t *)&map[fpd->index * 4];

if (!memcmp(p, cb, 4 * sizeof(float)))
continue;
memcpy(p, cb, 4 * sizeof(float));
new_consts = TRUE;
}
pipe_buffer_unmap(pscreen, constbuf);

if (new_consts)
nv30_fragprog_upload(nv30, fp);
}

if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
return TRUE;
}

return FALSE;
}

/*
 * Release everything a translated fragment program owns: its buffer, its
 * state object, the encoded instructions and the constant-patch table.
 * The structure `fp` itself is not freed.
 */
void
nv30_fragprog_destroy(struct nv30_context *nv30,
		      struct nv30_fragment_program *fp)
{
	if (fp->buffer)
		pipe_buffer_reference(&fp->buffer, NULL);

	if (fp->so)
		so_ref(NULL, &fp->so);

	if (fp->insn_len)
		FREE(fp->insn);
	fp->insn = NULL;
	fp->insn_len = 0;

	/* The patch table is built while emitting constant operands. */
	if (fp->nr_consts)
		FREE(fp->consts);
	fp->consts = NULL;
	fp->nr_consts = 0;
}

/* State entry for the fragment program, validated by nv30_fragprog_validate(). */
struct nv30_state_entry nv30_state_fragprog = {
.validate = nv30_fragprog_validate,
.dirty = {
.pipe = NV30_NEW_FRAGPROG,
.hw = NV30_STATE_FRAGPROG
}
};

+ 0
- 240
src/gallium/drivers/nv30/nv30_miptree.c 파일 보기

@@ -1,240 +0,0 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_math.h"

#include "nv30_context.h"
#include "../nouveau/nv04_surface_2d.h"

/*
 * Compute the pitch of every mip level and the byte offset of every
 * face/slice image, and store the total size in nv30mt->total_size.
 *
 * Images are laid out face by face (six faces for cube maps, depth0 slices
 * for 3D textures, one otherwise), with all levels of a face consecutive.
 *
 * NOTE(review): the per-level CALLOC() results are not checked.
 */
static void
nv30_miptree_layout(struct nv30_miptree *nv30mt)
{
struct pipe_texture *pt = &nv30mt->base;
uint width = pt->width0;
uint offset = 0;
int nr_faces, l, f;
uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
PIPE_TEXTURE_USAGE_RENDER_TARGET |
PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
PIPE_TEXTURE_USAGE_SCANOUT);

if (pt->target == PIPE_TEXTURE_CUBE) {
nr_faces = 6;
} else
if (pt->target == PIPE_TEXTURE_3D) {
nr_faces = pt->depth0;
} else {
nr_faces = 1;
}

for (l = 0; l <= pt->last_level; l++) {
/*
 * Linear textures with one of the usages above use the level-0 stride,
 * rounded up to 64 bytes, for every level; otherwise the pitch follows
 * the level's own width.
 */
if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
else
nv30mt->level[l].pitch = util_format_get_stride(pt->format, width);

nv30mt->level[l].image_offset =
CALLOC(nr_faces, sizeof(unsigned));

width = u_minify(width, 1);
}

for (f = 0; f < nr_faces; f++) {
for (l = 0; l < pt->last_level; l++) {
nv30mt->level[l].image_offset[f] = offset;

/*
 * Non-linear levels are padded to 64 bytes while the next level is
 * still larger than one texel in both dimensions.
 */
if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64);
else
offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
}

/* Here l == last_level: the final level is never padded. */
nv30mt->level[l].image_offset[f] = offset;
offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
}

nv30mt->total_size = offset;
}

/*
 * Create a texture from template `pt`, including its backing buffer.
 *
 * The texture is marked linear (NOUVEAU_TEXTURE_USAGE_LINEAR) when it is
 * not power-of-two sized, is used for scanout/display/depth-stencil, is
 * dynamic, has a format outside the list below, or is a render target
 * whose level-0 stride is below 64 bytes.
 *
 * Returns the new texture, or NULL if an allocation fails.
 */
static struct pipe_texture *
nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
{
	struct nv30_miptree *mt;
	unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL |
			     NOUVEAU_BUFFER_USAGE_TEXTURE;
	int l;

	mt = MALLOC(sizeof(struct nv30_miptree));
	if (!mt)
		return NULL;
	mt->base = *pt;
	pipe_reference_init(&mt->base.reference, 1);
	mt->base.screen = pscreen;

	/* Swizzled textures must be POT */
	if (pt->width0 & (pt->width0 - 1) ||
	    pt->height0 & (pt->height0 - 1))
		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
	else
	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT |
			     PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
			     PIPE_TEXTURE_USAGE_DEPTH_STENCIL))
		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
	else
	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
	else {
		switch (pt->format) {
		/* TODO: Figure out which formats can be swizzled */
		case PIPE_FORMAT_B8G8R8A8_UNORM:
		case PIPE_FORMAT_B8G8R8X8_UNORM:
		case PIPE_FORMAT_R16_SNORM:
		case PIPE_FORMAT_B5G6R5_UNORM:
		case PIPE_FORMAT_L8A8_UNORM:
		case PIPE_FORMAT_A8_UNORM:
		case PIPE_FORMAT_L8_UNORM:
		case PIPE_FORMAT_I8_UNORM:
		{
			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE))
				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
			break;
		}
		default:
			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
		}
	}

	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;

	/* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
	 * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
	 * This also happens for small mipmaps of large textures. */
	if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;

	nv30_miptree_layout(mt);

	mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage,
					    mt->total_size);
	if (!mt->buffer) {
		/*
		 * The layout pass allocated one offset array per level;
		 * release them together with the texture.
		 */
		for (l = 0; l <= pt->last_level; l++) {
			if (mt->level[l].image_offset)
				FREE(mt->level[l].image_offset);
		}
		FREE(mt);
		return NULL;
	}
	mt->bo = nouveau_bo(mt->buffer);

	return &mt->base;
}

/*
 * Wrap an existing buffer `pb` in a texture described by `pt`.
 *
 * Only 2D textures with a single level and depth 1 are accepted.  The
 * texture is marked linear, uses stride[0] as its pitch and takes its own
 * reference on `pb`.
 *
 * Returns the new texture, or NULL if the template is unsupported or an
 * allocation fails.
 */
static struct pipe_texture *
nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
		     const unsigned *stride, struct pipe_buffer *pb)
{
	struct nv30_miptree *mt;

	/* Only supports 2D, non-mipmapped textures for the moment */
	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
	    pt->depth0 != 1)
		return NULL;

	mt = CALLOC_STRUCT(nv30_miptree);
	if (!mt)
		return NULL;

	mt->base = *pt;
	pipe_reference_init(&mt->base.reference, 1);
	mt->base.screen = pscreen;
	mt->level[0].pitch = stride[0];
	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
	if (!mt->level[0].image_offset) {
		/* Surface creation indexes this array, so it must exist. */
		FREE(mt);
		return NULL;
	}

	/* Assume whoever created this buffer expects it to be linear for now */
	mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;

	pipe_buffer_reference(&mt->buffer, pb);
	mt->bo = nouveau_bo(mt->buffer);
	return &mt->base;
}

/*
 * Destroy a texture: drop the buffer reference, free the offset array of
 * every level, then free the texture itself.
 */
static void
nv30_miptree_destroy(struct pipe_texture *pt)
{
	struct nv30_miptree *mt = (struct nv30_miptree *)pt;
	int level = 0;

	pipe_buffer_reference(&mt->buffer, NULL);

	while (level <= pt->last_level) {
		if (mt->level[level].image_offset) {
			FREE(mt->level[level].image_offset);
		}
		level++;
	}

	FREE(mt);
}

/*
 * Create a surface for one image (face/level/zslice) of texture `pt`.
 *
 * The surface holds a reference on the texture.  When the pitch is not a
 * multiple of 64 and the surface is requested for GPU writes without the
 * NO_RENDER flag, it is wrapped by nv04_surface_wrap_for_render() and the
 * wrapper is returned instead.
 *
 * Returns NULL if the surface cannot be allocated.
 */
static struct pipe_surface *
nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
			 unsigned face, unsigned level, unsigned zslice,
			 unsigned flags)
{
	struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt;
	struct nv04_surface *ns = CALLOC_STRUCT(nv04_surface);
	unsigned image;

	if (!ns)
		return NULL;

	pipe_texture_reference(&ns->base.texture, pt);
	pipe_reference_init(&ns->base.reference, 1);

	ns->base.format = pt->format;
	ns->base.width = u_minify(pt->width0, level);
	ns->base.height = u_minify(pt->height0, level);
	ns->base.usage = flags;
	ns->base.face = face;
	ns->base.level = level;
	ns->base.zslice = zslice;
	ns->pitch = nv30mt->level[level].pitch;

	/* Cube maps are indexed by face, 3D textures by slice. */
	switch (pt->target) {
	case PIPE_TEXTURE_CUBE:
		image = face;
		break;
	case PIPE_TEXTURE_3D:
		image = zslice;
		break;
	default:
		image = 0;
		break;
	}
	ns->base.offset = nv30mt->level[level].image_offset[image];

	/* create a linear temporary that we can render into if necessary.
	 * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
	 * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
	if (!(ns->pitch & 63))
		return &ns->base;

	if ((ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) != PIPE_BUFFER_USAGE_GPU_WRITE)
		return &ns->base;

	return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base;
}

/*
 * Destroy a surface created by nv30_miptree_surface_new().
 *
 * If the surface has a backing surface, that one is destroyed first
 * (recursively).  When the backing surface was usable for GPU writes, the
 * 2D engine's copy hook is invoked for the full surface area before it is
 * destroyed.
 * NOTE(review): the copy arguments suggest the direction is from `ps` into
 * the backing surface -- confirm against the eng2d copy() signature.
 */
static void
nv30_miptree_surface_del(struct pipe_surface *ps)
{
struct nv04_surface* ns = (struct nv04_surface*)ps;
if(ns->backing)
{
struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen;
if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
nv30_miptree_surface_del(&ns->backing->base);
}

/* Drop the texture reference taken at creation, then free the surface. */
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}

/* Install the texture and surface entry points of this file on the screen. */
void
nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
{
	/* Nouveau-specific hook for wrapping existing buffers. */
	nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket;

	/* Texture lifetime. */
	pscreen->texture_create = nv30_miptree_create;
	pscreen->texture_destroy = nv30_miptree_destroy;

	/* Surface lifetime. */
	pscreen->get_tex_surface = nv30_miptree_surface_new;
	pscreen->tex_surface_destroy = nv30_miptree_surface_del;
}

+ 0
- 127
src/gallium/drivers/nv30/nv30_query.c 파일 보기

@@ -1,127 +0,0 @@
#include "pipe/p_context.h"

#include "nv30_context.h"

/* Driver-side state of one query object. */
struct nv30_query {
/* Slot allocated from the screen's query heap while a result is pending. */
struct nouveau_resource *object;
/* Query type passed to create_query(). */
unsigned type;
/* TRUE once `result` holds the value read back from the notifier. */
boolean ready;
uint64_t result;
};

/* Reinterpret an opaque gallium query handle as the driver's query type. */
static INLINE struct nv30_query *
nv30_query(struct pipe_query *pipe)
{
	struct nv30_query *q = (struct nv30_query *)pipe;

	return q;
}

/*
 * Allocate a zero-initialised query object of the given type.
 * Returns NULL if the allocation fails.
 */
static struct pipe_query *
nv30_query_create(struct pipe_context *pipe, unsigned query_type)
{
	struct nv30_query *q;

	q = CALLOC(1, sizeof(struct nv30_query));
	if (!q)
		return NULL;
	q->type = query_type;

	return (struct pipe_query *)q;
}

/* Destroy a query, releasing its query-heap slot if it still holds one. */
static void
nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
	struct nv30_query *query = nv30_query(pq);

	if (query->object) {
		nouveau_resource_free(&query->object);
	}

	FREE(query);
}

/*
 * Start an occlusion query.
 *
 * Allocates a slot from the screen's query heap, resets the matching
 * notifier and emits the query-reset methods.  If the query still holds a
 * slot from an earlier, unread run, that result is fetched first (waiting
 * for completion).
 */
static void
nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_query *q = nv30_query(pq);
struct nv30_screen *screen = nv30->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;

assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);

/* Happens when end_query() is called, then another begin_query()
* without querying the result in-between. For now we'll wait for
* the existing query to notify completion, but it could be better.
*/
if (q->object) {
uint64_t tmp;
pipe->get_query_result(pipe, pq, 1, &tmp);
}

/*
 * NOTE(review): allocation failure is only caught by assert(0); in builds
 * without asserts q->object is dereferenced regardless.
 */
if (nouveau_resource_alloc(nv30->screen->query_heap, 1, NULL, &q->object))
assert(0);
nouveau_notifier_reset(nv30->screen->query, q->object->start);

BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1);
OUT_RING (chan, 1);

q->ready = FALSE;
}

/*
 * Finish a query: emit the QUERY_GET method addressed at the query's
 * slot, then fire the ring.
 */
static void
nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
	struct nv30_query *q = nv30_query(pq);
	struct nv30_context *nv30 = nv30_context(pipe);
	struct nouveau_channel *chan = nv30->screen->base.channel;
	struct nouveau_grobj *rankine = nv30->screen->rankine;

	BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1);
	/* Each slot is addressed in units of 32. */
	OUT_RING  (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
		   ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
	FIRE_RING(chan);
}

/*
 * Fetch the result of an occlusion query.
 *
 * The first successful call reads the value from the notifier, caches it
 * in the query and releases the query-heap slot; later calls return the
 * cached value.
 *
 * Returns TRUE with *result filled in, or FALSE when the result is not
 * available: either the hardware has not finished and `wait` is FALSE, or
 * there is no pending hardware query to read from.
 */
static boolean
nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
		  boolean wait, uint64_t *result)
{
	struct nv30_context *nv30 = nv30_context(pipe);
	struct nv30_query *q = nv30_query(pq);

	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);

	if (!q->ready) {
		unsigned status;

		/*
		 * The slot is only needed while the result is still pending;
		 * once cached it has been released, so it must not be
		 * required for repeated reads.
		 */
		if (!q->object)
			return FALSE;

		status = nouveau_notifier_status(nv30->screen->query,
						 q->object->start);
		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
			if (wait == FALSE)
				return FALSE;

			nouveau_notifier_wait_status(nv30->screen->query,
					q->object->start,
					NV_NOTIFY_STATE_STATUS_COMPLETED, 0);
		}

		q->result = nouveau_notifier_return_val(nv30->screen->query,
							q->object->start);
		q->ready = TRUE;
		nouveau_resource_free(&q->object);
	}

	*result = q->result;
	return TRUE;
}

void
nv30_init_query_functions(struct nv30_context *nv30)
{
nv30->pipe.create_query = nv30_query_create;
nv30->pipe.destroy_query = nv30_query_destroy;
nv30->pipe.begin_query = nv30_query_begin;
nv30->pipe.end_query = nv30_query_end;
nv30->pipe.get_query_result = nv30_query_result;
}

+ 0
- 490
src/gallium/drivers/nv30/nv30_shader.h 파일 보기

@@ -1,490 +0,0 @@
#ifndef __NV30_SHADER_H__
#define __NV30_SHADER_H__

/* Vertex programs instruction set
*
* 128bit opcodes, split into 4 32-bit ones for ease of use.
*
* Non-native instructions
* ABS - MOV + NV40_VP_INST0_DEST_ABS
* POW - EX2 + MUL + LG2
* SUB - ADD, second source negated
* SWZ - MOV
* XPD -
*
* Register access
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
* - Only one CONST can be accessed per-instruction (move extras into TEMPs)
*
* Relative Addressing
* According to the value returned for
* MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
*
* there are only two address registers available. The destination in the
* ARL instruction is set to TEMP <n> (The temp isn't actually written).
*
* When using vanilla ARB_v_p, the proprietary driver will squish both the
* available ADDRESS regs into the first hardware reg in the X and Y
* components.
*
* To use an address reg as an index into consts, the CONST_SRC is set to
* (const_base + offset) and INDEX_CONST is set.
*
* To access the second address reg use ADDR_REG_SELECT_1. A particular
* component of the address regs is selected with ADDR_SWZ.
*
* Only one address register can be accessed per instruction.
*
* Conditional execution (see NV_vertex_program{2,3} for details) Conditional
* execution of an instruction is enabled by setting COND_TEST_ENABLE, and
* selecting the condition which will allow the test to pass with
* COND_{FL,LT,...}. It is possible to swizzle the values in the condition
* register, which allows for testing against an individual component.
*
* Branching:
*
* The BRA/CAL instructions seem to follow a slightly different opcode
* layout. The destination instruction ID (IADDR) overlaps a source field.
* Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
* command, and is incremented automatically on each UPLOAD_INST FIFO
* command.
*
* Conditional branching is achieved by using the condition tests described
* above. There don't appear to be any dedicated looping instructions, but
* loops can be built from a temp reg + conditional branching.
*
* Subroutines may be uploaded before the main program itself, but the first
* executed instruction is determined by the PROGRAM_START_ID FIFO command.
*
*/

/* DWORD 0 */

#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
#define NV30_VP_INST_VEC_RESULT (1 << 20)
#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
#define NV30_VP_INST_COND_SHIFT 11
#define NV30_VP_INST_COND_MASK (0x07 << 11)
# define NV30_VP_INST_COND_FL 0 /* guess */
# define NV30_VP_INST_COND_LT 1
# define NV30_VP_INST_COND_EQ 2
# define NV30_VP_INST_COND_LE 3
# define NV30_VP_INST_COND_GT 4
# define NV30_VP_INST_COND_NE 5
# define NV30_VP_INST_COND_GE 6
# define NV30_VP_INST_COND_TR 7 /* guess */
#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)

/* DWORD 1 */
/* Low four bits of the scalar opcode, stored in bits 28-31 of DWORD 1.
 * The mask constant is unsigned: shifting a signed int into bit 31 is
 * undefined behaviour in C. */
#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0FU << 28)
# define NV30_VP_INST_OP_NOP 0x00
# define NV30_VP_INST_OP_RCP 0x02
# define NV30_VP_INST_OP_RCC 0x03
# define NV30_VP_INST_OP_RSQ 0x04
# define NV30_VP_INST_OP_EXP 0x05
# define NV30_VP_INST_OP_LOG 0x06
# define NV30_VP_INST_OP_LIT 0x07
# define NV30_VP_INST_OP_BRA 0x09
# define NV30_VP_INST_OP_CAL 0x0B
# define NV30_VP_INST_OP_RET 0x0C
# define NV30_VP_INST_OP_LG2 0x0D
# define NV30_VP_INST_OP_EX2 0x0E
# define NV30_VP_INST_OP_SIN 0x0F
# define NV30_VP_INST_OP_COS 0x10
#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
# define NV30_VP_INST_OP_NOPV 0x00
# define NV30_VP_INST_OP_MOV 0x01
# define NV30_VP_INST_OP_MUL 0x02
# define NV30_VP_INST_OP_ADD 0x03
# define NV30_VP_INST_OP_MAD 0x04
# define NV30_VP_INST_OP_DP3 0x05
# define NV30_VP_INST_OP_DP4 0x07
# define NV30_VP_INST_OP_DPH 0x06
# define NV30_VP_INST_OP_DST 0x08
# define NV30_VP_INST_OP_MIN 0x09
# define NV30_VP_INST_OP_MAX 0x0A
# define NV30_VP_INST_OP_SLT 0x0B
# define NV30_VP_INST_OP_SGE 0x0C
# define NV30_VP_INST_OP_ARL 0x0D
# define NV30_VP_INST_OP_FRC 0x0E
# define NV30_VP_INST_OP_FLR 0x0F
# define NV30_VP_INST_OP_SEQ 0x10
# define NV30_VP_INST_OP_SFL 0x11
# define NV30_VP_INST_OP_SGT 0x12
# define NV30_VP_INST_OP_SLE 0x13
# define NV30_VP_INST_OP_SNE 0x14
# define NV30_VP_INST_OP_STR 0x15
# define NV30_VP_INST_OP_SSG 0x16
# define NV30_VP_INST_OP_ARR 0x17
# define NV30_VP_INST_OP_ARA 0x18
#define NV30_VP_INST_CONST_SRC_SHIFT 14
#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
# define NV30_VP_INST_IN_NORMAL 2
# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
# define NV30_VP_INST_IN_COL1 4
# define NV30_VP_INST_IN_FOGC 5
# define NV30_VP_INST_IN_TC0 8
# define NV30_VP_INST_IN_TC(n) (8+n)
#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/

/* Please note: the IADDR fields overlap other fields because they are used
* only for branch instructions. See the "Branching:" section above.
*
* DWORD 2
*/
/* Low six bits of source 0, stored in bits 26-31 of DWORD 2. The mask
 * constant is unsigned: 0x3F << 26 does not fit in a signed int, so the
 * signed form is undefined behaviour in C. */
#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
#define NV30_VP_INST_SRC0L_MASK (0x3FU <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
/* Branch target (IADDR) field, only meaningful for branch instructions.
 * The mask constant is unsigned: shifting a signed int into bit 31 is
 * undefined behaviour in C.
 * NOTE(review): IADDR_SHIFT (2) and IADDR_MASK (bits 28-31) do not describe
 * the same bit range, and the trailing comment on the mask repeats the
 * SRC2 low-bits description - confirm the intended field layout. */
#define NV30_VP_INST_IADDR_SHIFT 2
#define NV30_VP_INST_IADDR_MASK (0xFU << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */

/* DWORD 3 */
/* Low four bits of source 2, stored in bits 28-31 of DWORD 3. The mask
 * constant is unsigned: shifting a signed int into bit 31 is undefined
 * behaviour in C. */
#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
#define NV30_VP_INST_SRC2L_MASK (0x0FU <<28) /*NV20*/
#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
#define NV30_VP_INST_DEST_SHIFT 2
#define NV30_VP_INST_DEST_MASK (0x0F << 2)
# define NV30_VP_INST_DEST_POS 0
# define NV30_VP_INST_DEST_BFC0 1
# define NV30_VP_INST_DEST_BFC1 2
# define NV30_VP_INST_DEST_COL0 3
# define NV30_VP_INST_DEST_COL1 4
# define NV30_VP_INST_DEST_FOGC 5
# define NV30_VP_INST_DEST_PSZ 6
# define NV30_VP_INST_DEST_TC(n) (8+n)

#define NV30_VP_INST_LAST (1 << 0)

/* Useful to split the source selection regs into their pieces */
#define NV30_VP_SRC0_HIGH_SHIFT 6
#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
#define NV30_VP_SRC0_LOW_MASK 0x0000003F
#define NV30_VP_SRC2_HIGH_SHIFT 4
#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
#define NV30_VP_SRC2_LOW_MASK 0x0000000F


/* Source-register definition - matches NV20 exactly */
#define NV30_VP_SRC_NEGATE (1<<14)
#define NV30_VP_SRC_SWZ_X_SHIFT 12
#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
#define NV30_VP_SRC_SWZ_Y_SHIFT 10
#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
#define NV30_VP_SRC_SWZ_Z_SHIFT 8
#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
#define NV30_VP_SRC_SWZ_W_SHIFT 6
#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
/* Temp register index and register type fields of a source selector.
 * NOTE(review): the TEMP_ID mask is positioned at bit 0, which neither
 * matches TEMP_SRC_SHIFT (2) nor avoids the TYPE mask below - confirm
 * whether the mask was meant to be (0x0F << 2). */
#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
#define NV30_VP_SRC_REG_TYPE_SHIFT 0
#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
#define NV30_VP_SRC_REG_TYPE_TEMP 1
#define NV30_VP_SRC_REG_TYPE_INPUT 2
#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */

/*
* Each fragment program opcode appears to be comprised of 4 32-bit values.
*
* 0 - Opcode, output reg/mask, ATTRIB source
* 1 - Source 0
* 2 - Source 1
* 3 - Source 2
*
* There appears to be no special difference between result regs and temp regs.
* result.color == R0.xyzw
* result.depth == R1.z
* When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
* otherwise it is set to 1.
*
* Constants are inserted directly after the instruction that uses them.
*
* It appears that it's not possible to use two input registers in one
* instruction as the input sourcing is done in the instruction dword
* and not the source selection dwords. As such instructions such as:
*
* ADD result.color, fragment.color, fragment.texcoord[0];
*
* must be split into two MOVs followed by an ADD (nvidia does this), but
* I'm not sure why it isn't just one MOV with the second input sourced
* directly in the ADD instruction.
*
* Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
* negation requires multiplication with a const.
*
* Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE.
* The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0), as SWIZZLE_ZERO
* is implemented simply by not writing to the relevant components of the destination.
*
* Conditional execution
* TODO
*
* Non-native instructions:
* LIT
* LRP - MAD+MAD
* SUB - ADD, negate second source
* RSQ - LG2 + EX2
* POW - LG2 + MUL + EX2
* SCS - COS + SIN
* XPD
*/

//== Opcode / Destination selection ==
#define NV30_FP_OP_PROGRAM_END (1 << 0)
#define NV30_FP_OP_OUT_REG_SHIFT 1
#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
/* Needs to be set when writing outputs to get expected result.. */
#define NV30_FP_OP_OUT_REG_HALF (1 << 7)
#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8)
#define NV30_FP_OP_OUTMASK_SHIFT 9
#define NV30_FP_OP_OUTMASK_MASK (0xF << 9)
# define NV30_FP_OP_OUT_X (1<<9)
# define NV30_FP_OP_OUT_Y (1<<10)
# define NV30_FP_OP_OUT_Z (1<<11)
# define NV30_FP_OP_OUT_W (1<<12)
/* Uncertain about these, especially the input_src values.. it's possible that
* they can be dynamically changed.
*/
#define NV30_FP_OP_INPUT_SRC_SHIFT 13
#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13)
# define NV30_FP_OP_INPUT_SRC_POSITION 0x0
# define NV30_FP_OP_INPUT_SRC_COL0 0x1
# define NV30_FP_OP_INPUT_SRC_COL1 0x2
# define NV30_FP_OP_INPUT_SRC_FOGC 0x3
# define NV30_FP_OP_INPUT_SRC_TC0 0x4
# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
#define NV30_FP_OP_TEX_UNIT_SHIFT 17
#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
#define NV30_FP_OP_PRECISION_SHIFT 22
#define NV30_FP_OP_PRECISION_MASK (3 << 22)
# define NV30_FP_PRECISION_FP32 0
# define NV30_FP_PRECISION_FP16 1
# define NV30_FP_PRECISION_FX12 2
#define NV30_FP_OP_OPCODE_SHIFT 24
#define NV30_FP_OP_OPCODE_MASK (0x3F << 24)
# define NV30_FP_OP_OPCODE_NOP 0x00
# define NV30_FP_OP_OPCODE_MOV 0x01
# define NV30_FP_OP_OPCODE_MUL 0x02
# define NV30_FP_OP_OPCODE_ADD 0x03
# define NV30_FP_OP_OPCODE_MAD 0x04
# define NV30_FP_OP_OPCODE_DP3 0x05
# define NV30_FP_OP_OPCODE_DP4 0x06
# define NV30_FP_OP_OPCODE_DST 0x07
# define NV30_FP_OP_OPCODE_MIN 0x08
# define NV30_FP_OP_OPCODE_MAX 0x09
# define NV30_FP_OP_OPCODE_SLT 0x0A
# define NV30_FP_OP_OPCODE_SGE 0x0B
# define NV30_FP_OP_OPCODE_SLE 0x0C
# define NV30_FP_OP_OPCODE_SGT 0x0D
# define NV30_FP_OP_OPCODE_SNE 0x0E
# define NV30_FP_OP_OPCODE_SEQ 0x0F
# define NV30_FP_OP_OPCODE_FRC 0x10
# define NV30_FP_OP_OPCODE_FLR 0x11
# define NV30_FP_OP_OPCODE_KIL 0x12
# define NV30_FP_OP_OPCODE_PK4B 0x13
# define NV30_FP_OP_OPCODE_UP4B 0x14
# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
# define NV30_FP_OP_OPCODE_TEX 0x17
# define NV30_FP_OP_OPCODE_TXP 0x18
# define NV30_FP_OP_OPCODE_TXD 0x19
# define NV30_FP_OP_OPCODE_RCP 0x1A
# define NV30_FP_OP_OPCODE_RSQ 0x1B
# define NV30_FP_OP_OPCODE_EX2 0x1C
# define NV30_FP_OP_OPCODE_LG2 0x1D
# define NV30_FP_OP_OPCODE_LIT 0x1E
# define NV30_FP_OP_OPCODE_LRP 0x1F
# define NV30_FP_OP_OPCODE_STR 0x20
# define NV30_FP_OP_OPCODE_SFL 0x21
# define NV30_FP_OP_OPCODE_COS 0x22
# define NV30_FP_OP_OPCODE_SIN 0x23
# define NV30_FP_OP_OPCODE_PK2H 0x24
# define NV30_FP_OP_OPCODE_UP2H 0x25
# define NV30_FP_OP_OPCODE_POW 0x26
# define NV30_FP_OP_OPCODE_PK4UB 0x27
# define NV30_FP_OP_OPCODE_UP4UB 0x28
# define NV30_FP_OP_OPCODE_PK2US 0x29
# define NV30_FP_OP_OPCODE_UP2US 0x2A
# define NV30_FP_OP_OPCODE_DP2A 0x2E
# define NV30_FP_OP_OPCODE_TXB 0x31
# define NV30_FP_OP_OPCODE_RFL 0x36
# define NV30_FP_OP_OPCODE_DIV 0x3A
/* Bit 31 of the opcode dword. The constant is unsigned because 1 << 31
 * overflows a signed int, which is undefined behaviour in C. */
#define NV30_FP_OP_OUT_SAT (1U << 31)

/* high order bits of SRC0 */
#define NV30_FP_OP_OUT_ABS (1 << 29)
#define NV30_FP_OP_COND_SWZ_W_SHIFT 27
#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27)
#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25
#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25)
#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23
#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23)
#define NV30_FP_OP_COND_SWZ_X_SHIFT 21
#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21)
#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21
#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
#define NV30_FP_OP_COND_SHIFT 18
#define NV30_FP_OP_COND_MASK (0x07 << 18)
# define NV30_FP_OP_COND_FL 0
# define NV30_FP_OP_COND_LT 1
# define NV30_FP_OP_COND_EQ 2
# define NV30_FP_OP_COND_LE 3
# define NV30_FP_OP_COND_GT 4
# define NV30_FP_OP_COND_NE 5
# define NV30_FP_OP_COND_GE 6
# define NV30_FP_OP_COND_TR 7

/* high order bits of SRC1 */
#define NV30_FP_OP_DST_SCALE_SHIFT 28
#define NV30_FP_OP_DST_SCALE_MASK (3 << 28)
#define NV30_FP_OP_DST_SCALE_1X 0
#define NV30_FP_OP_DST_SCALE_2X 1
#define NV30_FP_OP_DST_SCALE_4X 2
#define NV30_FP_OP_DST_SCALE_8X 3
#define NV30_FP_OP_DST_SCALE_INV_2X 5
#define NV30_FP_OP_DST_SCALE_INV_4X 6
#define NV30_FP_OP_DST_SCALE_INV_8X 7


/* high order bits of SRC2 */
#define NV30_FP_OP_INDEX_INPUT (1 << 30)

//== Register selection ==
#define NV30_FP_REG_TYPE_SHIFT 0
#define NV30_FP_REG_TYPE_MASK (3 << 0)
# define NV30_FP_REG_TYPE_TEMP 0
# define NV30_FP_REG_TYPE_INPUT 1
# define NV30_FP_REG_TYPE_CONST 2
#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */
#define NV30_FP_REG_SRC_MASK (31 << 2)
#define NV30_FP_REG_SRC_HALF (1 << 8)
#define NV30_FP_REG_SWZ_ALL_SHIFT 9
#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9)
#define NV30_FP_REG_SWZ_X_SHIFT 9
#define NV30_FP_REG_SWZ_X_MASK (3 << 9)
#define NV30_FP_REG_SWZ_Y_SHIFT 11
#define NV30_FP_REG_SWZ_Y_MASK (3 << 11)
#define NV30_FP_REG_SWZ_Z_SHIFT 13
#define NV30_FP_REG_SWZ_Z_MASK (3 << 13)
#define NV30_FP_REG_SWZ_W_SHIFT 15
#define NV30_FP_REG_SWZ_W_MASK (3 << 15)
# define NV30_FP_SWIZZLE_X 0
# define NV30_FP_SWIZZLE_Y 1
# define NV30_FP_SWIZZLE_Z 2
# define NV30_FP_SWIZZLE_W 3
#define NV30_FP_REG_NEGATE (1 << 17)

#define NV30SR_NONE 0
#define NV30SR_OUTPUT 1
#define NV30SR_INPUT 2
#define NV30SR_TEMP 3
#define NV30SR_CONST 4

/* Software description of a shader register operand, passed by value
 * between the nv30_sr*() helpers below. */
struct nv30_sreg {
/* Register file and index within it, as given to nv30_sr().
 * Presumably type is one of the NV30SR_* values - confirm with callers. */
int type;
int index;

/* Scale selector; nv30_sr() initialises it to DEF_SCALE and
 * nv30_sr_scale() overrides it. */
int dst_scale;

/* Operand modifiers: negate is toggled by nv30_sr_neg(), abs is set to 1
 * by nv30_sr_abs(); swz[] holds the per-component swizzle, identity
 * {0, 1, 2, 3} by default. */
int negate;
int abs;
int swz[4];

/* Condition-code fields; nv30_sr() initialises them to "no update",
 * register 0, DEF_CTEST and an identity swizzle. */
int cc_update;
int cc_update_reg;
int cc_test;
int cc_test_reg;
int cc_swz[4];
};

static INLINE struct nv30_sreg
nv30_sr(int type, int index)
{
struct nv30_sreg temp = {
.type = type,
.index = index,
.dst_scale = DEF_SCALE,
.abs = 0,
.negate = 0,
.swz = { 0, 1, 2, 3 },
.cc_update = 0,
.cc_update_reg = 0,
.cc_test = DEF_CTEST,
.cc_test_reg = 0,
.cc_swz = { 0, 1, 2, 3 },
};
return temp;
}

/**
 * Return a copy of \p src with an additional swizzle applied on top of the
 * one it already carries: each destination component takes the source's
 * existing swizzle entry selected by x/y/z/w.
 */
static INLINE struct nv30_sreg
nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w)
{
	/* Read all four selections before overwriting anything, so the
	 * composition always refers to the incoming swizzle. */
	const int sel_x = src.swz[x];
	const int sel_y = src.swz[y];
	const int sel_z = src.swz[z];
	const int sel_w = src.swz[w];

	src.swz[SWZ_X] = sel_x;
	src.swz[SWZ_Y] = sel_y;
	src.swz[SWZ_Z] = sel_z;
	src.swz[SWZ_W] = sel_w;

	return src;
}

static INLINE struct nv30_sreg
nv30_sr_neg(struct nv30_sreg src)
{
src.negate = !src.negate;
return src;
}

static INLINE struct nv30_sreg
nv30_sr_abs(struct nv30_sreg src)
{
src.abs = 1;
return src;
}

static INLINE struct nv30_sreg
nv30_sr_scale(struct nv30_sreg src, int scale)
{
src.dst_scale = scale;
return src;
}

#endif

+ 0
- 782
src/gallium/drivers/nv30/nv30_state.c 파일 보기

@@ -1,782 +0,0 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"

#include "tgsi/tgsi_parse.h"

#include "nv30_context.h"
#include "nv30_state.h"

/**
 * Create a blend CSO.
 *
 * Translates render target 0 of \p cso into a state object containing the
 * blend function/equation, colour mask, logic op and dither methods, and
 * keeps a copy of the pipe state alongside it.
 *
 * \return the new nv30_blend_state, or NULL if it could not be allocated.
 */
static void *
nv30_blend_state_create(struct pipe_context *pipe,
			const struct pipe_blend_state *cso)
{
	struct nv30_context *nv30 = nv30_context(pipe);
	struct nouveau_grobj *rankine = nv30->screen->rankine;
	struct nv30_blend_state *bso;
	struct nouveau_stateobj *so;

	/* Fail cleanly instead of dereferencing a NULL state below. */
	bso = CALLOC(1, sizeof(*bso));
	if (!bso)
		return NULL;

	so = so_new(5, 8, 0);

	if (cso->rt[0].blend_enable) {
		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
		so_data  (so, 1);
		so_data  (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) |
			       nvgl_blend_func(cso->rt[0].rgb_src_factor));
		so_data  (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 |
			      nvgl_blend_func(cso->rt[0].rgb_dst_factor));
		/* FIXME: Gallium assumes GL_EXT_blend_func_separate.
		   It is not the case for NV30 */
		so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1);
		so_data  (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
	} else {
		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1);
		so_data  (so, 0);
	}

	/* Colour mask: one enable bit per channel, A/R/G/B at bits 24/16/8/0. */
	so_method(so, rankine, NV34TCL_COLOR_MASK, 1);
	so_data  (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
		       ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
		       ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) |
		       ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 <<  0) : 0)));

	if (cso->logicop_enable) {
		so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
		so_data  (so, 1);
		so_data  (so, nvgl_logicop_func(cso->logicop_func));
	} else {
		so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1);
		so_data  (so, 0);
	}

	so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1);
	so_data  (so, cso->dither ? 1 : 0);

	/* The CSO takes its own reference; drop the local one. */
	so_ref(so, &bso->so);
	so_ref(NULL, &so);
	bso->pipe = *cso;
	return (void *)bso;
}

/**
 * Make \p hwcso the current blend state and flag blend state as dirty.
 */
static void
nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_BLEND;
	ctx->blend = hwcso;
}

/**
 * Destroy a blend CSO: drop its state object reference and free it.
 */
static void
nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_blend_state *blend = hwcso;

	so_ref(NULL, &blend->so);
	FREE(blend);
}


/**
 * Translate a PIPE_TEX_WRAP_* mode into the hardware wrap value, returned
 * already shifted down by NV34TCL_TX_WRAP_S_SHIFT so the caller can place
 * it in the S, T or R field.
 *
 * The MIRROR_CLAMP* modes are not handled; they and any other unknown mode
 * are reported and treated as REPEAT.
 */
static INLINE unsigned
wrap_mode(unsigned wrap)
{
	/* REPEAT doubles as the fallback for unknown modes. */
	unsigned hw_wrap = NV34TCL_TX_WRAP_S_REPEAT;

	switch (wrap) {
	case PIPE_TEX_WRAP_REPEAT:
		break;
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		hw_wrap = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
		break;
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		hw_wrap = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
		break;
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		hw_wrap = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
		break;
	case PIPE_TEX_WRAP_CLAMP:
		hw_wrap = NV34TCL_TX_WRAP_S_CLAMP;
		break;
	default:
		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
		break;
	}

	return hw_wrap >> NV34TCL_TX_WRAP_S_SHIFT;
}

/**
 * Create a sampler CSO.
 *
 * Packs wrap modes, anisotropy, min/mag/mip filters, LOD bias and range,
 * shadow compare function and border colour from \p cso into the fmt /
 * wrap / en / filt / bcol words of an nv30_sampler_state.
 *
 * \return the new nv30_sampler_state, or NULL if it could not be allocated.
 */
static void *
nv30_sampler_state_create(struct pipe_context *pipe,
			  const struct pipe_sampler_state *cso)
{
	struct nv30_sampler_state *ps;
	uint32_t filter = 0;

	ps = MALLOC(sizeof(struct nv30_sampler_state));
	if (!ps)
		return NULL;

	ps->fmt = 0;
	/* TODO: Not all RECTs formats have this bit set, bits 15-8 of format
	   are the tx format to use. We should store normalized coord flag
	   in sampler state structure, and set appropriate format in
	   nvxx_fragtex_build()
	 */
	/*NV34TCL_TX_FORMAT_RECT*/
	/*if (!cso->normalized_coords) {
		ps->fmt |= (1<<14) ;
	}*/

	ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
		    (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
		    (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT));

	ps->en = 0;

	/* Pick the highest anisotropy level not exceeding the request. */
	if (cso->max_anisotropy >= 8) {
		ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
	} else
	if (cso->max_anisotropy >= 4) {
		ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
	} else
	if (cso->max_anisotropy >= 2) {
		ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
	}

	switch (cso->mag_img_filter) {
	case PIPE_TEX_FILTER_LINEAR:
		filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
		break;
	case PIPE_TEX_FILTER_NEAREST:
	default:
		filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST;
		break;
	}

	switch (cso->min_img_filter) {
	case PIPE_TEX_FILTER_LINEAR:
		switch (cso->min_mip_filter) {
		case PIPE_TEX_MIPFILTER_NEAREST:
			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
			break;
		case PIPE_TEX_MIPFILTER_LINEAR:
			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
			break;
		case PIPE_TEX_MIPFILTER_NONE:
		default:
			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR;
			break;
		}
		break;
	case PIPE_TEX_FILTER_NEAREST:
	default:
		switch (cso->min_mip_filter) {
		case PIPE_TEX_MIPFILTER_NEAREST:
			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
			break;
		case PIPE_TEX_MIPFILTER_LINEAR:
			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
			break;
		case PIPE_TEX_MIPFILTER_NONE:
		default:
			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST;
			break;
		}
		break;
	}

	ps->filt = filter;

	{
		float limit;

		/* The bias is stored as a 13-bit fixed-point value with 8
		 * fractional bits, so it must be clamped before conversion;
		 * use the clamped value, not the raw request. */
		limit = CLAMP(cso->lod_bias, -16.0, 15.0);
		ps->filt |= (int)(limit * 256.0) & 0x1fff;

		limit = CLAMP(cso->max_lod, 0.0, 15.0);
		ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;

		limit = CLAMP(cso->min_lod, 0.0, 15.0);
		ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
	}

	/* Shadow comparison function shares the wrap word. */
	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
		switch (cso->compare_func) {
		case PIPE_FUNC_NEVER:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER;
			break;
		case PIPE_FUNC_GREATER:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER;
			break;
		case PIPE_FUNC_EQUAL:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL;
			break;
		case PIPE_FUNC_GEQUAL:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL;
			break;
		case PIPE_FUNC_LESS:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS;
			break;
		case PIPE_FUNC_NOTEQUAL:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
			break;
		case PIPE_FUNC_LEQUAL:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL;
			break;
		case PIPE_FUNC_ALWAYS:
			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS;
			break;
		default:
			break;
		}
	}

	/* Border colour packed as A,R,G,B bytes from high to low. */
	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
		    (float_to_ubyte(cso->border_color[0]) << 16) |
		    (float_to_ubyte(cso->border_color[1]) <<  8) |
		    (float_to_ubyte(cso->border_color[2]) <<  0));

	return (void *)ps;
}

/**
 * Bind \p nr sampler states. Units beyond \p nr that were bound before are
 * cleared. Every touched unit is flagged in dirty_samplers and the context
 * is marked as having new sampler state.
 */
static void
nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
{
	struct nv30_context *ctx = nv30_context(pipe);
	unsigned i = 0;

	/* Install the new samplers ... */
	while (i < nr) {
		ctx->tex_sampler[i] = sampler[i];
		ctx->dirty_samplers |= (1 << i);
		i++;
	}

	/* ... and unbind whatever was bound past the new count. */
	for (; i < ctx->nr_samplers; i++) {
		ctx->tex_sampler[i] = NULL;
		ctx->dirty_samplers |= (1 << i);
	}

	ctx->nr_samplers = nr;
	ctx->dirty |= NV30_NEW_SAMPLER;
}

/**
 * Destroy a sampler CSO; it is a plain allocation with nothing else to
 * release.
 */
static void
nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
	(void)pipe;	/* context is not needed to free the state */

	FREE(hwcso);
}

/**
 * Bind \p nr fragment sampler views.
 *
 * For each unit the view is referenced and the texture it points at is
 * referenced into tex_miptree[]. A NULL entry in \p views unbinds that unit
 * instead of being dereferenced. Units beyond \p nr that were bound before
 * are released. Every touched unit is flagged in dirty_samplers.
 */
static void
nv30_set_fragment_sampler_views(struct pipe_context *pipe,
				unsigned nr,
				struct pipe_sampler_view **views)
{
	struct nv30_context *nv30 = nv30_context(pipe);
	unsigned unit;

	for (unit = 0; unit < nr; unit++) {
		struct pipe_sampler_view *view = views[unit];
		/* An empty slot has no texture to track. */
		struct pipe_texture *texture = view ? view->texture : NULL;

		pipe_sampler_view_reference(&nv30->fragment_sampler_views[unit], view);
		pipe_texture_reference((struct pipe_texture **)
				       &nv30->tex_miptree[unit], texture);
		nv30->dirty_samplers |= (1 << unit);
	}

	for (unit = nr; unit < nv30->nr_textures; unit++) {
		pipe_sampler_view_reference(&nv30->fragment_sampler_views[unit], NULL);
		pipe_texture_reference((struct pipe_texture **)
				       &nv30->tex_miptree[unit], NULL);
		nv30->dirty_samplers |= (1 << unit);
	}

	nv30->nr_textures = nr;
	nv30->dirty |= NV30_NEW_SAMPLER;
}

/**
 * Create a sampler view for \p texture from the template \p templ.
 *
 * The view starts as a copy of the template with a reference count of one,
 * holds its own reference on the texture and records the creating context.
 *
 * \return the new view, or NULL if allocation failed.
 */
static struct pipe_sampler_view *
nv30_create_sampler_view(struct pipe_context *pipe,
			 struct pipe_texture *texture,
			 const struct pipe_sampler_view *templ)
{
	struct pipe_sampler_view *sv = CALLOC_STRUCT(pipe_sampler_view);

	if (!sv) {
		return NULL;
	}

	*sv = *templ;
	sv->reference.count = 1;
	sv->context = pipe;

	/* Clear the copied pointer first so the reference helper does not
	 * treat the template's texture as one this view already owns. */
	sv->texture = NULL;
	pipe_texture_reference(&sv->texture, texture);

	return sv;
}


/**
 * Destroy a sampler view: drop its texture reference, then free the view.
 */
static void
nv30_sampler_view_destroy(struct pipe_context *pipe,
			  struct pipe_sampler_view *view)
{
	struct pipe_texture **tex_slot = &view->texture;

	pipe_texture_reference(tex_slot, NULL);
	FREE(view);
}

/**
 * Create a rasterizer CSO.
 *
 * Translates \p cso into a state object containing shade model, line
 * width/smoothing/stipple, point size, polygon mode/cull/front-face,
 * polygon stipple, polygon offset and point sprite methods, and keeps a
 * copy of the pipe state alongside it.
 *
 * \return the new nv30_rasterizer_state, or NULL if it could not be
 *         allocated.
 */
static void *
nv30_rasterizer_state_create(struct pipe_context *pipe,
			     const struct pipe_rasterizer_state *cso)
{
	struct nv30_context *nv30 = nv30_context(pipe);
	struct nouveau_grobj *rankine = nv30->screen->rankine;
	struct nv30_rasterizer_state *rsso;
	struct nouveau_stateobj *so;

	/* Fail cleanly instead of dereferencing a NULL state below. */
	rsso = CALLOC(1, sizeof(*rsso));
	if (!rsso)
		return NULL;

	so = so_new(9, 19, 0);

	/*XXX: ignored:
	 * 	light_twoside
	 * 	point_smooth -nohw
	 * 	multisample
	 */

	so_method(so, rankine, NV34TCL_SHADE_MODEL, 1);
	so_data  (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT :
				       NV34TCL_SHADE_MODEL_SMOOTH);

	so_method(so, rankine, NV34TCL_LINE_WIDTH, 2);
	so_data  (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
	so_data  (so, cso->line_smooth ? 1 : 0);
	so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2);
	so_data  (so, cso->line_stipple_enable ? 1 : 0);
	so_data  (so, (cso->line_stipple_pattern << 16) |
		       cso->line_stipple_factor);

	so_method(so, rankine, NV34TCL_POINT_SIZE, 1);
	so_data  (so, fui(cso->point_size));

	/* Six words: front mode, back mode, cull face, front face,
	 * polygon smooth enable, cull enable. Which pipe winding maps to
	 * "front" depends on front_winding. */
	so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6);
	if (cso->front_winding == PIPE_WINDING_CCW) {
		so_data(so, nvgl_polygon_mode(cso->fill_ccw));
		so_data(so, nvgl_polygon_mode(cso->fill_cw));
		switch (cso->cull_mode) {
		case PIPE_WINDING_CCW:
			so_data(so, NV34TCL_CULL_FACE_FRONT);
			break;
		case PIPE_WINDING_CW:
			so_data(so, NV34TCL_CULL_FACE_BACK);
			break;
		case PIPE_WINDING_BOTH:
			so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
			break;
		default:
			so_data(so, NV34TCL_CULL_FACE_BACK);
			break;
		}
		so_data(so, NV34TCL_FRONT_FACE_CCW);
	} else {
		so_data(so, nvgl_polygon_mode(cso->fill_cw));
		so_data(so, nvgl_polygon_mode(cso->fill_ccw));
		switch (cso->cull_mode) {
		case PIPE_WINDING_CCW:
			so_data(so, NV34TCL_CULL_FACE_BACK);
			break;
		case PIPE_WINDING_CW:
			so_data(so, NV34TCL_CULL_FACE_FRONT);
			break;
		case PIPE_WINDING_BOTH:
			so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
			break;
		default:
			so_data(so, NV34TCL_CULL_FACE_BACK);
			break;
		}
		so_data(so, NV34TCL_FRONT_FACE_CW);
	}
	so_data(so, cso->poly_smooth ? 1 : 0);
	so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);

	so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
	so_data  (so, cso->poly_stipple_enable ? 1 : 0);

	/* Offset enables for point, line and fill mode, in that order. */
	so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
		so_data(so, 1);
	else
		so_data(so, 0);
	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
		so_data(so, 1);
	else
		so_data(so, 0);
	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
		so_data(so, 1);
	else
		so_data(so, 0);
	if (cso->offset_cw || cso->offset_ccw) {
		so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2);
		so_data  (so, fui(cso->offset_scale));
		so_data  (so, fui(cso->offset_units * 2));
	}

	so_method(so, rankine, NV34TCL_POINT_SPRITE, 1);
	if (cso->point_quad_rasterization) {
		/* Bit 0 enables sprites; bits 8..15 mirror the per-coord
		 * enables from sprite_coord_enable. */
		unsigned psctl = (1 << 0), i;

		for (i = 0; i < 8; i++) {
			if ((cso->sprite_coord_enable >> i) & 1)
				psctl |= (1 << (8 + i));
		}

		so_data(so, psctl);
	} else {
		so_data(so, 0);
	}

	/* The CSO takes its own reference; drop the local one. */
	so_ref(so, &rsso->so);
	so_ref(NULL, &so);
	rsso->pipe = *cso;
	return (void *)rsso;
}

/**
 * Make \p hwcso the current rasterizer state and flag it as dirty.
 * The draw-module dirty flag is deliberately not touched here.
 */
static void
nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_RAST;
	ctx->rasterizer = hwcso;
}

/**
 * Destroy a rasterizer CSO: drop its state object reference and free it.
 */
static void
nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_rasterizer_state *rast = hwcso;

	so_ref(NULL, &rast->so);
	FREE(rast);
}

static void *
nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
struct nouveau_stateobj *so = so_new(6, 20, 0);
struct nouveau_grobj *rankine = nv30->screen->rankine;

so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
so_data (so, nvgl_comparison_op(cso->depth.func));
so_data (so, cso->depth.writemask ? 1 : 0);
so_data (so, cso->depth.enabled ? 1 : 0);

so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3);
so_data (so, cso->alpha.enabled ? 1 : 0);
so_data (so, nvgl_comparison_op(cso->alpha.func));
so_data (so, float_to_ubyte(cso->alpha.ref_value));

if (cso->stencil[0].enabled) {
so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 3);
so_data (so, cso->stencil[0].enabled ? 1 : 0);
so_data (so, cso->stencil[0].writemask);
so_data (so, nvgl_comparison_op(cso->stencil[0].func));
so_method(so, rankine, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4);
so_data (so, cso->stencil[0].valuemask);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
} else {
so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}

if (cso->stencil[1].enabled) {
so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 3);
so_data (so, cso->stencil[1].enabled ? 1 : 0);
so_data (so, cso->stencil[1].writemask);
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
so_method(so, rankine, NV34TCL_STENCIL_BACK_FUNC_MASK, 4);
so_data (so, cso->stencil[1].valuemask);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
} else {
so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}

so_ref(so, &zsaso->so);
so_ref(NULL, &so);
zsaso->pipe = *cso;
return (void *)zsaso;
}

/**
 * Make \p hwcso the current depth/stencil/alpha state and flag it as dirty.
 */
static void
nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_ZSA;
	ctx->zsa = hwcso;
}

/**
 * Destroy a depth/stencil/alpha CSO: drop its state object reference and
 * free it.
 */
static void
nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_zsa_state *zsa = hwcso;

	so_ref(NULL, &zsa->so);
	FREE(zsa);
}

/**
 * Create a vertex program CSO holding a private copy of the TGSI tokens.
 * No draw-module shader is created here.
 *
 * \return the new nv30_vertex_program, or NULL if either the program or
 *         the token copy could not be allocated.
 */
static void *
nv30_vp_state_create(struct pipe_context *pipe,
		     const struct pipe_shader_state *cso)
{
	struct nv30_vertex_program *vp;

	vp = CALLOC(1, sizeof(struct nv30_vertex_program));
	if (!vp)
		return NULL;

	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
	if (!vp->pipe.tokens) {
		/* Don't hand out a program without tokens. */
		FREE(vp);
		return NULL;
	}

	return (void *)vp;
}

/**
 * Make \p hwcso the current vertex program and flag it as dirty.
 * The draw-module dirty flag is deliberately not touched here.
 */
static void
nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_VERTPROG;
	ctx->vertprog = hwcso;
}

/**
 * Destroy a vertex program CSO: tear down the program via
 * nv30_vertprog_destroy(), then free its token copy and the program
 * structure.
 */
static void
nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_vertex_program *prog = hwcso;

	nv30_vertprog_destroy(nv30_context(pipe), prog);

	FREE((void*)prog->pipe.tokens);
	FREE(prog);
}

static void *
nv30_fp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
struct nv30_fragment_program *fp;

fp = CALLOC(1, sizeof(struct nv30_fragment_program));
fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);

tgsi_scan_shader(fp->pipe.tokens, &fp->info);

return (void *)fp;
}

/* Select hwcso as the active fragment program and mark it dirty. */
static void
nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_FRAGPROG;
	ctx->fragprog = hwcso;
}

/*
 * Destroy a fragment-program CSO: tear down its translated state via
 * nv30_fragprog_destroy(), then free the token copy and the CSO.
 */
static void
nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_fragment_program *prog = hwcso;
	struct nv30_context *ctx = nv30_context(pipe);

	nv30_fragprog_destroy(ctx, prog);
	FREE((void*)prog->pipe.tokens);
	FREE(prog);
}

/* Copy the blend colour into the context and flag it for validation. */
static void
nv30_set_blend_color(struct pipe_context *pipe,
		     const struct pipe_blend_color *bcol)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_BCOL;
	ctx->blend_colour = *bcol;
}

/* Copy the stencil reference values into the context and flag them dirty. */
static void
nv30_set_stencil_ref(struct pipe_context *pipe,
		     const struct pipe_stencil_ref *sr)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_SR;
	ctx->stencil_ref = *sr;
}

/*
 * Clip-state hook.  Intentionally a no-op: the clip state passed in is
 * neither stored nor marked dirty by this driver.
 */
static void
nv30_set_clip_state(struct pipe_context *pipe,
const struct pipe_clip_state *clip)
{
}

/*
 * Bind a constant buffer for the given shader stage.
 *
 * The buffer pointer is stored as-is (no reference is taken here) and the
 * number of vec4 constants is derived from the buffer size.  `index` is
 * ignored: only one constant buffer per stage is tracked.
 *
 * A NULL `buf` unbinds the buffer and records zero constants instead of
 * dereferencing the NULL pointer.
 */
static void
nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
			 struct pipe_buffer *buf )
{
	struct nv30_context *nv30 = nv30_context(pipe);

	nv30->constbuf[shader] = buf;
	/* One constant slot is four floats. */
	nv30->constbuf_nr[shader] = buf ? buf->size / (4 * sizeof(float)) : 0;

	/* Constants are uploaded as part of program validation, so flag the
	 * program that consumes them. */
	if (shader == PIPE_SHADER_VERTEX) {
		nv30->dirty |= NV30_NEW_VERTPROG;
	} else
	if (shader == PIPE_SHADER_FRAGMENT) {
		nv30->dirty |= NV30_NEW_FRAGPROG;
	}
}

/* Copy the framebuffer description into the context and flag it dirty. */
static void
nv30_set_framebuffer_state(struct pipe_context *pipe,
			   const struct pipe_framebuffer_state *fb)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_FB;
	ctx->framebuffer = *fb;
}

/* Copy the 32-row polygon stipple pattern (128 bytes) into the context. */
static void
nv30_set_polygon_stipple(struct pipe_context *pipe,
			 const struct pipe_poly_stipple *stipple)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_STIPPLE;
	memcpy(ctx->stipple, stipple->stipple, 32 * 4);
}

/* Copy the scissor rectangle into the context and flag it dirty. */
static void
nv30_set_scissor_state(struct pipe_context *pipe,
		       const struct pipe_scissor_state *s)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_SCISSOR;
	ctx->scissor = *s;
}

/* Copy the viewport transform into the context and flag it dirty. */
static void
nv30_set_viewport_state(struct pipe_context *pipe,
			const struct pipe_viewport_state *vpt)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_VIEWPORT;
	ctx->viewport = *vpt;
	/*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/
}

/*
 * Copy `count` vertex-buffer descriptors into the context, remember how
 * many are bound, and flag the vertex arrays dirty.
 */
static void
nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
			const struct pipe_vertex_buffer *vb)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->vtxbuf_nr = count;
	memcpy(ctx->vtxbuf, vb, count * sizeof(*vb));

	ctx->dirty |= NV30_NEW_ARRAYS;
	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
}

/*
 * Create a vertex-elements CSO holding a copy of the element descriptions.
 *
 * Returns the new CSO, or NULL if the allocation fails.
 */
static void *
nv30_vtxelts_state_create(struct pipe_context *pipe,
			  unsigned num_elements,
			  const struct pipe_vertex_element *elements)
{
	struct nv30_vtxelt_state *cso = CALLOC_STRUCT(nv30_vtxelt_state);

	if (!cso)
		return NULL;

	assert(num_elements < 16); /* not doing fallbacks yet */
	cso->num_elements = num_elements;
	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));

	/* nv30_vtxelt_construct(cso);*/

	return (void *)cso;
}

/* Free a vertex-elements CSO; it owns no other resources. */
static void
nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
	void *cso = hwcso;

	FREE(cso);
}

/* Select hwcso as the active vertex-elements CSO and flag arrays dirty. */
static void
nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv30_context *ctx = nv30_context(pipe);

	ctx->dirty |= NV30_NEW_ARRAYS;
	ctx->vtxelt = hwcso;
	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
}

void
nv30_init_state_functions(struct nv30_context *nv30)
{
nv30->pipe.create_blend_state = nv30_blend_state_create;
nv30->pipe.bind_blend_state = nv30_blend_state_bind;
nv30->pipe.delete_blend_state = nv30_blend_state_delete;

nv30->pipe.create_sampler_state = nv30_sampler_state_create;
nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind;
nv30->pipe.delete_sampler_state = nv30_sampler_state_delete;
nv30->pipe.set_fragment_sampler_views = nv30_set_fragment_sampler_views;
nv30->pipe.create_sampler_view = nv30_create_sampler_view;
nv30->pipe.sampler_view_destroy = nv30_sampler_view_destroy;

nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create;
nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind;
nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete;

nv30->pipe.create_depth_stencil_alpha_state =
nv30_depth_stencil_alpha_state_create;
nv30->pipe.bind_depth_stencil_alpha_state =
nv30_depth_stencil_alpha_state_bind;
nv30->pipe.delete_depth_stencil_alpha_state =
nv30_depth_stencil_alpha_state_delete;

nv30->pipe.create_vs_state = nv30_vp_state_create;
nv30->pipe.bind_vs_state = nv30_vp_state_bind;
nv30->pipe.delete_vs_state = nv30_vp_state_delete;

nv30->pipe.create_fs_state = nv30_fp_state_create;
nv30->pipe.bind_fs_state = nv30_fp_state_bind;
nv30->pipe.delete_fs_state = nv30_fp_state_delete;

nv30->pipe.set_blend_color = nv30_set_blend_color;
nv30->pipe.set_stencil_ref = nv30_set_stencil_ref;
nv30->pipe.set_clip_state = nv30_set_clip_state;
nv30->pipe.set_constant_buffer = nv30_set_constant_buffer;
nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state;
nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple;
nv30->pipe.set_scissor_state = nv30_set_scissor_state;
nv30->pipe.set_viewport_state = nv30_set_viewport_state;

nv30->pipe.create_vertex_elements_state = nv30_vtxelts_state_create;
nv30->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete;
nv30->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind;

nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
}


+ 0
- 88
src/gallium/drivers/nv30/nv30_state.h 파일 보기

@@ -1,88 +0,0 @@
#ifndef __NV30_STATE_H__
#define __NV30_STATE_H__

#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"

/*
 * Driver-side sampler CSO: five 32-bit words.
 * NOTE(review): going by the field names these are presumably
 * pre-computed hardware register values (format, wrap, enable, filter,
 * border colour) -- confirm against the sampler create/emit code.
 */
struct nv30_sampler_state {
uint32_t fmt;
uint32_t wrap;
uint32_t en;
uint32_t filt;
uint32_t bcol;
};

/* One encoded vertex-program instruction. */
struct nv30_vertex_program_exec {
/* The four 32-bit instruction words filled in by the vertprog emitter. */
uint32_t data[4];
/* NOTE(review): not written by the code visible here; presumably marks
 * instructions carrying a branch target -- confirm. */
boolean has_branch_offset;
/* Index into the program's consts[] used by this instruction, or -1
 * when it reads no constant. */
int const_index;
};

/* One constant-table entry of a translated vertex program. */
struct nv30_vertex_program_data {
int index; /* immediates == -1 */
/* Initial x/y/z/w value recorded when the entry is created. */
float value[4];
};

/* Vertex-program CSO: the TGSI source plus its translated form. */
struct nv30_vertex_program {
/* Shader state; pipe.tokens holds the CSO's own copy of the tokens. */
struct pipe_shader_state pipe;

/* NOTE(review): presumably set once translation to hw code has run. */
boolean translated;

/* Growable array of encoded instructions and its length. */
struct nv30_vertex_program_exec *insns;
unsigned nr_insns;
/* Growable constant table and its length. */
struct nv30_vertex_program_data *consts;
unsigned nr_consts;

/* NOTE(review): exec/data and the *_start fields are not touched by
 * the code visible here; presumably hw instruction/constant slot
 * allocations -- confirm. */
struct nouveau_resource *exec;
unsigned exec_start;
struct nouveau_resource *data;
unsigned data_start;
unsigned data_start_min;

/* Bitmask of input registers read (one bit per input index). */
uint32_t ir;
/* Bitmask of output registers written (colours, fog, psize, texcoords). */
uint32_t or;
struct nouveau_stateobj *so;
};

/*
 * One constant reference inside a fragment program.
 * NOTE(review): presumably `offset` locates the constant within the
 * instruction stream and `index` names the pipe constant -- confirm
 * against the fragprog translator.
 */
struct nv30_fragment_program_data {
unsigned offset;
unsigned index;
};

/* Fragment-program CSO: the TGSI source plus its translated form. */
struct nv30_fragment_program {
/* Shader state; pipe.tokens holds the CSO's own copy of the tokens. */
struct pipe_shader_state pipe;
/* Filled by tgsi_scan_shader() when the CSO is created. */
struct tgsi_shader_info info;

/* NOTE(review): the fields below are not touched by the code visible
 * here; the names suggest translation/upload status, a sampler usage
 * mask, the encoded instruction words, the constant references, the
 * buffer holding the program and hw control values -- confirm against
 * the fragprog translator. */
boolean translated;
boolean on_hw;
unsigned samplers;

uint32_t *insn;
int insn_len;

struct nv30_fragment_program_data *consts;
unsigned nr_consts;

struct pipe_buffer *buffer;

uint32_t fp_control;
uint32_t fp_reg_control;
struct nouveau_stateobj *so;
};

/* Size of the per-level table in struct nv30_miptree. */
#define NV30_MAX_TEXTURE_LEVELS 16

/* Driver texture object wrapping a pipe_texture. */
struct nv30_miptree {
/* Base texture; kept first so pointers can be cast between the types. */
struct pipe_texture base;
struct nouveau_bo *bo;

/* Backing buffer; its bo is what framebuffer relocations point at. */
struct pipe_buffer *buffer;
uint total_size;

/* Per-mipmap-level layout.
 * NOTE(review): image_offset is presumably an array with one offset per
 * face/slice -- confirm against the miptree layout code. */
struct {
uint pitch;
uint *image_offset;
} level[NV30_MAX_TEXTURE_LEVELS];
};

#endif

+ 0
- 41
src/gallium/drivers/nv30/nv30_state_blend.c 파일 보기

@@ -1,41 +0,0 @@
#include "nv30_context.h"

/* Point the BLEND hw state slot at the bound blend CSO's state object. */
static boolean
nv30_state_blend_validate(struct nv30_context *nv30)
{
	struct nouveau_stateobj **slot = &nv30->state.hw[NV30_STATE_BLEND];

	so_ref(nv30->blend->so, slot);
	return TRUE;
}

/* State-table entry: when NV30_NEW_BLEND is dirty the validate hook runs,
 * and on success the NV30_STATE_BLEND hw slot is scheduled for emission. */
struct nv30_state_entry nv30_state_blend = {
.validate = nv30_state_blend_validate,
.dirty = {
.pipe = NV30_NEW_BLEND,
.hw = NV30_STATE_BLEND
}
};

/*
 * Build the state object that programs the constant blend colour.
 *
 * The four float channels are converted to bytes and packed as
 * A<<24 | R<<16 | G<<8 | B.  Each byte is widened to uint32_t before
 * shifting so that an alpha value >= 0x80 is not shifted into the sign
 * bit of a promoted int.
 *
 * Always returns TRUE (the hw slot is always updated).
 */
static boolean
nv30_state_blend_colour_validate(struct nv30_context *nv30)
{
	struct nouveau_stateobj *so = so_new(1, 1, 0);
	struct pipe_blend_color *bcol = &nv30->blend_colour;

	so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
	so_data  (so, (((uint32_t)float_to_ubyte(bcol->color[3]) << 24) |
		       ((uint32_t)float_to_ubyte(bcol->color[0]) << 16) |
		       ((uint32_t)float_to_ubyte(bcol->color[1]) <<  8) |
		       ((uint32_t)float_to_ubyte(bcol->color[2]) <<  0)));

	/* Hand the object to the hw slot, then drop the local pointer. */
	so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]);
	so_ref(NULL, &so);
	return TRUE;
}

/* State-table entry: when NV30_NEW_BCOL is dirty the validate hook runs,
 * and on success the NV30_STATE_BCOL hw slot is scheduled for emission. */
struct nv30_state_entry nv30_state_blend_colour = {
.validate = nv30_state_blend_colour_validate,
.dirty = {
.pipe = NV30_NEW_BCOL,
.hw = NV30_STATE_BCOL
}
};

+ 0
- 122
src/gallium/drivers/nv30/nv30_state_emit.c 파일 보기

@@ -1,122 +0,0 @@
#include "nv30_context.h"
#include "nv30_state.h"

/*
 * NULL-terminated list of state entries checked, in this order, on every
 * validation pass for hardware rendering.
 */
static struct nv30_state_entry *render_states[] = {
&nv30_state_framebuffer,
&nv30_state_rasterizer,
&nv30_state_scissor,
&nv30_state_stipple,
&nv30_state_fragprog,
&nv30_state_fragtex,
&nv30_state_vertprog,
&nv30_state_blend,
&nv30_state_blend_colour,
&nv30_state_zsa,
&nv30_state_sr,
&nv30_state_viewport,
&nv30_state_vbo,
NULL
};

static void
nv30_state_do_validate(struct nv30_context *nv30,
struct nv30_state_entry **states)
{
while (*states) {
struct nv30_state_entry *e = *states;

if (nv30->dirty & e->dirty.pipe) {
if (e->validate(nv30)) {
nv30->state.dirty |= (1ULL << e->dirty.hw);
}
}

states++;
}
nv30->dirty = 0;
}

/*
 * Emit every dirty hw state object to the channel.
 *
 * If another context was last active on the screen, every slot whose
 * object differs from what the screen last emitted is first marked dirty.
 * Each dirty slot is then copied into the screen's table and, when
 * non-NULL, emitted.
 */
void
nv30_state_emit(struct nv30_context *nv30)
{
	struct nv30_screen *screen = nv30->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nv30_state *state = &nv30->state;
	uint64_t pending;
	unsigned i;

	/* XXX: racy!
	 */
	if (screen->cur_ctx != nv30) {
		for (i = 0; i < NV30_STATE_MAX; i++) {
			if (!state->hw[i])
				continue;
			if (screen->state[i] != state->hw[i])
				state->dirty |= (1ULL << i);
		}

		screen->cur_ctx = nv30;
	}

	pending = state->dirty;
	for (i = 0; pending; i++) {
		const uint64_t bit = 1ULL << i;

		if (!(pending & bit))
			continue;

		so_ref(state->hw[i], &screen->state[i]);
		if (state->hw[i])
			so_emit(chan, screen->state[i]);
		pending &= ~bit;
	}

	state->dirty = 0;
}

/*
 * Flush-notify callback: emit relocation markers for the state objects of
 * the framebuffer, each fragment texture unit set in fp_samplers, the
 * fragment program and, if present, the vertex buffers.
 */
void
nv30_state_flush_notify(struct nouveau_channel *chan)
{
	struct nv30_context *nv30 = chan->user_private;
	struct nv30_state *state = &nv30->state;
	unsigned unit;
	unsigned remaining = state->fp_samplers;

	so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]);

	for (unit = 0; unit < 16 && remaining; unit++) {
		const unsigned bit = 1u << unit;

		if (remaining & bit) {
			so_emit_reloc_markers(chan,
					      state->hw[NV30_STATE_FRAGTEX0+unit]);
			remaining &= ~bit;
		}
	}

	so_emit_reloc_markers(chan, state->hw[NV30_STATE_FRAGPROG]);

	if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/)
		so_emit_reloc_markers(chan, state->hw[NV30_STATE_VTXBUF]);
}

/*
 * Validate all dirty state for hardware rendering.
 *
 * As compiled, this only runs the render_states list through
 * nv30_state_do_validate() and returns TRUE.  The software-fallback
 * handling (swtnl/swrast) is compiled out with #if 0.
 */
boolean
nv30_state_validate(struct nv30_context *nv30)
{
#if 0
boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE;

if (nv30->render_mode != HW) {
/* Don't even bother trying to go back to hw if none
* of the states that caused swtnl previously have changed.
*/
if ((nv30->fallback_swtnl & nv30->dirty)
!= nv30->fallback_swtnl)
return FALSE;

/* Attempt to go to hwtnl again */
nv30->pipe.flush(&nv30->pipe, 0, NULL);
nv30->dirty |= (NV30_NEW_VIEWPORT |
NV30_NEW_VERTPROG |
NV30_NEW_ARRAYS);
nv30->render_mode = HW;
}
#endif
nv30_state_do_validate(nv30, render_states);
#if 0
if (nv30->fallback_swtnl || nv30->fallback_swrast)
return FALSE;
if (was_sw)
NOUVEAU_ERR("swtnl->hw\n");
#endif
return TRUE;
}

+ 0
- 173
src/gallium/drivers/nv30/nv30_state_fb.c 파일 보기

@@ -1,173 +0,0 @@
#include "nv30_context.h"
#include "nouveau/nouveau_util.h"

/*
 * Build the state object describing the current render targets.
 *
 * Collects the colour buffers (at most two) and the depth/stencil buffer,
 * works out the render-target format word (swizzled vs. linear layout,
 * colour and zeta formats), then emits DMA objects, offsets and pitches
 * for each surface, followed by the render-target enable mask, the
 * surface size and matching viewport/clip registers.
 *
 * Returns FALSE without touching the hw state when there is neither a
 * colour nor a depth buffer, or when the colour format is wider than the
 * depth format; TRUE otherwise.
 *
 * Changes from the previous version:
 *  - the state object is allocated only after the early-outs, so the
 *    FALSE paths no longer leak it;
 *  - every bound colour buffer (up to the size of rt[]) is recorded and
 *    enabled, so rt[1] is initialised before it is read.
 */
static boolean
nv30_state_framebuffer_validate(struct nv30_context *nv30)
{
	struct pipe_framebuffer_state *fb = &nv30->framebuffer;
	struct nouveau_channel *chan = nv30->screen->base.channel;
	struct nouveau_grobj *rankine = nv30->screen->rankine;
	struct nv04_surface *rt[2] = { NULL, NULL }, *zeta = NULL;
	uint32_t rt_enable = 0, rt_format = 0;
	int i, colour_format = 0, zeta_format = 0, depth_only = 0;
	struct nouveau_stateobj *so;
	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
	unsigned w = fb->width;
	unsigned h = fb->height;
	struct nv30_miptree *nv30mt;
	int colour_bits = 32, zeta_bits = 32;
	/* Never index past rt[], whatever the state tracker binds. */
	const int max_rt = (int)(sizeof(rt) / sizeof(rt[0]));
	const int nr_rt = ((int)fb->nr_cbufs < max_rt) ? (int)fb->nr_cbufs : max_rt;

	for (i = 0; i < nr_rt; i++) {
		/* All colour buffers must share one format. */
		if (colour_format) {
			assert(colour_format == fb->cbufs[i]->format);
		} else {
			colour_format = fb->cbufs[i]->format;
		}
		rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
		rt[i] = (struct nv04_surface *)fb->cbufs[i];
	}

	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1)
		rt_enable |= NV34TCL_RT_ENABLE_MRT;

	if (fb->zsbuf) {
		zeta_format = fb->zsbuf->format;
		zeta = (struct nv04_surface *)fb->zsbuf;
	}

	if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) {
		/* Render to at least a colour buffer */
		if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
			/* Swizzled targets need power-of-two dimensions. */
			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
			for (i = 1; i < nr_rt; i++)
				assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));

			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
				(log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
				(log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
		}
		else
			rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
	} else if (fb->zsbuf) {
		depth_only = 1;

		/* Render to depth buffer only */
		if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));

			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
				(log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
				(log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
		}
		else
			rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
	} else {
		/* Nothing to render to. */
		return FALSE;
	}

	switch (colour_format) {
	case PIPE_FORMAT_B8G8R8X8_UNORM:
		rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
		break;
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case 0:
		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
		break;
	case PIPE_FORMAT_B5G6R5_UNORM:
		rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
		colour_bits = 16;
		break;
	default:
		assert(0);
	}

	switch (zeta_format) {
	case PIPE_FORMAT_Z16_UNORM:
		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
		zeta_bits = 16;
		break;
	case PIPE_FORMAT_S8Z24_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
	case 0:
		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
		break;
	default:
		assert(0);
	}

	if (colour_bits > zeta_bits) {
		return FALSE;
	}

	/* Every early-out is behind us: now it is safe to allocate. */
	so = so_new(12, 18, 10);

	if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) {
		/* With no colour buffer, the depth surface doubles as COLOR0. */
		struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]);
		uint32_t pitch = rt0->pitch;

		/* Colour pitch in the low half, zeta pitch in the high half. */
		if (zeta) {
			pitch |= (zeta->pitch << 16);
		} else {
			pitch |= (pitch << 16);
		}

		nv30mt = (struct nv30_miptree *) rt0->base.texture;
		so_method(so, rankine, NV34TCL_DMA_COLOR0, 1);
		so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR,
			  chan->vram->handle, chan->gart->handle);
		so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2);
		so_data  (so, pitch);
		so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset,
			  rt_flags | NOUVEAU_BO_LOW, 0, 0);
	}

	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
		nv30mt = (struct nv30_miptree *)rt[1]->base.texture;
		so_method(so, rankine, NV34TCL_DMA_COLOR1, 1);
		so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR,
			  chan->vram->handle, chan->gart->handle);
		so_method(so, rankine, NV34TCL_COLOR1_OFFSET, 2);
		so_reloc (so, nouveau_bo(nv30mt->buffer), rt[1]->base.offset,
			  rt_flags | NOUVEAU_BO_LOW, 0, 0);
		so_data  (so, rt[1]->pitch);
	}

	if (zeta_format) {
		nv30mt = (struct nv30_miptree *)zeta->base.texture;
		so_method(so, rankine, NV34TCL_DMA_ZETA, 1);
		so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR,
			  chan->vram->handle, chan->gart->handle);
		so_method(so, rankine, NV34TCL_ZETA_OFFSET, 1);
		so_reloc (so, nouveau_bo(nv30mt->buffer), zeta->base.offset,
			  rt_flags | NOUVEAU_BO_LOW, 0, 0);
		/* TODO: allocate LMA depth buffer */
	}

	so_method(so, rankine, NV34TCL_RT_ENABLE, 1);
	so_data  (so, rt_enable);
	so_method(so, rankine, NV34TCL_RT_HORIZ, 3);
	so_data  (so, (w << 16) | 0);
	so_data  (so, (h << 16) | 0);
	so_data  (so, rt_format);
	so_method(so, rankine, NV34TCL_VIEWPORT_HORIZ, 2);
	so_data  (so, (w << 16) | 0);
	so_data  (so, (h << 16) | 0);
	so_method(so, rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
	so_data  (so, ((w - 1) << 16) | 0);
	so_data  (so, ((h - 1) << 16) | 0);
	so_method(so, rankine, 0x1d88, 1);
	so_data  (so, (1 << 12) | h);
	/* Wonder why this is needed, context should all be set to zero on init */
	so_method(so, rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
	so_data  (so, 0);

	/* Hand the object to the hw slot, then drop the local pointer. */
	so_ref(so, &nv30->state.hw[NV30_STATE_FB]);
	so_ref(NULL, &so);
	return TRUE;
}

/* State-table entry: when NV30_NEW_FB is dirty the validate hook runs,
 * and on success the NV30_STATE_FB hw slot is scheduled for emission. */
struct nv30_state_entry nv30_state_framebuffer = {
.validate = nv30_state_framebuffer_validate,
.dirty = {
.pipe = NV30_NEW_FB,
.hw = NV30_STATE_FB
}
};

+ 0
- 17
src/gallium/drivers/nv30/nv30_state_rasterizer.c 파일 보기

@@ -1,17 +0,0 @@
#include "nv30_context.h"

/* Point the RAST hw state slot at the bound rasterizer CSO's state object. */
static boolean
nv30_state_rasterizer_validate(struct nv30_context *nv30)
{
	struct nouveau_stateobj **slot = &nv30->state.hw[NV30_STATE_RAST];

	so_ref(nv30->rasterizer->so, slot);
	return TRUE;
}

/* State-table entry: when NV30_NEW_RAST is dirty the validate hook runs,
 * and on success the NV30_STATE_RAST hw slot is scheduled for emission. */
struct nv30_state_entry nv30_state_rasterizer = {
.validate = nv30_state_rasterizer_validate,
.dirty = {
.pipe = NV30_NEW_RAST,
.hw = NV30_STATE_RAST
}
};

+ 0
- 36
src/gallium/drivers/nv30/nv30_state_scissor.c 파일 보기

@@ -1,36 +0,0 @@
#include "nv30_context.h"

/*
 * Build the scissor state object.
 *
 * Returns FALSE (nothing to emit) when a scissor object already exists
 * and the scissor is disabled both in the rasterizer CSO and in the last
 * emitted state.  Otherwise records the new enable flag and emits either
 * the requested rectangle or a 4096x4096 rectangle at the origin.
 */
static boolean
nv30_state_scissor_validate(struct nv30_context *nv30)
{
	struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
	struct pipe_scissor_state *s = &nv30->scissor;
	struct nouveau_stateobj *so;

	if (nv30->state.hw[NV30_STATE_SCISSOR]) {
		if (rast->scissor == 0 && nv30->state.scissor_enabled == 0)
			return FALSE;
	}
	nv30->state.scissor_enabled = rast->scissor;

	so = so_new(1, 2, 0);
	so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
	if (!nv30->state.scissor_enabled) {
		so_data  (so, 4096 << 16);
		so_data  (so, 4096 << 16);
	} else {
		/* Extent in the high half, origin in the low half. */
		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx);
		so_data  (so, ((s->maxy - s->miny) << 16) | s->miny);
	}

	so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]);
	so_ref(NULL, &so);
	return TRUE;
}

/* State-table entry: re-validated when either the scissor rectangle or the
 * rasterizer CSO (which carries the scissor enable) changes. */
struct nv30_state_entry nv30_state_scissor = {
.validate = nv30_state_scissor_validate,
.dirty = {
.pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST,
.hw = NV30_STATE_SCISSOR
}
};

+ 0
- 40
src/gallium/drivers/nv30/nv30_state_stipple.c 파일 보기

@@ -1,40 +0,0 @@
#include "nv30_context.h"

/*
 * Build the polygon-stipple state object.
 *
 * Returns FALSE (nothing to emit) when a stipple object already exists
 * and stippling is disabled both in the rasterizer CSO and in the last
 * emitted state.  Otherwise emits either "enable + 32-word pattern" or
 * "disable".
 *
 * The enable flag that was emitted is now recorded in
 * state.stipple_enabled, mirroring what the scissor validator does with
 * scissor_enabled.  Without this the flag stayed at its initial value,
 * so an enabled -> disabled transition hit the early-out and the
 * hardware kept stippling.
 */
static boolean
nv30_state_stipple_validate(struct nv30_context *nv30)
{
	struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
	struct nouveau_grobj *rankine = nv30->screen->rankine;
	struct nouveau_stateobj *so;

	if (nv30->state.hw[NV30_STATE_STIPPLE] &&
	   (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0))
		return FALSE;
	nv30->state.stipple_enabled = rast->poly_stipple_enable;

	if (rast->poly_stipple_enable) {
		unsigned i;

		so = so_new(2, 33, 0);
		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
		so_data  (so, 1);
		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
		for (i = 0; i < 32; i++)
			so_data(so, nv30->stipple[i]);
	} else {
		so = so_new(1, 1, 0);
		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
		so_data  (so, 0);
	}

	/* Hand the object to the hw slot, then drop the local pointer. */
	so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]);
	so_ref(NULL, &so);
	return TRUE;
}

/* State-table entry: re-validated when either the stipple pattern or the
 * rasterizer CSO (which carries the stipple enable) changes. */
struct nv30_state_entry nv30_state_stipple = {
.validate = nv30_state_stipple_validate,
.dirty = {
.pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST,
.hw = NV30_STATE_STIPPLE,
}
};

+ 0
- 42
src/gallium/drivers/nv30/nv30_state_viewport.c 파일 보기

@@ -1,42 +0,0 @@
#include "nv30_context.h"

/*
 * Build the viewport state object: four translate words followed by four
 * scale words, then method 0x1d78 set to 1.
 *
 * Returns FALSE when a viewport object already exists and the viewport
 * itself is not dirty (the entry is also triggered by rasterizer changes).
 */
static boolean
nv30_state_viewport_validate(struct nv30_context *nv30)
{
	struct pipe_viewport_state *vpt = &nv30->viewport;
	struct nouveau_grobj *rankine = nv30->screen->rankine;
	struct nouveau_stateobj *so;
	unsigned c;

	if (nv30->state.hw[NV30_STATE_VIEWPORT]) {
		if (!(nv30->dirty & NV30_NEW_VIEWPORT))
			return FALSE;
	}

	so = so_new(3, 10, 0);
	so_method(so, rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8);
	for (c = 0; c < 4; c++)
		so_data(so, fui(vpt->translate[c]));
	for (c = 0; c < 4; c++)
		so_data(so, fui(vpt->scale[c]));

	/* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */
	so_method(so, rankine, 0x1d78, 1);
	so_data  (so, 1);

	so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]);
	so_ref(NULL, &so);
	return TRUE;
}

/* State-table entry: triggered by viewport or rasterizer changes; the
 * validate hook itself only rebuilds when NV30_NEW_VIEWPORT is set or no
 * viewport object exists yet. */
struct nv30_state_entry nv30_state_viewport = {
.validate = nv30_state_viewport_validate,
.dirty = {
.pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST,
.hw = NV30_STATE_VIEWPORT
}
};

+ 0
- 41
src/gallium/drivers/nv30/nv30_state_zsa.c 파일 보기

@@ -1,41 +0,0 @@
#include "nv30_context.h"

/* Point the ZSA hw state slot at the bound depth/stencil/alpha CSO's
 * state object. */
static boolean
nv30_state_zsa_validate(struct nv30_context *nv30)
{
	struct nouveau_stateobj **slot = &nv30->state.hw[NV30_STATE_ZSA];

	so_ref(nv30->zsa->so, slot);
	return TRUE;
}

/* State-table entry: when NV30_NEW_ZSA is dirty the validate hook runs,
 * and on success the NV30_STATE_ZSA hw slot is scheduled for emission. */
struct nv30_state_entry nv30_state_zsa = {
.validate = nv30_state_zsa_validate,
.dirty = {
.pipe = NV30_NEW_ZSA,
.hw = NV30_STATE_ZSA
}
};

/*
 * Build the state object that programs the front and back stencil
 * reference values.  Always returns TRUE.
 */
static boolean
nv30_state_sr_validate(struct nv30_context *nv30)
{
	struct nouveau_grobj *rankine = nv30->screen->rankine;
	struct pipe_stencil_ref *ref = &nv30->stencil_ref;
	struct nouveau_stateobj *so = so_new(2, 2, 0);

	/* Front face */
	so_method(so, rankine, NV34TCL_STENCIL_FRONT_FUNC_REF, 1);
	so_data  (so, ref->ref_value[0]);

	/* Back face */
	so_method(so, rankine, NV34TCL_STENCIL_BACK_FUNC_REF, 1);
	so_data  (so, ref->ref_value[1]);

	so_ref(so, &nv30->state.hw[NV30_STATE_SR]);
	so_ref(NULL, &so);
	return TRUE;
}

/* State-table entry: when NV30_NEW_SR is dirty the validate hook runs,
 * and on success the NV30_STATE_SR hw slot is scheduled for emission. */
struct nv30_state_entry nv30_state_sr = {
.validate = nv30_state_sr_validate,
.dirty = {
.pipe = NV30_NEW_SR,
.hw = NV30_STATE_SR
}
};

+ 0
- 842
src/gallium/drivers/nv30/nv30_vertprog.c 파일 보기

@@ -1,842 +0,0 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"

#include "nv30_context.h"
#include "nv30_state.h"

/* TODO (at least...):
* 1. Indexed consts + ARL
* 2. Arb. swz/negation
* 3. NV_vp11, NV_vp2, NV_vp3 features
* - extra arith opcodes
* - branching
* - texture sampling
* - indexed attribs
* - indexed results
* 4. bugs
*/

/* Swizzle component selectors. */
#define SWZ_X 0
#define SWZ_Y 1
#define SWZ_Z 2
#define SWZ_W 3
/* Write-mask bits; note X is the most significant bit. */
#define MASK_X 8
#define MASK_Y 4
#define MASK_Z 2
#define MASK_W 1
#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
/* These are defined before nv30_shader.h is included.
 * NOTE(review): presumably that header consumes them as defaults for
 * the shader-register helpers -- confirm. */
#define DEF_SCALE 0
#define DEF_CTEST 0
#include "nv30_shader.h"

/* Source-register modifiers: return a copy of s with the given swizzle,
 * negation or absolute-value applied.  Note that abs() here shadows the
 * C library function of the same name within this file. */
#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
#define neg(s) nv30_sr_neg((s))
#define abs(s) nv30_sr_abs((s))

/* Per-translation scratch state for the vertex-program compiler. */
struct nv30_vpc {
/* Program being translated. */
struct nv30_vertex_program *vp;

/* Instruction currently being encoded (points into vp->insns). */
struct nv30_vertex_program_exec *vpi;

/* TGSI output register index -> hardware output destination. */
unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];

/* Highest TGSI temporary index seen so far (-1 when none). */
int high_temp;
/* Scratch temporaries handed out for the current TGSI instruction;
 * they are numbered above high_temp and reset per instruction. */
int temp_temp_count;

/* Immediate table, sized by counting immediate tokens up front. */
struct nv30_sreg *imm;
unsigned nr_imm;
};

/*
 * Hand out the next scratch temporary for the current instruction.
 * Scratch registers are numbered directly above the highest TGSI
 * temporary seen so far.
 */
static struct nv30_sreg
temp(struct nv30_vpc *vpc)
{
	const int slot = vpc->temp_temp_count + vpc->high_temp + 1;

	vpc->temp_temp_count++;
	return nv30_sr(NV30SR_TEMP, slot);
}

static struct nv30_sreg
constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
{
struct nv30_vertex_program *vp = vpc->vp;
struct nv30_vertex_program_data *vpd;
int idx;

if (pipe >= 0) {
for (idx = 0; idx < vp->nr_consts; idx++) {
if (vp->consts[idx].index == pipe)
return nv30_sr(NV30SR_CONST, idx);
}
}

idx = vp->nr_consts++;
vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
vpd = &vp->consts[idx];

vpd->index = pipe;
vpd->value[0] = x;
vpd->value[1] = y;
vpd->value[2] = z;
vpd->value[3] = w;
return nv30_sr(NV30SR_CONST, idx);
}

/* Shorthand for nv30_vp_arith(): `o` is pasted onto NV30_VP_INST_ to form
 * the opcode, e.g. arith(vpc, 0, OP_MOV, ...) uses NV30_VP_INST_OP_MOV. */
#define arith(cc,s,o,d,m,s0,s1,s2) \
nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))

/*
 * Encode one source operand into the instruction words.
 *
 *   vpc - compiler state; vpc->vpi is the instruction being built
 *   hw  - the instruction's four data words
 *   pos - source slot (0, 1 or 2); selects where the bits are packed
 *   src - the operand (register file, index, swizzle, negate, abs)
 *
 * Side effects: reading an input sets its bit in vp->ir; reading a
 * constant records its index in vpi->const_index (an instruction may
 * reference only one distinct constant, enforced by assert).
 */
static void
emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
{
struct nv30_vertex_program *vp = vpc->vp;
uint32_t sr = 0;

switch (src.type) {
case NV30SR_TEMP:
sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
break;
case NV30SR_INPUT:
sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
NV30_VP_SRC_REG_TYPE_SHIFT);
vp->ir |= (1 << src.index);
/* The input index lives in the instruction, not in the operand. */
hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
break;
case NV30SR_CONST:
sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
NV30_VP_SRC_REG_TYPE_SHIFT);
assert(vpc->vpi->const_index == -1 ||
vpc->vpi->const_index == src.index);
vpc->vpi->const_index = src.index;
break;
case NV30SR_NONE:
/* Unused operand slots are encoded as an input-type source. */
sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
NV30_VP_SRC_REG_TYPE_SHIFT);
break;
default:
assert(0);
}

if (src.negate)
sr |= NV30_VP_SRC_NEGATE;

/* Absolute-value flags sit in word 0, one bit per source slot. */
if (src.abs)
hw[0] |= (1 << (21 + pos));

sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
(src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
(src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
(src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));

/*
* |VVV|
* do.ob
* \u/
*
*/

/* Sources 0 and 2 straddle two instruction words; source 1 fits in one. */
switch (pos) {
case 0:
hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
NV30_VP_INST_SRC0L_SHIFT;
break;
case 1:
hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
break;
case 2:
hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
NV30_VP_INST_SRC2L_SHIFT;
break;
default:
assert(0);
}
}

/*
 * Encode the destination operand into the instruction words.
 *
 * Temporaries only need their index stored in word 0.  Outputs record
 * which result register is written in vp->or (colours, back-face
 * colours, fog, point size, texcoords 0-7), store the output index in
 * word 3 and set the remaining destination bits.  `slot` is unused.
 */
static void
emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
{
	struct nv30_vertex_program *vp = vpc->vp;
	int or_bit = -1;

	if (dst.type == NV30SR_TEMP) {
		hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
		return;
	}

	if (dst.type != NV30SR_OUTPUT) {
		assert(0);
		return;
	}

	/* Map the hardware destination onto its bit in the output mask. */
	switch (dst.index) {
	case NV30_VP_INST_DEST_COL0 : or_bit = 0; break;
	case NV30_VP_INST_DEST_COL1 : or_bit = 1; break;
	case NV30_VP_INST_DEST_BFC0 : or_bit = 2; break;
	case NV30_VP_INST_DEST_BFC1 : or_bit = 3; break;
	case NV30_VP_INST_DEST_FOGC : or_bit = 4; break;
	case NV30_VP_INST_DEST_PSZ  : or_bit = 5; break;
	case NV30_VP_INST_DEST_TC(0): or_bit = 14; break;
	case NV30_VP_INST_DEST_TC(1): or_bit = 15; break;
	case NV30_VP_INST_DEST_TC(2): or_bit = 16; break;
	case NV30_VP_INST_DEST_TC(3): or_bit = 17; break;
	case NV30_VP_INST_DEST_TC(4): or_bit = 18; break;
	case NV30_VP_INST_DEST_TC(5): or_bit = 19; break;
	case NV30_VP_INST_DEST_TC(6): or_bit = 20; break;
	case NV30_VP_INST_DEST_TC(7): or_bit = 21; break;
	default:
		break;
	}
	if (or_bit >= 0)
		vp->or |= (1 << or_bit);

	hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
	hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);

	/*XXX: no way this is entirely correct, someone needs to
	 *     figure out what exactly it is.
	 */
	hw[3] |= 0x800;
}

/*
 * Append one arithmetic instruction to the program.
 *
 *   slot - 0 for the vector unit, non-zero for the scalar unit; selects
 *          which write-mask field is used
 *   op   - hardware opcode
 *   dst  - destination register, written under `mask`
 *   s0..s2 - source operands (NV30SR_NONE for unused slots)
 *
 * The instruction array is grown through a temporary pointer so that a
 * failed realloc() neither leaks the existing array nor leads to a NULL
 * dereference; in that case an error is logged and nothing is emitted.
 */
static void
nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
	      struct nv30_sreg dst, int mask,
	      struct nv30_sreg s0, struct nv30_sreg s1,
	      struct nv30_sreg s2)
{
	struct nv30_vertex_program *vp = vpc->vp;
	struct nv30_vertex_program_exec *grown;
	uint32_t *hw;

	grown = realloc(vp->insns, (vp->nr_insns + 1) * sizeof(*vpc->vpi));
	if (!grown) {
		NOUVEAU_ERR("out of memory growing vertex program\n");
		return;
	}
	vp->insns = grown;
	vpc->vpi = &vp->insns[vp->nr_insns++];
	memset(vpc->vpi, 0, sizeof(*vpc->vpi));
	/* -1: no constant referenced yet (see emit_src). */
	vpc->vpi->const_index = -1;

	hw = vpc->vpi->data;

	/* Unconditional execution with an identity condition swizzle. */
	hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
	hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
		  (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
		  (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
		  (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));

	hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
//	hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
//	hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);

	/* The write mask lives in a different field depending on the
	 * destination file and on the execution unit. */
	if (dst.type == NV30SR_OUTPUT) {
		if (slot)
			hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
		else
			hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
	} else {
		if (slot)
			hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
		else
			hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
	}

	emit_dst(vpc, hw, slot, dst);
	emit_src(vpc, hw, 0, s0);
	emit_src(vpc, hw, 1, s1);
	emit_src(vpc, hw, 2, s2);
}

static INLINE struct nv30_sreg
tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
struct nv30_sreg src;

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index);
break;
case TGSI_FILE_CONSTANT:
src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
break;
case TGSI_FILE_IMMEDIATE:
src = vpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
if (vpc->high_temp < fsrc->Register.Index)
vpc->high_temp = fsrc->Register.Index;
src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index);
break;
default:
NOUVEAU_ERR("bad src file\n");
break;
}

src.abs = fsrc->Register.Absolute;
src.negate = fsrc->Register.Negate;
src.swz[0] = fsrc->Register.SwizzleX;
src.swz[1] = fsrc->Register.SwizzleY;
src.swz[2] = fsrc->Register.SwizzleZ;
src.swz[3] = fsrc->Register.SwizzleW;
return src;
}

static INLINE struct nv30_sreg
tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
struct nv30_sreg dst;

switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
dst = nv30_sr(NV30SR_OUTPUT,
vpc->output_map[fdst->Register.Index]);

break;
case TGSI_FILE_TEMPORARY:
dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index);
if (vpc->high_temp < dst.index)
vpc->high_temp = dst.index;
break;
default:
NOUVEAU_ERR("bad dst file\n");
break;
}

return dst;
}

/* Translate a TGSI write mask into the MASK_X/Y/Z/W bit layout used by
 * this file. */
static INLINE int
tgsi_mask(uint tgsi)
{
	int hw_mask = 0;

	hw_mask |= (tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0;
	hw_mask |= (tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0;
	hw_mask |= (tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0;
	hw_mask |= (tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0;

	return hw_mask;
}

/*
 * Translate one TGSI instruction into hardware instruction(s).
 *
 * Sources are gathered first.  Temporaries are converted up front so that
 * high_temp is final before any scratch register is handed out.  An
 * instruction may reference only one distinct input and one distinct
 * constant/immediate index; further ones are first copied into scratch
 * temporaries with a MOV.
 *
 * Returns TRUE on success (END is accepted and emits nothing), FALSE on
 * an unsupported source file or opcode.
 */
static boolean
nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
const struct tgsi_full_instruction *finst)
{
struct nv30_sreg src[3], dst, tmp;
struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
int mask;
/* Index of the input / constant already used directly, -1 if none. */
int ai = -1, ci = -1;
int i;

if (finst->Instruction.Opcode == TGSI_OPCODE_END)
return TRUE;

/* Scratch temporaries are recycled for every TGSI instruction. */
vpc->temp_temp_count = 0;
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;

fsrc = &finst->Src[i];
if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(vpc, fsrc);
}
}

for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;

fsrc = &finst->Src[i];
switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
if (ai == -1 || ai == fsrc->Register.Index) {
ai = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
/* Second distinct input: stage it through a scratch temp. */
src[i] = temp(vpc);
arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
tgsi_src(vpc, fsrc), none, none);
}
break;
/*XXX: index comparison is broken now that consts come from
* two different register files.
*/
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
if (ci == -1 || ci == fsrc->Register.Index) {
ci = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
/* Second distinct constant: stage it through a scratch temp. */
src[i] = temp(vpc);
arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
tgsi_src(vpc, fsrc), none, none);
}
break;
case TGSI_FILE_TEMPORARY:
/* handled above */
break;
default:
NOUVEAU_ERR("bad src file\n");
return FALSE;
}
}

dst = tgsi_dst(vpc, &finst->Dst[0]);
mask = tgsi_mask(finst->Dst[0].Register.WriteMask);

/* Vector-unit ops (slot 0) take operands in source slots 0/1 (ADD and SUB
 * use 0 and 2); scalar-unit ops (slot 1) take theirs in slot 2. */
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
break;
case TGSI_OPCODE_ADD:
arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
break;
case TGSI_OPCODE_ARL:
arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_DP3:
arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_DP4:
arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_DPH:
arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_DST:
arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_EX2:
arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_EXP:
arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_FLR:
arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_FRC:
arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_LG2:
arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_LIT:
arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_LOG:
arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_MAD:
arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
break;
case TGSI_OPCODE_MAX:
arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_MIN:
arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_MOV:
arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_MUL:
arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_POW:
/* pow(a, b) = exp2(b * log2(a)), computed on the x components. */
tmp = temp(vpc);
arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
swz(src[0], X, X, X, X));
arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
swz(src[1], X, X, X, X), none);
arith(vpc, 1, OP_EX2, dst, mask, none, none,
swz(tmp, X, X, X, X));
break;
case TGSI_OPCODE_RCP:
arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_RET:
break;
case TGSI_OPCODE_RSQ:
arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
break;
case TGSI_OPCODE_SGE:
arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_SGT:
arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_SLT:
arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_SUB:
/* a - b is emitted as a + (-b). */
arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
break;
case TGSI_OPCODE_XPD:
/* Cross product in two steps: tmp = a.zxy * b.yzx, then
 * dst.xyz = a.yzx * b.zxy - tmp. */
tmp = temp(vpc);
arith(vpc, 0, OP_MUL, tmp, mask,
swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
neg(tmp));
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
return FALSE;
}

return TRUE;
}

/*
 * Translate one TGSI output declaration into the NV30 hardware output
 * register it should be written to, and record that mapping in
 * vpc->output_map[] under the declaration's first register index.
 *
 * Returns TRUE on success, FALSE (after logging) when the semantic or
 * its index cannot be expressed on this hardware.
 */
static boolean
nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
				const struct tgsi_full_declaration *fdec)
{
	int hw_reg;

	switch (fdec->Semantic.Name) {
	case TGSI_SEMANTIC_POSITION:
		hw_reg = NV30_VP_INST_DEST_POS;
		break;

	case TGSI_SEMANTIC_FOG:
		hw_reg = NV30_VP_INST_DEST_FOGC;
		break;

	case TGSI_SEMANTIC_PSIZE:
		hw_reg = NV30_VP_INST_DEST_PSZ;
		break;

	case TGSI_SEMANTIC_COLOR:
		/* Only two front-facing colours are accepted. */
		switch (fdec->Semantic.Index) {
		case 0:
			hw_reg = NV30_VP_INST_DEST_COL0;
			break;
		case 1:
			hw_reg = NV30_VP_INST_DEST_COL1;
			break;
		default:
			NOUVEAU_ERR("bad colour semantic index\n");
			return FALSE;
		}
		break;

	case TGSI_SEMANTIC_BCOLOR:
		/* Likewise for the two back-facing colours. */
		switch (fdec->Semantic.Index) {
		case 0:
			hw_reg = NV30_VP_INST_DEST_BFC0;
			break;
		case 1:
			hw_reg = NV30_VP_INST_DEST_BFC1;
			break;
		default:
			NOUVEAU_ERR("bad bcolour semantic index\n");
			return FALSE;
		}
		break;

	case TGSI_SEMANTIC_GENERIC:
		/* Generic outputs 0..7 map onto the texcoord outputs. */
		if (fdec->Semantic.Index > 7) {
			NOUVEAU_ERR("bad generic semantic index\n");
			return FALSE;
		}
		hw_reg = NV30_VP_INST_DEST_TC(fdec->Semantic.Index);
		break;

	case TGSI_SEMANTIC_EDGEFLAG:
		NOUVEAU_ERR("cannot handle edgeflag output\n");
		return FALSE;

	default:
		NOUVEAU_ERR("bad output semantic\n");
		return FALSE;
	}

	vpc->output_map[fdec->Range.First] = hw_reg;
	return TRUE;
}

static boolean
nv30_vertprog_prepare(struct nv30_vpc *vpc)
{
struct tgsi_parse_context p;
int nr_imm = 0;

tgsi_parse_init(&p, vpc->vp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
const union tgsi_full_token *tok = &p.FullToken;

tgsi_parse_token(&p);
switch(tok->Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
nr_imm++;
break;
default:
break;
}
}
tgsi_parse_free(&p);

if (nr_imm) {
vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
assert(vpc->imm);
}

return TRUE;
}

/*
 * Translate the TGSI tokens of a vertex program into NV30 hardware
 * instructions stored in vp->insns[].
 *
 * On success vp->translated is set to TRUE; on any failure it is left
 * untouched so the caller can detect that translation did not complete.
 * All temporary compiler state (vpc and its immediate table) is released
 * before returning, on both the success and the error path.
 */
static void
nv30_vertprog_translate(struct nv30_context *nv30,
			struct nv30_vertex_program *vp)
{
	struct tgsi_parse_context parse;
	struct nv30_vpc *vpc = NULL;

	tgsi_dump(vp->pipe.tokens,0);

	vpc = CALLOC(1, sizeof(struct nv30_vpc));
	if (!vpc)
		return;
	vpc->vp = vp;
	vpc->high_temp = -1;

	if (!nv30_vertprog_prepare(vpc)) {
		/* prepare() may have allocated the immediate table. */
		if (vpc->imm)
			FREE(vpc->imm);
		FREE(vpc);
		return;
	}

	tgsi_parse_init(&parse, vp->pipe.tokens);

	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		switch (parse.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *fdec;
			fdec = &parse.FullToken.FullDeclaration;
			switch (fdec->Declaration.File) {
			case TGSI_FILE_OUTPUT:
				if (!nv30_vertprog_parse_decl_output(vpc, fdec))
					goto out_err;
				break;
			default:
				break;
			}
		}
			break;
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			const struct tgsi_full_immediate *imm;

			imm = &parse.FullToken.FullImmediate;
			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
			assert(imm->Immediate.NrTokens == 4 + 1);
			/* Slot count was established by nv30_vertprog_prepare(). */
			vpc->imm[vpc->nr_imm++] =
				constant(vpc, -1,
					 imm->u[0].Float,
					 imm->u[1].Float,
					 imm->u[2].Float,
					 imm->u[3].Float);
		}
			break;
		case TGSI_TOKEN_TYPE_INSTRUCTION:
		{
			const struct tgsi_full_instruction *finst;
			finst = &parse.FullToken.FullInstruction;
			if (!nv30_vertprog_parse_instruction(vpc, finst))
				goto out_err;
		}
			break;
		default:
			break;
		}
	}

	/* A program that emitted nothing has no "last" instruction to tag;
	 * indexing insns[nr_insns - 1] would be out of bounds.
	 */
	if (!vp->nr_insns) {
		NOUVEAU_ERR("vertex program contains no instructions\n");
		goto out_err;
	}

	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
	vp->translated = TRUE;
out_err:
	tgsi_parse_free(&parse);
	/* The immediate table belongs to the compile context only. */
	if (vpc->imm)
		FREE(vpc->imm);
	FREE(vpc);
}

/*
 * State validation hook for the bound vertex program.
 *
 * Steps, in order:
 *   1. translate the TGSI shader if that has not happened yet;
 *   2. allocate instruction ("exec") and constant ("data") slots from the
 *      screen heaps, evicting other programs when the heap is full;
 *   3. patch constant references if the data segment moved;
 *   4. refresh and upload constant values;
 *   5. upload the instructions if required.
 *
 * Returns TRUE when the program's state object was newly bound into
 * nv30->state.hw[NV30_STATE_VERTPROG], FALSE otherwise (including the
 * case where translation failed).
 */
static boolean
nv30_vertprog_validate(struct nv30_context *nv30)
{
	struct pipe_screen *pscreen = nv30->pipe.screen;
	struct nv30_screen *screen = nv30->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *rankine = screen->rankine;
	struct nv30_vertex_program *vp;
	struct pipe_buffer *constbuf;
	boolean upload_code = FALSE, upload_data = FALSE;
	int i;

	vp = nv30->vertprog;
	constbuf = nv30->constbuf[PIPE_SHADER_VERTEX];

	/* Translate TGSI shader into hw bytecode */
	if (!vp->translated) {
		nv30_vertprog_translate(nv30, vp);
		if (!vp->translated)
			return FALSE;
	}

	/* Allocate hw vtxprog exec slots */
	if (!vp->exec) {
		struct nouveau_resource *heap = nv30->screen->vp_exec_heap;
		struct nouveau_stateobj *so;
		uint vplen = vp->nr_insns;

		if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
			/* Evict other programs until the request fits. */
			while (heap->next && heap->size < vplen) {
				struct nv30_vertex_program *evict;
				evict = heap->next->priv;
				nouveau_resource_free(&evict->exec);
			}

			if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
				assert(0);
		}

		so = so_new(1, 1, 0);
		so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
		so_data  (so, vp->exec->start);
		so_ref(so, &vp->so);
		so_ref(NULL, &so);

		upload_code = TRUE;
	}

	/* Allocate hw vtxprog const slots */
	if (vp->nr_consts && !vp->data) {
		struct nouveau_resource *heap = nv30->screen->vp_data_heap;

		if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
			while (heap->next && heap->size < vp->nr_consts) {
				struct nv30_vertex_program *evict;
				evict = heap->next->priv;
				nouveau_resource_free(&evict->data);
			}

			if (nouveau_resource_alloc(heap, vp->nr_consts, vp,
						   &vp->data))
				assert(0);
		}

		/*XXX: handle this some day */
		assert(vp->data->start >= vp->data_start_min);

		upload_data = TRUE;
		if (vp->data_start != vp->data->start)
			upload_code = TRUE;
	}

	/* If exec or data segments moved we need to patch the program to
	 * fixup offsets and register IDs.
	 */
	if (vp->exec_start != vp->exec->start) {
		for (i = 0; i < vp->nr_insns; i++) {
			struct nv30_vertex_program_exec *vpi = &vp->insns[i];

			/* Branch relocation is not implemented. */
			if (vpi->has_branch_offset) {
				assert(0);
			}
		}

		vp->exec_start = vp->exec->start;
	}

	if (vp->nr_consts && vp->data_start != vp->data->start) {
		for (i = 0; i < vp->nr_insns; i++) {
			struct nv30_vertex_program_exec *vpi = &vp->insns[i];

			if (vpi->const_index >= 0) {
				vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
				vpi->data[1] |=
					(vpi->const_index + vp->data->start) <<
					NV30_VP_INST_CONST_SRC_SHIFT;

			}
		}

		vp->data_start = vp->data->start;
	}

	/* Update + Upload constant values */
	if (vp->nr_consts) {
		float *map = NULL;

		if (constbuf) {
			map = pipe_buffer_map(pscreen, constbuf,
					      PIPE_BUFFER_USAGE_CPU_READ);
		}

		for (i = 0; i < vp->nr_consts; i++) {
			struct nv30_vertex_program_data *vpd = &vp->consts[i];

			if (vpd->index >= 0) {
				if (map) {
					if (!upload_data &&
					    !memcmp(vpd->value,
						    &map[vpd->index * 4],
						    4 * sizeof(float)))
						continue;
					memcpy(vpd->value, &map[vpd->index * 4],
					       4 * sizeof(float));
				} else if (!upload_data) {
					/* No constant buffer is mapped (none
					 * bound, or mapping failed): there is
					 * nothing to refresh from, so keep the
					 * cached value instead of reading
					 * through a NULL pointer.
					 */
					continue;
				}
			}

			BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
			OUT_RING  (chan, i + vp->data->start);
			OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
		}

		/* Only unmap what was actually mapped. */
		if (map)
			pipe_buffer_unmap(pscreen, constbuf);
	}

	/* Upload vtxprog */
	if (upload_code) {
#if 0
		for (i = 0; i < vp->nr_insns; i++) {
			NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
				i, vp->insns[i].data[0], vp->insns[i].data[1],
				vp->insns[i].data[2], vp->insns[i].data[3]);
		}
#endif
		BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
		OUT_RING  (chan, vp->exec->start);
		for (i = 0; i < vp->nr_insns; i++) {
			BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
			OUT_RINGp (chan, vp->insns[i].data, 4);
		}
	}

	if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) {
		so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]);
		return TRUE;
	}

	return FALSE;
}

/*
 * Release everything a vertex program owns: its translated instructions,
 * its constant table, its exec/data heap allocations and its state
 * object.  The program is left marked as untranslated with all
 * bookkeeping reset to zero.
 */
void
nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
{
	vp->translated = FALSE;

	/* Translated instruction stream. */
	if (vp->nr_insns != 0) {
		FREE(vp->insns);
		vp->nr_insns = 0;
		vp->insns = NULL;
	}

	/* Constant table. */
	if (vp->nr_consts != 0) {
		FREE(vp->consts);
		vp->nr_consts = 0;
		vp->consts = NULL;
	}

	/* Hardware slot allocations and their cached start offsets. */
	nouveau_resource_free(&vp->exec);
	vp->exec_start = 0;

	nouveau_resource_free(&vp->data);
	vp->data_start_min = 0;
	vp->data_start = 0;

	vp->or = 0;
	vp->ir = 0;

	so_ref(NULL, &vp->so);
}

/*
 * State-table entry for the vertex program: validated by
 * nv30_vertprog_validate() when NV30_NEW_VERTPROG is flagged.
 */
struct nv30_state_entry nv30_state_vertprog = {
	.validate   = nv30_vertprog_validate,
	.dirty.pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
	.dirty.hw   = NV30_STATE_VERTPROG,
};

+ 0
- 29
src/gallium/drivers/nv40/Makefile 파일 보기

@@ -1,29 +0,0 @@
# Build description for the nv40 driver sources in this directory.
# TOP points at the root of the source tree; the active build
# configuration is pulled in from there.
TOP = ../../../..
include $(TOP)/configs/current

# Library name; presumably consumed by Makefile.template below -- confirm.
LIBNAME = nv40

# C sources that make up the driver.
C_SOURCES = \
nv40_clear.c \
nv40_context.c \
nv40_draw.c \
nv40_fragprog.c \
nv40_fragtex.c \
nv40_miptree.c \
nv40_query.c \
nv40_screen.c \
nv40_state.c \
nv40_state_blend.c \
nv40_state_emit.c \
nv40_state_fb.c \
nv40_state_rasterizer.c \
nv40_state_scissor.c \
nv40_state_stipple.c \
nv40_state_viewport.c \
nv40_state_zsa.c \
nv40_surface.c \
nv40_transfer.c \
nv40_vbo.c \
nv40_vertprog.c

# Shared build rules; must come after the variables above are set.
include ../../Makefile.template

+ 0
- 88
src/gallium/drivers/nv40/nv40_context.c 파일 보기

@@ -1,88 +0,0 @@
#include "draw/draw_context.h"
#include "pipe/p_defines.h"

#include "nv40_context.h"
#include "nv40_screen.h"

/*
 * pipe_context::flush implementation.
 *
 * When a texture cache flush is requested, method 0x1fd8 is written with
 * 2 and then 1 before the push buffer is fired.  No fence object is
 * produced: *fence is set to NULL when the caller asked for one.
 */
static void
nv40_flush(struct pipe_context *pipe, unsigned flags,
	   struct pipe_fence_handle **fence)
{
	struct nv40_context *ctx = nv40_context(pipe);
	struct nouveau_channel *chan = ctx->screen->base.channel;
	struct nouveau_grobj *curie = ctx->screen->curie;

	if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
		static const unsigned tex_flush_seq[2] = { 2, 1 };
		unsigned step;

		for (step = 0; step < 2; step++) {
			BEGIN_RING(chan, curie, 0x1fd8, 1);
			OUT_RING  (chan, tex_flush_seq[step]);
		}
	}

	FIRE_RING(chan);

	if (fence != NULL)
		*fence = NULL;
}

/*
 * pipe_context::destroy implementation: drop every cached hardware state
 * object, tear down the draw module if one exists, and free the context.
 */
static void
nv40_destroy(struct pipe_context *pipe)
{
	struct nv40_context *ctx = nv40_context(pipe);
	unsigned idx;

	for (idx = 0; idx != NV40_STATE_MAX; ++idx) {
		struct nouveau_stateobj **slot = &ctx->state.hw[idx];

		if (*slot != NULL)
			so_ref(NULL, slot);
	}

	if (ctx->draw != NULL)
		draw_destroy(ctx->draw);

	FREE(ctx);
}

struct pipe_context *
nv40_create(struct pipe_screen *pscreen, void *priv)
{
struct nv40_screen *screen = nv40_screen(pscreen);
struct pipe_winsys *ws = pscreen->winsys;
struct nv40_context *nv40;
struct nouveau_winsys *nvws = screen->nvws;

nv40 = CALLOC(1, sizeof(struct nv40_context));
if (!nv40)
return NULL;
nv40->screen = screen;

nv40->nvws = nvws;

nv40->pipe.winsys = ws;
nv40->pipe.priv = priv;
nv40->pipe.screen = pscreen;
nv40->pipe.destroy = nv40_destroy;
nv40->pipe.draw_arrays = nv40_draw_arrays;
nv40->pipe.draw_elements = nv40_draw_elements;
nv40->pipe.clear = nv40_clear;
nv40->pipe.flush = nv40_flush;

nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;

screen->base.channel->user_private = nv40;
screen->base.channel->flush_notify = nv40_state_flush_notify;

nv40_init_query_functions(nv40);
nv40_init_surface_functions(nv40);
nv40_init_state_functions(nv40);
nv40_init_transfer_functions(nv40);

/* Create, configure, and install fallback swtnl path */
nv40->draw = draw_create();
draw_wide_point_threshold(nv40->draw, 9999999.0);
draw_wide_line_threshold(nv40->draw, 9999999.0);
draw_enable_line_stipple(nv40->draw, FALSE);
draw_enable_point_sprites(nv40->draw, FALSE);
draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));

return &nv40->pipe;
}

+ 0
- 246
src/gallium/drivers/nv40/nv40_context.h 파일 보기

@@ -1,246 +0,0 @@
#ifndef __NV40_CONTEXT_H__
#define __NV40_CONTEXT_H__

#include <stdio.h>

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_compiler.h"

#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_inlines.h"

#include "draw/draw_vertex.h"

#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
#include "nouveau/nouveau_stateobj.h"

#include "nv40_state.h"

/*
 * Diagnostic helpers writing to stderr.
 *   NOUVEAU_ERR prefixes the message with the calling function and line.
 *   NOUVEAU_MSG prefixes the message with "nouveau: ".
 * NOTE: both expansions end in a semicolon, so they behave as complete
 * statements; take care when using them as the sole body of an if/else.
 */
#define NOUVEAU_ERR(fmt, ...) \
	fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##__VA_ARGS__);
#define NOUVEAU_MSG(fmt, ...) \
	fprintf(stderr, "nouveau: "fmt, ##__VA_ARGS__);

/*
 * Indices of the hardware state objects tracked per context (see the
 * hw[] array in struct nv40_state).  Values are consecutive starting at
 * zero; NV40_STATE_MAX is the number of entries.
 */
enum nv40_state_index {
	NV40_STATE_FB = 0,
	NV40_STATE_VIEWPORT,
	NV40_STATE_BLEND,
	NV40_STATE_RAST,
	NV40_STATE_ZSA,
	NV40_STATE_BCOL,
	NV40_STATE_CLIP,
	NV40_STATE_SCISSOR,
	NV40_STATE_STIPPLE,
	NV40_STATE_FRAGPROG,
	NV40_STATE_VERTPROG,
	/* Sixteen fragment texture units. */
	NV40_STATE_FRAGTEX0,
	NV40_STATE_FRAGTEX1,
	NV40_STATE_FRAGTEX2,
	NV40_STATE_FRAGTEX3,
	NV40_STATE_FRAGTEX4,
	NV40_STATE_FRAGTEX5,
	NV40_STATE_FRAGTEX6,
	NV40_STATE_FRAGTEX7,
	NV40_STATE_FRAGTEX8,
	NV40_STATE_FRAGTEX9,
	NV40_STATE_FRAGTEX10,
	NV40_STATE_FRAGTEX11,
	NV40_STATE_FRAGTEX12,
	NV40_STATE_FRAGTEX13,
	NV40_STATE_FRAGTEX14,
	NV40_STATE_FRAGTEX15,
	/* Four vertex texture units. */
	NV40_STATE_VERTTEX0,
	NV40_STATE_VERTTEX1,
	NV40_STATE_VERTTEX2,
	NV40_STATE_VERTTEX3,
	NV40_STATE_VTXBUF,
	NV40_STATE_VTXFMT,
	NV40_STATE_VTXATTR,
	NV40_STATE_SR,
	NV40_STATE_MAX
};

#include "nv40_screen.h"

/* Dirty bits for pipe-level state, one bit per state group. */
#define NV40_NEW_BLEND		0x0001
#define NV40_NEW_RAST		0x0002
#define NV40_NEW_ZSA		0x0004
#define NV40_NEW_SAMPLER	0x0008
#define NV40_NEW_FB		0x0010
#define NV40_NEW_STIPPLE	0x0020
#define NV40_NEW_SCISSOR	0x0040
#define NV40_NEW_VIEWPORT	0x0080
#define NV40_NEW_BCOL		0x0100
#define NV40_NEW_VERTPROG	0x0200
#define NV40_NEW_FRAGPROG	0x0400
#define NV40_NEW_ARRAYS		0x0800
#define NV40_NEW_UCP		0x1000
#define NV40_NEW_SR		0x2000

/* Rasterizer CSO: the Gallium state description paired with a nouveau
 * state object (presumably the pre-built hw commands -- confirm). */
struct nv40_rasterizer_state {
struct pipe_rasterizer_state pipe;
struct nouveau_stateobj *so;
};

/* Depth/stencil/alpha CSO: the Gallium state description paired with a
 * nouveau state object. */
struct nv40_zsa_state {
struct pipe_depth_stencil_alpha_state pipe;
struct nouveau_stateobj *so;
};

/* Blend CSO: the Gallium state description paired with a nouveau state
 * object. */
struct nv40_blend_state {
struct pipe_blend_state pipe;
struct nouveau_stateobj *so;
};


/* Derived hardware state tracked per context. */
struct nv40_state {
unsigned scissor_enabled;
unsigned stipple_enabled;
unsigned fp_samplers;

/* Bitmask with one bit per enum nv40_state_index value. */
uint64_t dirty;
/* State objects, indexed by enum nv40_state_index. */
struct nouveau_stateobj *hw[NV40_STATE_MAX];
};


/* Vertex element state: up to 16 element descriptions plus the number
 * of entries in use. */
struct nv40_vtxelt_state {
struct pipe_vertex_element pipe[16];
unsigned num_elements;
};

/*
 * Per-context driver state for nv40.
 *
 * `pipe` must remain the first member: nv40_context() converts a
 * struct pipe_context pointer to this type with a plain cast.
 */
struct nv40_context {
struct pipe_context pipe;

struct nouveau_winsys *nvws;
struct nv40_screen *screen;

/* Draw module used by the software TNL fallback path. */
struct draw_context *draw;

/* HW state derived from pipe states */
struct nv40_state state;
/* Software TNL fallback: pass-through vertex program plus the
 * attribute layout filled in by emit_attrib() (hw attribute id, draw
 * output slot and emit format per attribute). */
struct {
struct nv40_vertex_program *vertprog;

unsigned nr_attribs;
unsigned hw[PIPE_MAX_SHADER_INPUTS];
unsigned draw[PIPE_MAX_SHADER_INPUTS];
unsigned emit[PIPE_MAX_SHADER_INPUTS];
} swtnl;

enum {
HW, SWTNL, SWRAST
} render_mode;
unsigned fallback_swtnl;
unsigned fallback_swrast;

/* Context state */
unsigned dirty, draw_dirty;
struct pipe_scissor_state scissor;
unsigned stipple[32];
struct pipe_clip_state clip;
struct nv40_vertex_program *vertprog;
struct nv40_fragment_program *fragprog;
/* Bound constant buffers and their sizes, indexed by shader type. */
struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
unsigned constbuf_nr[PIPE_SHADER_TYPES];
struct nv40_rasterizer_state *rasterizer;
struct nv40_zsa_state *zsa;
struct nv40_blend_state *blend;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state framebuffer;
struct pipe_buffer *idxbuf;
unsigned idxbuf_format;
struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
unsigned nr_textures;
unsigned dirty_samplers;
/* Bound vertex buffers; vtxbuf_nr entries are valid. */
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
struct nv40_vtxelt_state *vtxelt;
};

/* Downcast a generic pipe_context to the nv40 context that embeds it as
 * its first member. */
static INLINE struct nv40_context *
nv40_context(struct pipe_context *pipe)
{
	struct nv40_context *nv40 = (struct nv40_context *)pipe;

	return nv40;
}

/*
 * One entry of the state validation table: a validate callback together
 * with the pipe-level dirty bits (NV40_NEW_*) and the hardware state
 * index (NV40_STATE_*) it is associated with.
 */
struct nv40_state_entry {
boolean (*validate)(struct nv40_context *nv40);
struct {
unsigned pipe;
unsigned hw;
} dirty;
};

extern void nv40_init_state_functions(struct nv40_context *nv40);
extern void nv40_init_surface_functions(struct nv40_context *nv40);
extern void nv40_init_query_functions(struct nv40_context *nv40);
extern void nv40_init_transfer_functions(struct nv40_context *nv40);

extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);

/* nv40_draw.c */
extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
extern void nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf,
unsigned ib_size, unsigned mode,
unsigned start, unsigned count);

/* nv40_vertprog.c */
extern void nv40_vertprog_destroy(struct nv40_context *,
struct nv40_vertex_program *);

/* nv40_fragprog.c */
extern void nv40_fragprog_destroy(struct nv40_context *,
struct nv40_fragment_program *);

/* nv40_fragtex.c */
extern void nv40_fragtex_bind(struct nv40_context *);

/* nv40_state.c and friends */
extern boolean nv40_state_validate(struct nv40_context *nv40);
extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
extern void nv40_state_emit(struct nv40_context *nv40);
extern void nv40_state_flush_notify(struct nouveau_channel *chan);
extern struct nv40_state_entry nv40_state_rasterizer;
extern struct nv40_state_entry nv40_state_scissor;
extern struct nv40_state_entry nv40_state_stipple;
extern struct nv40_state_entry nv40_state_fragprog;
extern struct nv40_state_entry nv40_state_vertprog;
extern struct nv40_state_entry nv40_state_blend;
extern struct nv40_state_entry nv40_state_blend_colour;
extern struct nv40_state_entry nv40_state_zsa;
extern struct nv40_state_entry nv40_state_viewport;
extern struct nv40_state_entry nv40_state_framebuffer;
extern struct nv40_state_entry nv40_state_fragtex;
extern struct nv40_state_entry nv40_state_vbo;
extern struct nv40_state_entry nv40_state_vtxfmt;
extern struct nv40_state_entry nv40_state_sr;

/* nv40_vbo.c */
extern void nv40_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
extern void nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
unsigned count);

/* nv40_clear.c */
extern void nv40_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil);

/* nv40_context.c */
struct pipe_context *
nv40_create(struct pipe_screen *pscreen, void *priv);

#endif

+ 0
- 360
src/gallium/drivers/nv40/nv40_draw.c 파일 보기

@@ -1,360 +0,0 @@
#include "pipe/p_shader_tokens.h"
#include "util/u_inlines.h"

#include "util/u_pack_color.h"

#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pipe.h"

#include "nv40_context.h"
#define NV40_SHADER_NO_FUCKEDNESS
#include "nv40_shader.h"

/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
* often at all. Uses "quadro style" vertex submission + a fixed vertex
* layout to avoid the need to generate a vertex program or vtxfmt.
*/

/*
 * Final stage of the draw pipeline for the swtnl fallback.
 * `stage` must stay first: nv40_render_stage() casts a draw_stage
 * pointer to this type.  `prim` holds the primitive mode currently open
 * on the hardware (NV40TCL_BEGIN_END_STOP when none is open).
 */
struct nv40_render_stage {
struct draw_stage stage;
struct nv40_context *nv40;
unsigned prim;
};

/* Downcast a generic draw_stage to the nv40 render stage embedding it. */
static INLINE struct nv40_render_stage *
nv40_render_stage(struct draw_stage *stage)
{
	struct nv40_render_stage *rs = (struct nv40_render_stage *)stage;

	return rs;
}

/*
 * Push one post-transform vertex into the command stream, attribute by
 * attribute, using the layout recorded in nv40->swtnl (draw output
 * slot, hardware attribute id and emit format).
 */
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
	struct nouveau_channel *chan = nv40->screen->base.channel;
	struct nouveau_grobj *curie = nv40->screen->curie;
	unsigned a, c;

	for (a = 0; a < nv40->swtnl.nr_attribs; a++) {
		const unsigned slot = nv40->swtnl.draw[a];
		const unsigned attr = nv40->swtnl.hw[a];

		switch (nv40->swtnl.emit[a]) {
		case EMIT_OMIT:
			break;
		case EMIT_1F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(attr), 1);
			OUT_RING  (chan, fui(v->data[slot][0]));
			break;
		case EMIT_2F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(attr), 2);
			for (c = 0; c < 2; c++)
				OUT_RING(chan, fui(v->data[slot][c]));
			break;
		case EMIT_3F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(attr), 3);
			for (c = 0; c < 3; c++)
				OUT_RING(chan, fui(v->data[slot][c]));
			break;
		case EMIT_4F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(attr), 4);
			for (c = 0; c < 4; c++)
				OUT_RING(chan, fui(v->data[slot][c]));
			break;
		case EMIT_4UB:
			/* Four floats packed into a single dword. */
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(attr), 1);
			OUT_RING  (chan,
				   pack_ub4(float_to_ubyte(v->data[slot][0]),
					    float_to_ubyte(v->data[slot][1]),
					    float_to_ubyte(v->data[slot][2]),
					    float_to_ubyte(v->data[slot][3])));
			break;
		default:
			assert(0);
			break;
		}
	}
}

/*
 * Emit one primitive of `count` vertices in hardware mode `mode`.
 * Consecutive primitives of the same mode share a single BEGIN/END
 * pair; the open mode is remembered in the render stage.
 */
static INLINE void
nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
		 unsigned mode, unsigned count)
{
	struct nv40_render_stage *rs = nv40_render_stage(stage);
	struct nv40_context *nv40 = rs->nv40;
	struct nouveau_channel *chan = nv40->screen->base.channel;
	struct nouveau_grobj *curie = nv40->screen->curie;
	/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
	const unsigned needed = (count * 20) + 6;
	unsigned vtx;

	if (AVAIL_RING(chan) < needed) {
		/* A primitive must not be open across a flush. */
		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
			NOUVEAU_ERR("AIII, missed flush\n");
			assert(0);
		}
		FIRE_RING(chan);
		nv40_state_emit(nv40);
	}

	/* Switch primitive modes if necessary */
	if (mode != rs->prim) {
		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
			BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
			OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
		}

		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
		OUT_RING  (chan, mode);
		rs->prim = mode;
	}

	/* Emit vertex data */
	for (vtx = 0; vtx < count; vtx++)
		nv40_render_vertex(nv40, prim->v[vtx]);

	/* If it's likely we'll need to empty the push buffer soon, finish
	 * off the primitive now.
	 */
	if (AVAIL_RING(chan) < needed) {
		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
		OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
		rs->prim = NV40TCL_BEGIN_END_STOP;
	}
}

/* draw_stage::point hook: emit a single-vertex point primitive. */
static void
nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
{
	const unsigned nr_verts = 1;

	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, nr_verts);
}

/* draw_stage::line hook: emit a two-vertex line primitive. */
static void
nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
{
	const unsigned nr_verts = 2;

	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, nr_verts);
}

/* draw_stage::tri hook: emit a three-vertex triangle primitive. */
static void
nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
	const unsigned nr_verts = 3;

	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, nr_verts);
}

/*
 * draw_stage::flush hook: close the primitive currently open on the
 * hardware, if there is one.  `flags` is not used.
 */
static void
nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
	struct nv40_render_stage *rs = nv40_render_stage(draw);
	struct nv40_screen *screen = rs->nv40->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *curie = screen->curie;

	/* Nothing open, nothing to close. */
	if (rs->prim == NV40TCL_BEGIN_END_STOP)
		return;

	BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
	OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
	rs->prim = NV40TCL_BEGIN_END_STOP;
}

/* draw_stage::reset_stipple_counter hook: intentionally does nothing. */
static void
nv40_render_reset_stipple_counter(struct draw_stage *draw)
{
	(void)draw;
}

/* draw_stage::destroy hook: release the render stage allocation. */
static void
nv40_render_destroy(struct draw_stage *draw)
{
	struct nv40_render_stage *rs = nv40_render_stage(draw);

	FREE(rs);
}

/*
 * Append one pre-encoded MOV instruction to a vertex program.
 *
 * dst:  hardware output register written by the instruction.
 * src:  input attribute read; also recorded in the vp->ir bitmask.
 * vor:  bit to set in the vp->or bitmask, or ~0 to set none.
 * mask: write mask placed in the instruction word.
 *
 * If the instruction array cannot be grown, the program is left exactly
 * as it was (the existing instructions stay valid and owned by vp) and
 * an error is logged.  Assigning realloc()'s result straight back to
 * vp->insns, as was done before, would have lost the old array and then
 * written through a NULL pointer.
 */
static INLINE void
emit_mov(struct nv40_vertex_program *vp,
	 unsigned dst, unsigned src, unsigned vor, unsigned mask)
{
	struct nv40_vertex_program_exec *inst;
	void *grown;

	grown = realloc(vp->insns,
			sizeof(struct nv40_vertex_program_exec) *
			(vp->nr_insns + 1));
	if (!grown) {
		NOUVEAU_ERR("out of memory while growing vertex program\n");
		return;
	}
	vp->insns = grown;
	inst = &vp->insns[vp->nr_insns++];

	inst->data[0] = 0x401f9c6c;
	inst->data[1] = 0x0040000d | (src << 8);
	inst->data[2] = 0x8106c083;
	inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
	inst->const_index = -1;
	inst->has_branch_offset = FALSE;

	vp->ir |= (1 << src);
	if (vor != ~0)
		vp->or |= (1 << vor);
}

/*
 * Build the pass-through vertex program used by the swtnl fallback: a
 * sequence of MOVs copying position, both colours (front and back),
 * fog and eight texcoords from fixed input attributes to the matching
 * outputs.
 *
 * Returns the new program, or NULL if it could not be allocated or no
 * instruction could be emitted.  The allocation result was previously
 * used without a check.
 */
static struct nv40_vertex_program *
create_drawvp(struct nv40_context *nv40)
{
	struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
	unsigned i;

	if (!vp)
		return NULL;

	emit_mov(vp, NV40_VP_INST_DEST_POS , 0, ~0, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
	for (i = 0; i < 8; i++)
		emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);

	/* Without at least one instruction there is nothing to tag as the
	 * last one, and indexing insns[nr_insns - 1] would be invalid.
	 */
	if (!vp->insns || !vp->nr_insns) {
		FREE(vp);
		return NULL;
	}

	/* Flag the final instruction as the end of the program. */
	vp->insns[vp->nr_insns - 1].data[3] |= 1;
	vp->translated = TRUE;
	return vp;
}

/*
 * Create the draw-module stage that feeds swtnl output to the hardware,
 * creating the pass-through vertex program on first use.
 *
 * Returns the embedded draw_stage, or NULL if the stage could not be
 * allocated.  The allocation result was previously dereferenced without
 * a check.
 */
struct draw_stage *
nv40_draw_render_stage(struct nv40_context *nv40)
{
	struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);

	if (!render)
		return NULL;

	if (!nv40->swtnl.vertprog)
		nv40->swtnl.vertprog = create_drawvp(nv40);

	render->nv40 = nv40;
	render->stage.draw = nv40->draw;
	render->stage.point = nv40_render_point;
	render->stage.line = nv40_render_line;
	render->stage.tri = nv40_render_tri;
	render->stage.flush = nv40_render_flush;
	render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
	render->stage.destroy = nv40_render_destroy;

	return &render->stage;
}

/*
 * Draw through the software TNL fallback.
 *
 * Validates and emits swtnl state, maps the vertex buffers, the optional
 * index buffer and the vertex constant buffer for CPU reads, hands them
 * to the draw module, runs the draw, unmaps everything again and
 * finally flushes both the draw module and the context.
 *
 * idxbuf may be NULL for non-indexed drawing.
 */
void
nv40_draw_elements_swtnl(struct pipe_context *pipe,
			 struct pipe_buffer *idxbuf, unsigned idxbuf_size,
			 unsigned mode, unsigned start, unsigned count)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct pipe_screen *pscreen = pipe->screen;
	struct pipe_buffer *vs_consts;
	void *ptr;
	unsigned vb;

	if (!nv40_state_validate_swtnl(nv40))
		return;
	/* Vertex buffers are read by the CPU here, not by the hardware. */
	nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
	nv40_state_emit(nv40);

	/* Vertex buffers. */
	for (vb = 0; vb < nv40->vtxbuf_nr; vb++) {
		ptr = pipe_buffer_map(pscreen, nv40->vtxbuf[vb].buffer,
				      PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_vertex_buffer(nv40->draw, vb, ptr);
	}

	/* Index buffer, if any. */
	if (!idxbuf) {
		draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
	} else {
		ptr = pipe_buffer_map(pscreen, idxbuf,
				      PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, ptr);
	}

	/* Vertex shader constants, if bound. */
	vs_consts = nv40->constbuf[PIPE_SHADER_VERTEX];
	if (vs_consts) {
		const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX];

		ptr = pipe_buffer_map(pscreen, vs_consts,
				      PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, 0,
						ptr, nr);
	}

	draw_arrays(nv40->draw, mode, start, count);

	/* Release the mappings in the same order they were taken. */
	for (vb = 0; vb < nv40->vtxbuf_nr; vb++)
		pipe_buffer_unmap(pscreen, nv40->vtxbuf[vb].buffer);

	if (idxbuf)
		pipe_buffer_unmap(pscreen, idxbuf);

	if (nv40->constbuf[PIPE_SHADER_VERTEX])
		pipe_buffer_unmap(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX]);

	draw_flush(nv40->draw);
	pipe->flush(pipe, 0, NULL);
}

/*
 * Append one attribute to the swtnl vertex layout: remember which draw
 * shader output carries (semantic, index), which hardware attribute it
 * feeds, and the format it is emitted in.
 */
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
	    unsigned semantic, unsigned index)
{
	const unsigned out_slot =
		draw_find_shader_output(nv40->draw, semantic, index);
	const unsigned n = nv40->swtnl.nr_attribs;

	nv40->swtnl.nr_attribs = n + 1;

	nv40->swtnl.draw[n] = out_slot;
	nv40->swtnl.emit[n] = emit;
	nv40->swtnl.hw[n] = hw;
}

/*
 * Rebuild the swtnl vertex layout from the inputs the bound fragment
 * program reads: colours 0/1 as packed bytes, generic inputs 0..7 as
 * four floats, fog as one float, and finally position as three floats.
 *
 * Always returns FALSE.
 */
static boolean
nv40_state_vtxfmt_validate(struct nv40_context *nv40)
{
	struct nv40_fragment_program *fp = nv40->fragprog;
	unsigned colour_mask = 0, texcoord_mask = 0, want_fog = 0;
	unsigned n;

	/* Determine needed fragprog inputs */
	for (n = 0; n < fp->info.num_inputs; n++) {
		const unsigned sem_bit = 1 << fp->info.input_semantic_index[n];

		switch (fp->info.input_semantic_name[n]) {
		case TGSI_SEMANTIC_POSITION:
			break;
		case TGSI_SEMANTIC_COLOR:
			colour_mask |= sem_bit;
			break;
		case TGSI_SEMANTIC_GENERIC:
			texcoord_mask |= sem_bit;
			break;
		case TGSI_SEMANTIC_FOG:
			want_fog = 1;
			break;
		default:
			assert(0);
		}
	}

	nv40->swtnl.nr_attribs = 0;

	/* Map draw vtxprog output to hw attribute IDs */
	for (n = 0; n < 2; n++) {
		if (colour_mask & (1 << n))
			emit_attrib(nv40, 3 + n, EMIT_4UB,
				    TGSI_SEMANTIC_COLOR, n);
	}

	for (n = 0; n < 8; n++) {
		if (texcoord_mask & (1 << n))
			emit_attrib(nv40, 8 + n, EMIT_4F,
				    TGSI_SEMANTIC_GENERIC, n);
	}

	if (want_fog)
		emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);

	emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);

	return FALSE;
}

/*
 * State-table entry for the swtnl vertex format: revalidated when the
 * vertex arrays or the fragment program change.
 */
struct nv40_state_entry nv40_state_vtxfmt = {
	.validate   = nv40_state_vtxfmt_validate,
	.dirty.pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
	.dirty.hw   = 0,
};


+ 0
- 127
src/gallium/drivers/nv40/nv40_query.c 파일 보기

@@ -1,127 +0,0 @@
#include "pipe/p_context.h"

#include "nv40_context.h"

/*
 * Driver-side query object.
 * `object` is the slot allocated from the screen's query heap while a
 * result is outstanding; it is released once the result has been read.
 * `ready` is TRUE when `result` holds the value of the last query.
 */
struct nv40_query {
struct nouveau_resource *object;
unsigned type;
boolean ready;
uint64_t result;
};

/* Convert the opaque pipe_query handle back to the driver's query. */
static INLINE struct nv40_query *
nv40_query(struct pipe_query *pipe)
{
	struct nv40_query *q = (struct nv40_query *)pipe;

	return q;
}

/*
 * pipe_context::create_query implementation.
 *
 * Returns a zero-initialised query of the requested type, or NULL if
 * the allocation fails.  The allocation result was previously written
 * through without a check.
 */
static struct pipe_query *
nv40_query_create(struct pipe_context *pipe, unsigned query_type)
{
	struct nv40_query *q;

	q = CALLOC(1, sizeof(struct nv40_query));
	if (!q)
		return NULL;
	q->type = query_type;

	return (struct pipe_query *)q;
}

/*
 * pipe_context::destroy_query implementation: give back the query heap
 * slot if one is still held, then free the query itself.
 */
static void
nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
	struct nv40_query *query = nv40_query(pq);

	if (query->object != NULL)
		nouveau_resource_free(&query->object);

	FREE(query);
}

/*
 * pipe_context::begin_query implementation (occlusion counter only).
 * Allocates a notifier slot from the query heap, resets it, and resets
 * the hardware counter.
 */
static void
nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct nv40_query *query = nv40_query(pq);
	struct nouveau_channel *chan = nv40->screen->base.channel;
	struct nouveau_grobj *curie = nv40->screen->curie;

	assert(query->type == PIPE_QUERY_OCCLUSION_COUNTER);

	/* Happens when end_query() is called, then another begin_query()
	 * without querying the result in-between.  For now we'll wait for
	 * the existing query to notify completion, but it could be better.
	 */
	if (query->object) {
		uint64_t discarded;

		pipe->get_query_result(pipe, pq, 1, &discarded);
	}

	if (nouveau_resource_alloc(nv40->screen->query_heap, 1, NULL,
				   &query->object))
		assert(0);
	nouveau_notifier_reset(nv40->screen->query, query->object->start);

	BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1);
	OUT_RING  (chan, 1);
	BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1);
	OUT_RING  (chan, 1);

	query->ready = FALSE;
}

/*
 * pipe_context::end_query implementation: ask the hardware to write the
 * counter into the query's notifier slot and fire the push buffer.
 */
static void
nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct nv40_query *query = nv40_query(pq);
	struct nouveau_channel *chan = nv40->screen->base.channel;
	struct nouveau_grobj *curie = nv40->screen->curie;

	BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1);
	OUT_RING  (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
		   ((query->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
	FIRE_RING(chan);
}

/*
 * pipe_context::get_query_result implementation.
 *
 * If the result has not been fetched yet, check the notifier; when it is
 * not complete either return FALSE (wait == FALSE) or block until it is.
 * The value is then cached in the query and its heap slot released.
 *
 * Returns TRUE with *result filled in once the value is available.
 */
static boolean
nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq,
		  boolean wait, uint64_t *result)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct nv40_query *query = nv40_query(pq);
	unsigned status;

	assert(query->object && query->type == PIPE_QUERY_OCCLUSION_COUNTER);

	/* Already fetched: hand back the cached value. */
	if (query->ready) {
		*result = query->result;
		return TRUE;
	}

	status = nouveau_notifier_status(nv40->screen->query,
					 query->object->start);
	if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
		if (wait == FALSE)
			return FALSE;
		nouveau_notifier_wait_status(nv40->screen->query,
					     query->object->start,
					     NV_NOTIFY_STATE_STATUS_COMPLETED,
					     0);
	}

	query->result = nouveau_notifier_return_val(nv40->screen->query,
						    query->object->start);
	query->ready = TRUE;
	nouveau_resource_free(&query->object);

	*result = query->result;
	return TRUE;
}

void
nv40_init_query_functions(struct nv40_context *nv40)
{
nv40->pipe.create_query = nv40_query_create;
nv40->pipe.destroy_query = nv40_query_destroy;
nv40->pipe.begin_query = nv40_query_begin;
nv40->pipe.end_query = nv40_query_end;
nv40->pipe.get_query_result = nv40_query_result;
}

+ 0
- 319
src/gallium/drivers/nv40/nv40_screen.c 파일 보기

@@ -1,319 +0,0 @@
#include "pipe/p_screen.h"

#include "nv40_context.h"
#include "nv40_screen.h"

/* Chipset bitmasks, named after the graphics class they relate to
 * (presumably one bit per chipset id -- confirm at the point of use). */
#define NV4X_GRCLASS4097_CHIPSETS 0x00000BAF
#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
#define NV6X_GRCLASS4497_CHIPSETS 0x00000088

/*
 * pipe_screen::get_param implementation: report integer capabilities.
 * Unknown capabilities are logged and reported as 0.
 */
static int
nv40_screen_get_param(struct pipe_screen *pscreen, int param)
{
	struct nv40_screen *screen = nv40_screen(pscreen);

	switch (param) {
	/* Features that are available. */
	case PIPE_CAP_NPOT_TEXTURES:
	case PIPE_CAP_TWO_SIDED_STENCIL:
	case PIPE_CAP_ANISOTROPIC_FILTER:
	case PIPE_CAP_POINT_SPRITE:
	case PIPE_CAP_OCCLUSION_QUERY:
	case PIPE_CAP_TEXTURE_SHADOW_MAP:
	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
	case NOUVEAU_CAP_HW_VTXBUF:
	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
		return 1;

	/* Features that are not available. */
	case PIPE_CAP_GLSL:
	case PIPE_CAP_TGSI_CONT_SUPPORTED:
	case PIPE_CAP_INDEP_BLEND_ENABLE:
	case PIPE_CAP_INDEP_BLEND_FUNC:
	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
		return 0;
	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
		return 0; /* We have 4 - but unsupported currently */

	/* Limits. */
	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
		return 16;
	case PIPE_CAP_MAX_RENDER_TARGETS:
		return 4;
	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
		return 13;
	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
		return 10;

	/* Hardware index buffers depend on the 3D object class. */
	case NOUVEAU_CAP_HW_IDXBUF:
		return (screen->curie->grclass == NV40TCL) ? 1 : 0;

	default:
		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
		return 0;
	}
}

/*
 * pipe_screen::get_paramf implementation: report floating-point limits.
 * Unknown capabilities are logged and reported as 0.0.
 */
static float
nv40_screen_get_paramf(struct pipe_screen *pscreen, int param)
{
	switch (param) {
	case PIPE_CAP_MAX_LINE_WIDTH:
	case PIPE_CAP_MAX_LINE_WIDTH_AA:
		return 10.0;

	case PIPE_CAP_MAX_POINT_WIDTH:
	case PIPE_CAP_MAX_POINT_WIDTH_AA:
		return 64.0;

	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
		return 16.0;

	default:
		break;
	}

	NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
	return 0.0;
}

/*
 * pipe_screen::is_format_supported implementation.
 *
 * The accepted set depends on the usage: render targets first, then
 * depth/stencil surfaces, otherwise the format is treated as a texture
 * format.  target and geom_flags do not influence the answer.
 */
static boolean
nv40_screen_surface_format_supported(struct pipe_screen *pscreen,
				     enum pipe_format format,
				     enum pipe_texture_target target,
				     unsigned tex_usage, unsigned geom_flags)
{
	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
		switch (format) {
		case PIPE_FORMAT_B8G8R8A8_UNORM:
		case PIPE_FORMAT_B5G6R5_UNORM:
			return TRUE;
		default:
			return FALSE;
		}
	}

	if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
		switch (format) {
		case PIPE_FORMAT_S8Z24_UNORM:
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_Z16_UNORM:
			return TRUE;
		default:
			return FALSE;
		}
	}

	/* Plain texture usage. */
	switch (format) {
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_B5G5R5A1_UNORM:
	case PIPE_FORMAT_B4G4R4A4_UNORM:
	case PIPE_FORMAT_B5G6R5_UNORM:
	case PIPE_FORMAT_R16_SNORM:
	case PIPE_FORMAT_L8_UNORM:
	case PIPE_FORMAT_A8_UNORM:
	case PIPE_FORMAT_I8_UNORM:
	case PIPE_FORMAT_L8A8_UNORM:
	case PIPE_FORMAT_Z16_UNORM:
	case PIPE_FORMAT_S8Z24_UNORM:
	case PIPE_FORMAT_DXT1_RGB:
	case PIPE_FORMAT_DXT1_RGBA:
	case PIPE_FORMAT_DXT3_RGBA:
	case PIPE_FORMAT_DXT5_RGBA:
		return TRUE;
	default:
		return FALSE;
	}
}

/**
 * Return the buffer backing a surface.
 *
 * The surface's texture is treated as an nv40_miptree and its buffer
 * member is handed back; no reference is taken here.
 */
static struct pipe_buffer *
nv40_surface_buffer(struct pipe_surface *surf)
{
	return ((struct nv40_miptree *)surf->texture)->buffer;
}

/**
 * Tear down an NV40 screen and free it.
 *
 * Drops every cached state object, then releases the heaps, notifiers,
 * 3D object and 2D engine before finalising the base nouveau screen and
 * freeing the allocation itself.
 */
static void
nv40_screen_destroy(struct pipe_screen *pscreen)
{
	struct nv40_screen *nv40 = nv40_screen(pscreen);
	unsigned slot;

	/* Release the per-slot state objects that were actually populated. */
	for (slot = 0; slot != NV40_STATE_MAX; ++slot) {
		if (!nv40->state[slot])
			continue;
		so_ref(NULL, &nv40->state[slot]);
	}

	nouveau_resource_destroy(&nv40->vp_exec_heap);
	nouveau_resource_destroy(&nv40->vp_data_heap);
	nouveau_resource_destroy(&nv40->query_heap);
	nouveau_notifier_free(&nv40->query);
	nouveau_notifier_free(&nv40->sync);
	nouveau_grobj_free(&nv40->curie);
	nv04_surface_2d_takedown(&nv40->eng2d);

	nouveau_screen_fini(&nv40->base);

	FREE(pscreen);
}

/**
 * Create and initialise an NV40 pipe_screen.
 *
 * Allocates the screen, initialises the base nouveau screen, installs the
 * screen callbacks, creates the 3D ("curie") object matching the chipset,
 * the 2D engine, the sync/query notifiers and the query/vertex-program
 * heaps, and finally emits the static 3D object setup to the channel.
 *
 * \param ws   winsys stored in the returned screen
 * \param dev  nouveau device; dev->chipset selects the 3D object class
 * \return the new screen, or NULL on any failure.  Every failure after the
 *         allocation goes through nv40_screen_destroy() so nothing that was
 *         already set up is leaked.
 */
struct pipe_screen *
nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
	struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen);
	struct nouveau_channel *chan;
	struct pipe_screen *pscreen;
	struct nouveau_stateobj *so;
	unsigned curie_class = 0;
	int ret;

	if (!screen)
		return NULL;
	pscreen = &screen->base.base;

	ret = nouveau_screen_init(&screen->base, dev);
	if (ret)
		goto fail;
	chan = screen->base.channel;

	pscreen->winsys = ws;
	pscreen->destroy = nv40_screen_destroy;
	pscreen->get_param = nv40_screen_get_param;
	pscreen->get_paramf = nv40_screen_get_paramf;
	pscreen->is_format_supported = nv40_screen_surface_format_supported;
	pscreen->context_create = nv40_create;

	nv40_screen_init_miptree_functions(pscreen);

	/* 3D object */
	switch (dev->chipset & 0xf0) {
	case 0x40:
		if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
			curie_class = NV40TCL;
		else
		if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
			curie_class = NV44TCL;
		break;
	case 0x60:
		if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
			curie_class = NV44TCL;
		break;
	}

	if (!curie_class) {
		NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", dev->chipset);
		/* Previously returned without releasing the screen. */
		goto fail;
	}

	ret = nouveau_grobj_alloc(chan, 0xbeef3097, curie_class, &screen->curie);
	if (ret) {
		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
		/* Previously returned FALSE (wrong type) and leaked the screen. */
		goto fail;
	}

	/* 2D engine setup */
	screen->eng2d = nv04_surface_2d_init(&screen->base);
	if (!screen->eng2d) {
		/* NOTE(review): assumes nv04_surface_2d_init() reports failure
		 * by returning NULL -- confirm against its implementation. */
		NOUVEAU_ERR("Error initialising 2D engine\n");
		goto fail;
	}
	screen->eng2d->buf = nv40_surface_buffer;

	/* Notifier for sync purposes */
	ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
	if (ret) {
		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
		goto fail;
	}

	/* Query objects */
	ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query);
	if (ret) {
		NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
		goto fail;
	}

	/* The result must be captured: the check below used to test the
	 * stale value left over from the notifier allocation. */
	ret = nouveau_resource_init(&screen->query_heap, 0, 32);
	if (ret) {
		NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
		goto fail;
	}

	/* Vtxprog resources */
	if (nouveau_resource_init(&screen->vp_exec_heap, 0, 512) ||
	    nouveau_resource_init(&screen->vp_data_heap, 0, 256))
		goto fail;

	/* Static curie initialisation: 16 methods carrying 25 data words. */
	so = so_new(16, 25, 0);
	so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
	so_data  (so, screen->sync->handle);
	so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
	so_data  (so, chan->vram->handle);
	so_data  (so, chan->gart->handle);
	so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1);
	so_data  (so, chan->vram->handle);
	so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2);
	so_data  (so, chan->vram->handle);
	so_data  (so, chan->vram->handle);
	so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2);
	so_data  (so, chan->vram->handle);
	so_data  (so, chan->gart->handle);
	so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2);
	so_data  (so, 0);
	so_data  (so, screen->query->handle);
	so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2);
	so_data  (so, chan->vram->handle);
	so_data  (so, chan->vram->handle);
	so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2);
	so_data  (so, chan->vram->handle);
	so_data  (so, chan->vram->handle);

	so_method(so, screen->curie, 0x1ea4, 3);
	so_data  (so, 0x00000010);
	so_data  (so, 0x01000100);
	so_data  (so, 0xff800006);

	/* vtxprog output routing */
	so_method(so, screen->curie, 0x1fc4, 1);
	so_data  (so, 0x06144321);
	so_method(so, screen->curie, 0x1fc8, 2);
	so_data  (so, 0xedcba987);
	so_data  (so, 0x00000021);
	so_method(so, screen->curie, 0x1fd0, 1);
	so_data  (so, 0x00171615);
	so_method(so, screen->curie, 0x1fd4, 1);
	so_data  (so, 0x001b1a19);

	so_method(so, screen->curie, 0x1ef8, 1);
	so_data  (so, 0x0020ffff);
	so_method(so, screen->curie, 0x1d64, 1);
	so_data  (so, 0x00d30000);
	so_method(so, screen->curie, 0x1e94, 1);
	so_data  (so, 0x00000001);

	so_emit(chan, so);
	so_ref(NULL, &so);
	nouveau_pushbuf_flush(chan, 0);

	return pscreen;

fail:
	/* Single exit for every error after the allocation succeeded. */
	nv40_screen_destroy(pscreen);
	return NULL;
}


+ 0
- 37
src/gallium/drivers/nv40/nv40_screen.h 파일 보기

@@ -1,37 +0,0 @@
#ifndef __NV40_SCREEN_H__
#define __NV40_SCREEN_H__

#include "nouveau/nouveau_screen.h"
#include "nouveau/nv04_surface_2d.h"

/*
 * Per-screen state of the NV40 driver.
 *
 * nv40_screen() casts a pipe_screen pointer straight to this struct, so
 * `base` presumably has to remain the first member -- confirm against
 * struct nouveau_screen before reordering.
 */
struct nv40_screen {
struct nouveau_screen base;

/* NOTE(review): not referenced by the screen code visible here. */
struct nouveau_winsys *nvws;

/* NOTE(review): presumably the context whose state is currently on the
 * channel -- confirm against the context code. */
struct nv40_context *cur_ctx;

/* HW graphics objects */
struct nv04_surface_2d *eng2d;
struct nouveau_grobj *curie;
struct nouveau_notifier *sync;

/* Query object resources */
struct nouveau_notifier *query;
struct nouveau_resource *query_heap;

/* Vtxprog resources */
struct nouveau_resource *vp_exec_heap;
struct nouveau_resource *vp_data_heap;

/* Current 3D state of channel */
struct nouveau_stateobj *state[NV40_STATE_MAX];
};

/* Downcast a generic pipe_screen pointer to the NV40 screen that embeds it. */
static INLINE struct nv40_screen *
nv40_screen(struct pipe_screen *screen)
{
	struct nv40_screen *nv40 = (struct nv40_screen *)screen;

	return nv40;
}

#endif

+ 0
- 556
src/gallium/drivers/nv40/nv40_shader.h 파일 보기

@@ -1,556 +0,0 @@
#ifndef __NV40_SHADER_H__
#define __NV40_SHADER_H__

/* Vertex programs instruction set
*
* The NV40 instruction set is very similar to NV30. Most fields are in
* a slightly different position in the instruction however.
*
* Merged instructions
* In some cases it is possible to put two instructions into one opcode
* slot. The rules for when this is OK are not entirely clear to me yet.
*
* There are separate writemasks and dest temp register fields for each
* grouping of instructions. There is however only one field with the
* ID of a result register. Writing to temp/result regs is selected by
* setting VEC_RESULT/SCA_RESULT.
*
* Temporary registers
* The source/dest temp register fields have been extended by 1 bit, to
* give a total of 32 temporary registers.
*
* Relative Addressing
* NV40 can use an address register to index into vertex attribute regs.
* This is done by putting the offset value into INPUT_SRC and setting
* the INDEX_INPUT flag.
*
* Conditional execution (see NV_vertex_program{2,3} for details)
* There is a second condition code register on NV40; its use is enabled
* by setting the COND_REG_SELECT_1 flag.
*
* Texture lookup
* TODO
*/

/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
#define NV40_VP_INST_VEC_RESULT (1 << 30)
/* uncertain.. */
#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
/* use address reg as index into attribs */
#define NV40_VP_INST_INDEX_INPUT (1 << 27)
#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
#define NV40_VP_INST_SRC2_ABS (1 << 23)
#define NV40_VP_INST_SRC1_ABS (1 << 22)
#define NV40_VP_INST_SRC0_ABS (1 << 21)
#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
#define NV40_VP_INST_COND_SHIFT 10
#define NV40_VP_INST_COND_MASK (0x7 << 10)
# define NV40_VP_INST_COND_FL 0
# define NV40_VP_INST_COND_LT 1
# define NV40_VP_INST_COND_EQ 2
# define NV40_VP_INST_COND_LE 3
# define NV40_VP_INST_COND_GT 4
# define NV40_VP_INST_COND_NE 5
# define NV40_VP_INST_COND_GE 6
# define NV40_VP_INST_COND_TR 7
#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
#define NV40_VP_INST0_KNOWN ( \
NV40_VP_INST_INDEX_INPUT | \
NV40_VP_INST_COND_REG_SELECT_1 | \
NV40_VP_INST_ADDR_REG_SELECT_1 | \
NV40_VP_INST_SRC2_ABS | \
NV40_VP_INST_SRC1_ABS | \
NV40_VP_INST_SRC0_ABS | \
NV40_VP_INST_VEC_DEST_TEMP_MASK | \
NV40_VP_INST_COND_TEST_ENABLE | \
NV40_VP_INST_COND_MASK | \
NV40_VP_INST_COND_SWZ_ALL_MASK | \
NV40_VP_INST_ADDR_SWZ_MASK)

/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
# define NV40_VP_INST_OP_NOP 0x00
# define NV40_VP_INST_OP_MOV 0x01
# define NV40_VP_INST_OP_MUL 0x02
# define NV40_VP_INST_OP_ADD 0x03
# define NV40_VP_INST_OP_MAD 0x04
# define NV40_VP_INST_OP_DP3 0x05
# define NV40_VP_INST_OP_DPH 0x06
# define NV40_VP_INST_OP_DP4 0x07
# define NV40_VP_INST_OP_DST 0x08
# define NV40_VP_INST_OP_MIN 0x09
# define NV40_VP_INST_OP_MAX 0x0A
# define NV40_VP_INST_OP_SLT 0x0B
# define NV40_VP_INST_OP_SGE 0x0C
# define NV40_VP_INST_OP_ARL 0x0D
# define NV40_VP_INST_OP_FRC 0x0E
# define NV40_VP_INST_OP_FLR 0x0F
# define NV40_VP_INST_OP_SEQ 0x10
# define NV40_VP_INST_OP_SFL 0x11
# define NV40_VP_INST_OP_SGT 0x12
# define NV40_VP_INST_OP_SLE 0x13
# define NV40_VP_INST_OP_SNE 0x14
# define NV40_VP_INST_OP_STR 0x15
# define NV40_VP_INST_OP_SSG 0x16
# define NV40_VP_INST_OP_ARR 0x17
# define NV40_VP_INST_OP_ARA 0x18
# define NV40_VP_INST_OP_TXL 0x19
#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27)
# define NV40_VP_INST_OP_NOP 0x00
# define NV40_VP_INST_OP_MOV 0x01
# define NV40_VP_INST_OP_RCP 0x02
# define NV40_VP_INST_OP_RCC 0x03
# define NV40_VP_INST_OP_RSQ 0x04
# define NV40_VP_INST_OP_EXP 0x05
# define NV40_VP_INST_OP_LOG 0x06
# define NV40_VP_INST_OP_LIT 0x07
# define NV40_VP_INST_OP_BRA 0x09
# define NV40_VP_INST_OP_CAL 0x0B
# define NV40_VP_INST_OP_RET 0x0C
# define NV40_VP_INST_OP_LG2 0x0D
# define NV40_VP_INST_OP_EX2 0x0E
# define NV40_VP_INST_OP_SIN 0x0F
# define NV40_VP_INST_OP_COS 0x10
# define NV40_VP_INST_OP_PUSHA 0x13
# define NV40_VP_INST_OP_POPA 0x14
#define NV40_VP_INST_CONST_SRC_SHIFT 12
#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
#define NV40_VP_INST_INPUT_SRC_SHIFT 8
#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
# define NV40_VP_INST_IN_POS 0
# define NV40_VP_INST_IN_WEIGHT 1
# define NV40_VP_INST_IN_NORMAL 2
# define NV40_VP_INST_IN_COL0 3
# define NV40_VP_INST_IN_COL1 4
# define NV40_VP_INST_IN_FOGC 5
# define NV40_VP_INST_IN_TC0 8
/* Input source index n slots after NV40_VP_INST_IN_TC0 (which is 8).
 * The argument is parenthesised so compound expressions expand correctly. */
# define NV40_VP_INST_IN_TC(n) (8+(n))
#define NV40_VP_INST_SRC0H_SHIFT 0
#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
#define NV40_VP_INST1_KNOWN ( \
NV40_VP_INST_VEC_OPCODE_MASK | \
NV40_VP_INST_SCA_OPCODE_MASK | \
NV40_VP_INST_CONST_SRC_MASK | \
NV40_VP_INST_INPUT_SRC_MASK | \
NV40_VP_INST_SRC0H_MASK \
)

/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
#define NV40_VP_INST_SRC0L_SHIFT 23
#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23)
#define NV40_VP_INST_SRC1_SHIFT 6
#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
#define NV40_VP_INST_SRC2H_SHIFT 0
#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
#define NV40_VP_INST_IADDRH_SHIFT 0
#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)

/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
#define NV40_VP_INST_IADDRL_SHIFT 29
#define NV40_VP_INST_IADDRL_MASK (7 << 29)
#define NV40_VP_INST_SRC2L_SHIFT 21
#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21)
#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
#define NV40_VP_INST_SCA_RESULT (1 << 12)
#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
#define NV40_VP_INST_DEST_SHIFT 2
#define NV40_VP_INST_DEST_MASK (31 << 2)
# define NV40_VP_INST_DEST_POS 0
# define NV40_VP_INST_DEST_COL0 1
# define NV40_VP_INST_DEST_COL1 2
# define NV40_VP_INST_DEST_BFC0 3
# define NV40_VP_INST_DEST_BFC1 4
# define NV40_VP_INST_DEST_FOGC 5
# define NV40_VP_INST_DEST_PSZ 6
# define NV40_VP_INST_DEST_TC0 7
/* Result register index n slots after NV40_VP_INST_DEST_TC0 (which is 7).
 * The argument is parenthesised so compound expressions expand correctly. */
# define NV40_VP_INST_DEST_TC(n) (7+(n))
# define NV40_VP_INST_DEST_TEMP 0x1F
#define NV40_VP_INST_INDEX_CONST (1 << 1)
#define NV40_VP_INST_LAST (1 << 0)
#define NV40_VP_INST3_KNOWN ( \
NV40_VP_INST_SRC2L_MASK |\
NV40_VP_INST_SCA_WRITEMASK_MASK |\
NV40_VP_INST_VEC_WRITEMASK_MASK |\
NV40_VP_INST_SCA_DEST_TEMP_MASK |\
NV40_VP_INST_DEST_MASK |\
NV40_VP_INST_INDEX_CONST)

/* Useful to split the source selection regs into their pieces */
#define NV40_VP_SRC0_HIGH_SHIFT 9
#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
#define NV40_VP_SRC0_LOW_MASK 0x000001FF
#define NV40_VP_SRC2_HIGH_SHIFT 11
#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
#define NV40_VP_SRC2_LOW_MASK 0x000007FF

/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
#define NV40_VP_SRC_NEGATE (1 << 16)
#define NV40_VP_SRC_SWZ_X_SHIFT 14
#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
#define NV40_VP_SRC_SWZ_Y_SHIFT 12
#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
#define NV40_VP_SRC_SWZ_Z_SHIFT 10
#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
#define NV40_VP_SRC_SWZ_W_SHIFT 8
#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
#define NV40_VP_SRC_REG_TYPE_SHIFT 0
#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
# define NV40_VP_SRC_REG_TYPE_UNK0 0
# define NV40_VP_SRC_REG_TYPE_TEMP 1
# define NV40_VP_SRC_REG_TYPE_INPUT 2
# define NV40_VP_SRC_REG_TYPE_CONST 3


/*
* Each fragment program opcode appears to be comprised of 4 32-bit values.
*
* 0 - Opcode, output reg/mask, ATTRIB source
* 1 - Source 0
* 2 - Source 1
* 3 - Source 2
*
* There appears to be no special difference between result regs and temp regs.
* result.color == R0.xyzw
* result.depth == R1.z
* When the fragprog contains instructions to write depth,
* NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
*
* Constants are inserted directly after the instruction that uses them.
*
* It appears that it's not possible to use two input registers in one
* instruction as the input sourcing is done in the instruction dword
* and not the source selection dwords. As such instructions such as:
*
* ADD result.color, fragment.color, fragment.texcoord[0];
*
* must be split into two MOV's and then an ADD (nvidia does this) but
* I'm not sure why it's not just one MOV and then source the second input
* in the ADD instruction..
*
* Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
* negation requires multiplication with a const.
*
* Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
* SWIZZLE_ONE.
*
* The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
* SWIZZLE_ZERO is implemented simply by not writing to the relevant components
* of the destination.
*
* Looping
* Loops appear to be fairly expensive on NV40 at least, the proprietary
* driver goes to a lot of effort to avoid using the native looping
* instructions. If the total number of *executed* instructions between
* REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
* The maximum loop count is 255.
*
* Conditional execution
* TODO
*
* Non-native instructions:
* LIT
* LRP - MAD+MAD
* SUB - ADD, negate second source
* RSQ - LG2 + EX2
* POW - LG2 + MUL + EX2
* SCS - COS + SIN
* XPD
* DP2 - MUL + ADD
* NRM
*/

//== Opcode / Destination selection ==
#define NV40_FP_OP_PROGRAM_END (1 << 0)
#define NV40_FP_OP_OUT_REG_SHIFT 1
#define NV40_FP_OP_OUT_REG_MASK (63 << 1)
/* Needs to be set when writing outputs to get expected result.. */
#define NV40_FP_OP_OUT_REG_HALF (1 << 7)
#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8)
#define NV40_FP_OP_OUTMASK_SHIFT 9
#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
# define NV40_FP_OP_OUT_X (1 << 9)
# define NV40_FP_OP_OUT_Y (1 <<10)
# define NV40_FP_OP_OUT_Z (1 <<11)
# define NV40_FP_OP_OUT_W (1 <<12)
/* Uncertain about these, especially the input_src values.. it's possible that
* they can be dynamically changed.
*/
#define NV40_FP_OP_INPUT_SRC_SHIFT 13
#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
# define NV40_FP_OP_INPUT_SRC_COL0 0x1
# define NV40_FP_OP_INPUT_SRC_COL1 0x2
# define NV40_FP_OP_INPUT_SRC_FOGC 0x3
# define NV40_FP_OP_INPUT_SRC_TC0 0x4
/* Input source index n slots after NV40_FP_OP_INPUT_SRC_TC0 (which is 0x4).
 * The argument is parenthesised so compound expressions expand correctly. */
# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + (n))
# define NV40_FP_OP_INPUT_SRC_FACING 0xE
#define NV40_FP_OP_TEX_UNIT_SHIFT 17
#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17)
#define NV40_FP_OP_PRECISION_SHIFT 22
#define NV40_FP_OP_PRECISION_MASK (3 << 22)
# define NV40_FP_PRECISION_FP32 0
# define NV40_FP_PRECISION_FP16 1
# define NV40_FP_PRECISION_FX12 2
#define NV40_FP_OP_OPCODE_SHIFT 24
#define NV40_FP_OP_OPCODE_MASK (0x3F << 24)
# define NV40_FP_OP_OPCODE_NOP 0x00
# define NV40_FP_OP_OPCODE_MOV 0x01
# define NV40_FP_OP_OPCODE_MUL 0x02
# define NV40_FP_OP_OPCODE_ADD 0x03
# define NV40_FP_OP_OPCODE_MAD 0x04
# define NV40_FP_OP_OPCODE_DP3 0x05
# define NV40_FP_OP_OPCODE_DP4 0x06
# define NV40_FP_OP_OPCODE_DST 0x07
# define NV40_FP_OP_OPCODE_MIN 0x08
# define NV40_FP_OP_OPCODE_MAX 0x09
# define NV40_FP_OP_OPCODE_SLT 0x0A
# define NV40_FP_OP_OPCODE_SGE 0x0B
# define NV40_FP_OP_OPCODE_SLE 0x0C
# define NV40_FP_OP_OPCODE_SGT 0x0D
# define NV40_FP_OP_OPCODE_SNE 0x0E
# define NV40_FP_OP_OPCODE_SEQ 0x0F
# define NV40_FP_OP_OPCODE_FRC 0x10
# define NV40_FP_OP_OPCODE_FLR 0x11
# define NV40_FP_OP_OPCODE_KIL 0x12
# define NV40_FP_OP_OPCODE_PK4B 0x13
# define NV40_FP_OP_OPCODE_UP4B 0x14
/* DDX/DDY can only write to XY */
# define NV40_FP_OP_OPCODE_DDX 0x15
# define NV40_FP_OP_OPCODE_DDY 0x16
# define NV40_FP_OP_OPCODE_TEX 0x17
# define NV40_FP_OP_OPCODE_TXP 0x18
# define NV40_FP_OP_OPCODE_TXD 0x19
# define NV40_FP_OP_OPCODE_RCP 0x1A
# define NV40_FP_OP_OPCODE_EX2 0x1C
# define NV40_FP_OP_OPCODE_LG2 0x1D
# define NV40_FP_OP_OPCODE_STR 0x20
# define NV40_FP_OP_OPCODE_SFL 0x21
# define NV40_FP_OP_OPCODE_COS 0x22
# define NV40_FP_OP_OPCODE_SIN 0x23
# define NV40_FP_OP_OPCODE_PK2H 0x24
# define NV40_FP_OP_OPCODE_UP2H 0x25
# define NV40_FP_OP_OPCODE_PK4UB 0x27
# define NV40_FP_OP_OPCODE_UP4UB 0x28
# define NV40_FP_OP_OPCODE_PK2US 0x29
# define NV40_FP_OP_OPCODE_UP2US 0x2A
# define NV40_FP_OP_OPCODE_DP2A 0x2E
# define NV40_FP_OP_OPCODE_TXL 0x2F
# define NV40_FP_OP_OPCODE_TXB 0x31
# define NV40_FP_OP_OPCODE_DIV 0x3A
# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C
/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
# define NV40_FP_OP_BRA_OPCODE_BRK 0x0
# define NV40_FP_OP_BRA_OPCODE_CAL 0x1
# define NV40_FP_OP_BRA_OPCODE_IF 0x2
# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
# define NV40_FP_OP_BRA_OPCODE_REP 0x4
# define NV40_FP_OP_BRA_OPCODE_RET 0x5
/* Bit 31 of the opcode dword.  Built from an unsigned literal: shifting a
 * signed 1 into the sign bit is undefined and sign-extends when widened. */
#define NV40_FP_OP_OUT_SAT (1U << 31)

/* high order bits of SRC0 */
#define NV40_FP_OP_OUT_ABS (1 << 29)
#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
#define NV40_FP_OP_COND_SHIFT 18
#define NV40_FP_OP_COND_MASK (0x07 << 18)
# define NV40_FP_OP_COND_FL 0
# define NV40_FP_OP_COND_LT 1
# define NV40_FP_OP_COND_EQ 2
# define NV40_FP_OP_COND_LE 3
# define NV40_FP_OP_COND_GT 4
# define NV40_FP_OP_COND_NE 5
# define NV40_FP_OP_COND_GE 6
# define NV40_FP_OP_COND_TR 7

/* high order bits of SRC1 */
/* Bit 31 of the SRC1 dword.  Built from an unsigned literal: shifting a
 * signed 1 into the sign bit is undefined and sign-extends when widened. */
#define NV40_FP_OP_OPCODE_IS_BRANCH (1U<<31)
#define NV40_FP_OP_DST_SCALE_SHIFT 28
#define NV40_FP_OP_DST_SCALE_MASK (3 << 28)
#define NV40_FP_OP_DST_SCALE_1X 0
#define NV40_FP_OP_DST_SCALE_2X 1
#define NV40_FP_OP_DST_SCALE_4X 2
#define NV40_FP_OP_DST_SCALE_8X 3
#define NV40_FP_OP_DST_SCALE_INV_2X 5
#define NV40_FP_OP_DST_SCALE_INV_4X 6
#define NV40_FP_OP_DST_SCALE_INV_8X 7

/* SRC1 LOOP */
#define NV40_FP_OP_LOOP_INCR_SHIFT 19
#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)

/* SRC1 IF */
#define NV40_FP_OP_ELSE_ID_SHIFT 2
#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)

/* SRC1 CAL */
#define NV40_FP_OP_IADDR_SHIFT 2
#define NV40_FP_OP_IADDR_MASK (0xFF << 2)

/* SRC1 REP
* I have no idea why there are 3 count values here.. but they
* have always been filled with the same value in my tests so
* far..
*/
#define NV40_FP_OP_REP_COUNT1_SHIFT 2
#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
#define NV40_FP_OP_REP_COUNT2_SHIFT 10
#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
#define NV40_FP_OP_REP_COUNT3_SHIFT 19
#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)

/* SRC2 REP/IF */
#define NV40_FP_OP_END_ID_SHIFT 2
#define NV40_FP_OP_END_ID_MASK (0xFF << 2)

// SRC2 high-order
#define NV40_FP_OP_INDEX_INPUT (1 << 30)
#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)

//== Register selection ==
#define NV40_FP_REG_TYPE_SHIFT 0
#define NV40_FP_REG_TYPE_MASK (3 << 0)
# define NV40_FP_REG_TYPE_TEMP 0
# define NV40_FP_REG_TYPE_INPUT 1
# define NV40_FP_REG_TYPE_CONST 2
#define NV40_FP_REG_SRC_SHIFT 2
#define NV40_FP_REG_SRC_MASK (63 << 2)
#define NV40_FP_REG_SRC_HALF (1 << 8)
#define NV40_FP_REG_SWZ_ALL_SHIFT 9
#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
#define NV40_FP_REG_SWZ_X_SHIFT 9
#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
#define NV40_FP_REG_SWZ_Y_SHIFT 11
#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
#define NV40_FP_REG_SWZ_Z_SHIFT 13
#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
#define NV40_FP_REG_SWZ_W_SHIFT 15
#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
# define NV40_FP_SWIZZLE_X 0
# define NV40_FP_SWIZZLE_Y 1
# define NV40_FP_SWIZZLE_Z 2
# define NV40_FP_SWIZZLE_W 3
#define NV40_FP_REG_NEGATE (1 << 17)

#ifndef NV40_SHADER_NO_FUCKEDNESS
#define NV40SR_NONE 0
#define NV40SR_OUTPUT 1
#define NV40SR_INPUT 2
#define NV40SR_TEMP 3
#define NV40SR_CONST 4

/*
 * Description of one shader register operand, passed around by value and
 * adjusted by the nv40_sr*() helpers.
 */
struct nv40_sreg {
/* Register file and index within it, as given to nv40_sr().
 * Presumably `type` holds one of the NV40SR_* values -- verify at callers. */
int type;
int index;

/* Scale selector; nv40_sr() starts it at DEF_SCALE, nv40_sr_scale() replaces it. */
int dst_scale;

/* Source modifiers: toggled by nv40_sr_neg() / set by nv40_sr_abs(). */
int negate;
int abs;
/* Component swizzle; identity is { 0, 1, 2, 3 }. */
int swz[4];

/* Condition-code fields. nv40_sr() clears all of them except cc_test,
 * which starts at DEF_CTEST, and cc_swz, which starts as identity.
 * NOTE(review): exact semantics are defined by the shader emitters. */
int cc_update;
int cc_update_reg;
int cc_test;
int cc_test_reg;
int cc_swz[4];
};

static INLINE struct nv40_sreg
nv40_sr(int type, int index)
{
struct nv40_sreg temp = {
.type = type,
.index = index,
.dst_scale = DEF_SCALE,
.abs = 0,
.negate = 0,
.swz = { 0, 1, 2, 3 },
.cc_update = 0,
.cc_update_reg = 0,
.cc_test = DEF_CTEST,
.cc_test_reg = 0,
.cc_swz = { 0, 1, 2, 3 },
};
return temp;
}

/*
 * Return a copy of src with an additional swizzle applied on top of the one
 * it already carries: output component X takes src's component x, and so on
 * for Y, Z and W.
 */
static INLINE struct nv40_sreg
nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
{
	/* Read all four selections first; src is then modified in place. */
	const int sel_x = src.swz[x];
	const int sel_y = src.swz[y];
	const int sel_z = src.swz[z];
	const int sel_w = src.swz[w];

	src.swz[SWZ_X] = sel_x;
	src.swz[SWZ_Y] = sel_y;
	src.swz[SWZ_Z] = sel_z;
	src.swz[SWZ_W] = sel_w;

	return src;
}

static INLINE struct nv40_sreg
nv40_sr_neg(struct nv40_sreg src)
{
src.negate = !src.negate;
return src;
}

static INLINE struct nv40_sreg
nv40_sr_abs(struct nv40_sreg src)
{
src.abs = 1;
return src;
}

static INLINE struct nv40_sreg
nv40_sr_scale(struct nv40_sreg src, int scale)
{
src.dst_scale = scale;
return src;
}
#endif

#endif

+ 0
- 797
src/gallium/drivers/nv40/nv40_state.c 파일 보기

@@ -1,797 +0,0 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"

#include "draw/draw_context.h"

#include "tgsi/tgsi_parse.h"

#include "nv40_context.h"
#include "nv40_state.h"

/**
 * Translate a pipe_blend_state into an NV40 blend state object.
 *
 * Only render target 0 of \p cso is consulted.  The resulting state object
 * programs blend enable/factors/equation, the colour write mask, the logic
 * op and dithering.
 *
 * \return the new nv40_blend_state (which keeps a copy of \p cso), or NULL
 *         when the allocation fails.
 */
static void *
nv40_blend_state_create(struct pipe_context *pipe,
			const struct pipe_blend_state *cso)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct nouveau_grobj *curie = nv40->screen->curie;
	struct nv40_blend_state *bso;
	struct nouveau_stateobj *so;

	bso = CALLOC(1, sizeof(*bso));
	if (!bso)
		return NULL;

	/* Worst case below is 5 methods carrying 8 data words. */
	so = so_new(5, 8, 0);

	if (cso->rt[0].blend_enable) {
		so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
		so_data  (so, 1);
		/* Alpha factor goes in the high half-word, RGB in the low. */
		so_data  (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) |
			       nvgl_blend_func(cso->rt[0].rgb_src_factor));
		so_data  (so, (nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16) |
			       nvgl_blend_func(cso->rt[0].rgb_dst_factor));
		so_method(so, curie, NV40TCL_BLEND_EQUATION, 1);
		so_data  (so, (nvgl_blend_eqn(cso->rt[0].alpha_func) << 16) |
			       nvgl_blend_eqn(cso->rt[0].rgb_func));
	} else {
		so_method(so, curie, NV40TCL_BLEND_ENABLE, 1);
		so_data  (so, 0);
	}

	/* One enable bit per channel, laid out as A:24, R:16, G:8, B:0. */
	so_method(so, curie, NV40TCL_COLOR_MASK, 1);
	so_data  (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
		       ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
		       ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) |
		       ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 <<  0) : 0)));

	if (cso->logicop_enable) {
		so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
		so_data  (so, 1);
		so_data  (so, nvgl_logicop_func(cso->logicop_func));
	} else {
		so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1);
		so_data  (so, 0);
	}

	so_method(so, curie, NV40TCL_DITHER_ENABLE, 1);
	so_data  (so, cso->dither ? 1 : 0);

	/* Hand the reference over to the blend object, then drop the local one. */
	so_ref(so, &bso->so);
	so_ref(NULL, &so);
	bso->pipe = *cso;
	return (void *)bso;
}

/* Make hwcso the context's current blend state and flag blend state as dirty. */
static void
nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_BLEND;
	ctx->blend = hwcso;
}

/* Drop the blend object's state-object reference and free the blend object. */
static void
nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_blend_state *blend = hwcso;

	so_ref(NULL, &blend->so);
	FREE(blend);
}


/*
 * Map a PIPE_TEX_WRAP_* mode to the hardware wrap value, returned without
 * the S-coordinate shift so the caller can place it in the S, T or R field.
 * Unknown modes are reported and treated as REPEAT.
 */
static INLINE unsigned
wrap_mode(unsigned wrap) {
	/* REPEAT doubles as the fallback for unrecognised modes. */
	unsigned hw = NV40TCL_TEX_WRAP_S_REPEAT;

	switch (wrap) {
	case PIPE_TEX_WRAP_REPEAT:
		break;
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		hw = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT;
		break;
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		hw = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE;
		break;
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		hw = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER;
		break;
	case PIPE_TEX_WRAP_CLAMP:
		hw = NV40TCL_TEX_WRAP_S_CLAMP;
		break;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		hw = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
		break;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		hw = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
		break;
	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		hw = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
		break;
	default:
		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
		break;
	}

	return hw >> NV40TCL_TEX_WRAP_S_SHIFT;
}

static void *
nv40_sampler_state_create(struct pipe_context *pipe,
const struct pipe_sampler_state *cso)
{
struct nv40_sampler_state *ps;
uint32_t filter = 0;

ps = MALLOC(sizeof(struct nv40_sampler_state));

ps->fmt = 0;
if (!cso->normalized_coords)
ps->fmt |= NV40TCL_TEX_FORMAT_RECT;

ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) |
(wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) |
(wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT));

ps->en = 0;
if (cso->max_anisotropy >= 2) {
/* no idea, binary driver sets it, works without it.. meh.. */
ps->wrap |= (1 << 5);

if (cso->max_anisotropy >= 16) {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
} else
if (cso->max_anisotropy >= 12) {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
} else
if (cso->max_anisotropy >= 10) {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
} else
if (cso->max_anisotropy >= 8) {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
} else
if (cso->max_anisotropy >= 6) {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
} else
if (cso->max_anisotropy >= 4) {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
} else {
ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
}
}

switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
filter |= NV40TCL_TEX_FILTER_MAG_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
filter |= NV40TCL_TEX_FILTER_MAG_NEAREST;
break;
}

switch (cso->min_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
filter |= NV40TCL_TEX_FILTER_MIN_LINEAR;
break;
}
break;
case PIPE_TEX_FILTER_NEAREST:
default:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
filter |= NV40TCL_TEX_FILTER_MIN_NEAREST;
break;
}
break;
}

ps->filt = filter;

{
float limit;

limit = CLAMP(cso->lod_bias, -16.0, 15.0);
ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;

limit = CLAMP(cso->max_lod, 0.0, 15.0);
ps->en |= (int)(limit * 256.0) << 7;

limit = CLAMP(cso->min_lod, 0.0, 15.0);
ps->en |= (int)(limit * 256.0) << 19;
}


if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
switch (cso->compare_func) {
case PIPE_FUNC_NEVER:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER;
break;
case PIPE_FUNC_GREATER:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER;
break;
case PIPE_FUNC_EQUAL:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL;
break;
case PIPE_FUNC_GEQUAL:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL;
break;
case PIPE_FUNC_LESS:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS;
break;
case PIPE_FUNC_NOTEQUAL:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL;
break;
case PIPE_FUNC_LEQUAL:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL;
break;
case PIPE_FUNC_ALWAYS:
ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS;
break;
default:
break;
}
}

ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
(float_to_ubyte(cso->border_color[0]) << 16) |
(float_to_ubyte(cso->border_color[1]) << 8) |
(float_to_ubyte(cso->border_color[2]) << 0));

return (void *)ps;
}

/*
 * Bind nr sampler states to the context.
 *
 * Units below nr take the supplied states; units that were bound before
 * but are beyond nr now are cleared.  Every touched unit is marked dirty.
 */
static void
nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
{
	struct nv40_context *ctx = nv40_context(pipe);
	const unsigned previous = ctx->nr_samplers;
	const unsigned touched = (nr > previous) ? nr : previous;
	unsigned i;

	for (i = 0; i < touched; i++) {
		ctx->tex_sampler[i] = (i < nr) ? sampler[i] : NULL;
		ctx->dirty_samplers |= (1 << i);
	}

	ctx->nr_samplers = nr;
	ctx->dirty |= NV40_NEW_SAMPLER;
}

/* Free a sampler state object; the context argument is not used. */
static void
nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
FREE(hwcso);
}

/*
 * Bind nr fragment sampler views to the context.
 *
 * For each unit below nr the context takes a reference on the view and on
 * its texture (stored as the unit's miptree); units that were bound before
 * but are beyond nr now have both references dropped.  Every touched unit
 * is marked dirty.
 *
 * NOTE(review): views[unit] is dereferenced unconditionally for unit < nr,
 * so NULL entries in the array are not tolerated -- confirm callers never
 * pass them.
 */
static void
nv40_set_fragment_sampler_views(struct pipe_context *pipe,
				unsigned nr,
				struct pipe_sampler_view **views)
{
	struct nv40_context *ctx = nv40_context(pipe);
	const unsigned previous = ctx->nr_textures;
	unsigned i;

	/* Newly bound units. */
	i = 0;
	while (i < nr) {
		struct pipe_texture **slot =
			(struct pipe_texture **)&ctx->tex_miptree[i];

		pipe_sampler_view_reference(&ctx->fragment_sampler_views[i],
					    views[i]);
		pipe_texture_reference(slot, views[i]->texture);
		ctx->dirty_samplers |= (1 << i);
		i++;
	}

	/* Units that fell out of the bound range. */
	i = nr;
	while (i < previous) {
		struct pipe_texture **slot =
			(struct pipe_texture **)&ctx->tex_miptree[i];

		pipe_sampler_view_reference(&ctx->fragment_sampler_views[i],
					    NULL);
		pipe_texture_reference(slot, NULL);
		ctx->dirty_samplers |= (1 << i);
		i++;
	}

	ctx->nr_textures = nr;
	ctx->dirty |= NV40_NEW_SAMPLER;
}

/*
 * Allocate a sampler view initialised from `templ`.
 *
 * The new view starts with a reference count of 1, records `pipe` as its
 * context and holds its own reference on `texture`.  Returns NULL when the
 * allocation fails.
 */
static struct pipe_sampler_view *
nv40_create_sampler_view(struct pipe_context *pipe,
			 struct pipe_texture *texture,
			 const struct pipe_sampler_view *templ)
{
	struct pipe_sampler_view *sv = CALLOC_STRUCT(pipe_sampler_view);

	if (!sv)
		return NULL;

	*sv = *templ;
	sv->reference.count = 1;
	sv->context = pipe;

	/* Clear the pointer copied from the template before referencing. */
	sv->texture = NULL;
	pipe_texture_reference(&sv->texture, texture);

	return sv;
}


/* Drop the view's texture reference, then free the view itself. */
static void
nv40_sampler_view_destroy(struct pipe_context *pipe,
			  struct pipe_sampler_view *view)
{
	(void)pipe;

	pipe_texture_reference(&view->texture, NULL);
	FREE(view);
}

static void *
nv40_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
struct nouveau_stateobj *so = so_new(9, 19, 0);
struct nouveau_grobj *curie = nv40->screen->curie;

/*XXX: ignored:
* light_twoside
* point_smooth -nohw
* multisample
*/

so_method(so, curie, NV40TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT :
NV40TCL_SHADE_MODEL_SMOOTH);

so_method(so, curie, NV40TCL_LINE_WIDTH, 2);
so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
so_data (so, cso->line_smooth ? 1 : 0);
so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2);
so_data (so, cso->line_stipple_enable ? 1 : 0);
so_data (so, (cso->line_stipple_pattern << 16) |
cso->line_stipple_factor);

so_method(so, curie, NV40TCL_POINT_SIZE, 1);
so_data (so, fui(cso->point_size));

so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6);
if (cso->front_winding == PIPE_WINDING_CCW) {
so_data(so, nvgl_polygon_mode(cso->fill_ccw));
so_data(so, nvgl_polygon_mode(cso->fill_cw));
switch (cso->cull_mode) {
case PIPE_WINDING_CCW:
so_data(so, NV40TCL_CULL_FACE_FRONT);
break;
case PIPE_WINDING_CW:
so_data(so, NV40TCL_CULL_FACE_BACK);
break;
case PIPE_WINDING_BOTH:
so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
so_data(so, NV40TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV40TCL_FRONT_FACE_CCW);
} else {
so_data(so, nvgl_polygon_mode(cso->fill_cw));
so_data(so, nvgl_polygon_mode(cso->fill_ccw));
switch (cso->cull_mode) {
case PIPE_WINDING_CCW:
so_data(so, NV40TCL_CULL_FACE_BACK);
break;
case PIPE_WINDING_CW:
so_data(so, NV40TCL_CULL_FACE_FRONT);
break;
case PIPE_WINDING_BOTH:
so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
so_data(so, NV40TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV40TCL_FRONT_FACE_CW);
}
so_data(so, cso->poly_smooth ? 1 : 0);
so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);

so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, cso->poly_stipple_enable ? 1 : 0);

so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
(cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
so_data(so, 1);
else
so_data(so, 0);
if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
(cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
so_data(so, 1);
else
so_data(so, 0);
if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
(cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
so_data(so, 1);
else
so_data(so, 0);
if (cso->offset_cw || cso->offset_ccw) {
so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2);
so_data (so, fui(cso->offset_scale));
so_data (so, fui(cso->offset_units * 2));
}

so_method(so, curie, NV40TCL_POINT_SPRITE, 1);
if (cso->point_quad_rasterization) {
unsigned psctl = (1 << 0), i;

for (i = 0; i < 8; i++) {
if ((cso->sprite_coord_enable >> i) & 1)
psctl |= (1 << (8 + i));
}

so_data(so, psctl);
} else {
so_data(so, 0);
}

so_ref(so, &rsso->so);
so_ref(NULL, &so);
rsso->pipe = *cso;
return (void *)rsso;
}

/*
 * Make hwcso the current rasterizer state and flag NV40_NEW_RAST in both
 * the hardware (dirty) and draw-module (draw_dirty) masks.
 */
static void
nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->draw_dirty |= NV40_NEW_RAST;
	ctx->dirty |= NV40_NEW_RAST;
	ctx->rasterizer = hwcso;
}

/* Release the rasterizer's state object reference and free the wrapper. */
static void
nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_rasterizer_state *rast = hwcso;

	(void)pipe;

	so_ref(NULL, &rast->so);
	FREE(rast);
}

static void *
nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
struct nouveau_stateobj *so = so_new(6, 20, 0);
struct nouveau_grobj *curie = nv40->screen->curie;

so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
so_data (so, nvgl_comparison_op(cso->depth.func));
so_data (so, cso->depth.writemask ? 1 : 0);
so_data (so, cso->depth.enabled ? 1 : 0);

so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
so_data (so, cso->alpha.enabled ? 1 : 0);
so_data (so, nvgl_comparison_op(cso->alpha.func));
so_data (so, float_to_ubyte(cso->alpha.ref_value));

if (cso->stencil[0].enabled) {
so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 3);
so_data (so, cso->stencil[0].enabled ? 1 : 0);
so_data (so, cso->stencil[0].writemask);
so_data (so, nvgl_comparison_op(cso->stencil[0].func));
so_method(so, curie, NV40TCL_STENCIL_FRONT_FUNC_MASK, 4);
so_data (so, cso->stencil[0].valuemask);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
} else {
so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}

if (cso->stencil[1].enabled) {
so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 3);
so_data (so, cso->stencil[1].enabled ? 1 : 0);
so_data (so, cso->stencil[1].writemask);
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
so_method(so, curie, NV40TCL_STENCIL_BACK_FUNC_MASK, 4);
so_data (so, cso->stencil[1].valuemask);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
} else {
so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}

so_ref(so, &zsaso->so);
so_ref(NULL, &so);
zsaso->pipe = *cso;
return (void *)zsaso;
}

/* Make hwcso the current depth/stencil/alpha state and flag NV40_NEW_ZSA. */
static void
nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_ZSA;
	ctx->zsa = hwcso;
}

/* Release the ZSA state object reference and free the wrapper. */
static void
nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_zsa_state *zsa = hwcso;

	(void)pipe;

	so_ref(NULL, &zsa->so);
	FREE(zsa);
}

static void *
nv40_vp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp;

vp = CALLOC(1, sizeof(struct nv40_vertex_program));
vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe);

return (void *)vp;
}

/*
 * Make hwcso the current vertex program and flag NV40_NEW_VERTPROG in both
 * the hardware (dirty) and draw-module (draw_dirty) masks.
 */
static void
nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->draw_dirty |= NV40_NEW_VERTPROG;
	ctx->dirty |= NV40_NEW_VERTPROG;
	ctx->vertprog = hwcso;
}

/*
 * Destroy a vertex program: delete its draw-module shader, tear down the
 * driver-side program, then free the token copy and the object.
 */
static void
nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_vertex_program *prog = hwcso;
	struct nv40_context *ctx = nv40_context(pipe);

	draw_delete_vertex_shader(ctx->draw, prog->draw);
	nv40_vertprog_destroy(ctx, prog);

	FREE((void*)prog->pipe.tokens);
	FREE(prog);
}

static void *
nv40_fp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
struct nv40_fragment_program *fp;

fp = CALLOC(1, sizeof(struct nv40_fragment_program));
fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);

tgsi_scan_shader(fp->pipe.tokens, &fp->info);

return (void *)fp;
}

/* Make hwcso the current fragment program and flag NV40_NEW_FRAGPROG. */
static void
nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_FRAGPROG;
	ctx->fragprog = hwcso;
}

/*
 * Destroy a fragment program: tear down the driver-side program, then free
 * the token copy and the object.
 */
static void
nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_fragment_program *prog = hwcso;
	struct nv40_context *ctx = nv40_context(pipe);

	nv40_fragprog_destroy(ctx, prog);

	FREE((void*)prog->pipe.tokens);
	FREE(prog);
}

/* Copy the blend colour into the context and flag NV40_NEW_BCOL. */
static void
nv40_set_blend_color(struct pipe_context *pipe,
		     const struct pipe_blend_color *bcol)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_BCOL;
	ctx->blend_colour = *bcol;
}

/* Copy the stencil reference values into the context and flag NV40_NEW_SR. */
static void
nv40_set_stencil_ref(struct pipe_context *pipe,
		     const struct pipe_stencil_ref *sr)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_SR;
	ctx->stencil_ref = *sr;
}

/*
 * Copy the clip state into the context and flag NV40_NEW_UCP in both the
 * hardware (dirty) and draw-module (draw_dirty) masks.
 */
static void
nv40_set_clip_state(struct pipe_context *pipe,
		    const struct pipe_clip_state *clip)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->draw_dirty |= NV40_NEW_UCP;
	ctx->dirty |= NV40_NEW_UCP;
	ctx->clip = *clip;
}

/*
 * Record the constant buffer for a shader stage.
 *
 * constbuf_nr[] holds the buffer size expressed in 4-float vectors.  A NULL
 * `buf` unbinds the stage's buffer and records a count of zero instead of
 * dereferencing the pointer.  The owning program is flagged dirty so the
 * constants get re-uploaded.
 *
 * NOTE(review): `index` is ignored and `shader` is used as an array index
 * without a range check; the array bounds are not visible from here.
 */
static void
nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
			 struct pipe_buffer *buf )
{
	struct nv40_context *nv40 = nv40_context(pipe);

	(void)index;

	nv40->constbuf[shader] = buf;
	nv40->constbuf_nr[shader] = buf ? buf->size / (4 * sizeof(float)) : 0;

	if (shader == PIPE_SHADER_VERTEX) {
		nv40->dirty |= NV40_NEW_VERTPROG;
	} else
	if (shader == PIPE_SHADER_FRAGMENT) {
		nv40->dirty |= NV40_NEW_FRAGPROG;
	}
}

/* Copy the framebuffer state into the context and flag NV40_NEW_FB. */
static void
nv40_set_framebuffer_state(struct pipe_context *pipe,
			   const struct pipe_framebuffer_state *fb)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_FB;
	ctx->framebuffer = *fb;
}

/*
 * Copy the polygon stipple pattern (4 * 32 bytes) into the context and
 * flag NV40_NEW_STIPPLE.
 */
static void
nv40_set_polygon_stipple(struct pipe_context *pipe,
			 const struct pipe_poly_stipple *stipple)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_STIPPLE;
	memcpy(ctx->stipple, stipple->stipple, 4 * 32);
}

/* Copy the scissor rectangle into the context and flag NV40_NEW_SCISSOR. */
static void
nv40_set_scissor_state(struct pipe_context *pipe,
		       const struct pipe_scissor_state *s)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->dirty |= NV40_NEW_SCISSOR;
	ctx->scissor = *s;
}

/*
 * Copy the viewport transform into the context and flag NV40_NEW_VIEWPORT
 * in both the hardware (dirty) and draw-module (draw_dirty) masks.
 */
static void
nv40_set_viewport_state(struct pipe_context *pipe,
			const struct pipe_viewport_state *vpt)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->draw_dirty |= NV40_NEW_VIEWPORT;
	ctx->dirty |= NV40_NEW_VIEWPORT;
	ctx->viewport = *vpt;
}

/*
 * Copy `count` vertex buffer descriptors into the context and flag
 * NV40_NEW_ARRAYS in both the hardware and draw-module dirty masks.
 */
static void
nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
			const struct pipe_vertex_buffer *vb)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->vtxbuf_nr = count;
	memcpy(ctx->vtxbuf, vb, sizeof(*vb) * count);

	ctx->draw_dirty |= NV40_NEW_ARRAYS;
	ctx->dirty |= NV40_NEW_ARRAYS;
}

/*
 * Create a vertex-elements state object holding a copy of `elements`.
 *
 * Asserts that fewer than 16 elements are requested (no fallback exists
 * for more).  Returns the new object, or NULL if allocation fails.
 */
static void *
nv40_vtxelts_state_create(struct pipe_context *pipe,
			  unsigned num_elements,
			  const struct pipe_vertex_element *elements)
{
	struct nv40_vtxelt_state *cso = CALLOC_STRUCT(nv40_vtxelt_state);

	(void)pipe;

	/* Do not write through a failed allocation. */
	if (!cso)
		return NULL;

	assert(num_elements < 16); /* not doing fallbacks yet */
	cso->num_elements = num_elements;
	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));

/*	nv40_vtxelt_construct(cso);*/

	return (void *)cso;
}

/* Free the vertex-elements state object; the context is not used. */
static void
nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
	(void)pipe;

	FREE(hwcso);
}

/*
 * Make hwcso the current vertex-elements state and flag NV40_NEW_ARRAYS in
 * both the hardware and draw-module dirty masks.
 */
static void
nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nv40_context *ctx = nv40_context(pipe);

	ctx->draw_dirty |= NV40_NEW_ARRAYS;
	ctx->dirty |= NV40_NEW_ARRAYS;
	ctx->vtxelt = hwcso;
}

void
nv40_init_state_functions(struct nv40_context *nv40)
{
nv40->pipe.create_blend_state = nv40_blend_state_create;
nv40->pipe.bind_blend_state = nv40_blend_state_bind;
nv40->pipe.delete_blend_state = nv40_blend_state_delete;

nv40->pipe.create_sampler_state = nv40_sampler_state_create;
nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind;
nv40->pipe.delete_sampler_state = nv40_sampler_state_delete;
nv40->pipe.set_fragment_sampler_views = nv40_set_fragment_sampler_views;
nv40->pipe.create_sampler_view = nv40_create_sampler_view;
nv40->pipe.sampler_view_destroy = nv40_sampler_view_destroy;

nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create;
nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind;
nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete;

nv40->pipe.create_depth_stencil_alpha_state =
nv40_depth_stencil_alpha_state_create;
nv40->pipe.bind_depth_stencil_alpha_state =
nv40_depth_stencil_alpha_state_bind;
nv40->pipe.delete_depth_stencil_alpha_state =
nv40_depth_stencil_alpha_state_delete;

nv40->pipe.create_vs_state = nv40_vp_state_create;
nv40->pipe.bind_vs_state = nv40_vp_state_bind;
nv40->pipe.delete_vs_state = nv40_vp_state_delete;

nv40->pipe.create_fs_state = nv40_fp_state_create;
nv40->pipe.bind_fs_state = nv40_fp_state_bind;
nv40->pipe.delete_fs_state = nv40_fp_state_delete;

nv40->pipe.set_blend_color = nv40_set_blend_color;
nv40->pipe.set_stencil_ref = nv40_set_stencil_ref;
nv40->pipe.set_clip_state = nv40_set_clip_state;
nv40->pipe.set_constant_buffer = nv40_set_constant_buffer;
nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state;
nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple;
nv40->pipe.set_scissor_state = nv40_set_scissor_state;
nv40->pipe.set_viewport_state = nv40_set_viewport_state;

nv40->pipe.create_vertex_elements_state = nv40_vtxelts_state_create;
nv40->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete;
nv40->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind;

nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
}


+ 0
- 41
src/gallium/drivers/nv40/nv40_state_blend.c 파일 보기

@@ -1,41 +0,0 @@
#include "nv40_context.h"

/*
 * Point the NV40_STATE_BLEND hardware slot at the bound blend CSO's
 * prebuilt state object.  Always reports a change.
 */
static boolean
nv40_state_blend_validate(struct nv40_context *nv40)
{
	struct nouveau_stateobj **hw_slot = &nv40->state.hw[NV40_STATE_BLEND];

	so_ref(nv40->blend->so, hw_slot);
	return TRUE;
}

/* Validation entry: runs on NV40_NEW_BLEND, updates NV40_STATE_BLEND. */
struct nv40_state_entry nv40_state_blend = {
	.dirty.pipe = NV40_NEW_BLEND,
	.dirty.hw = NV40_STATE_BLEND,
	.validate = nv40_state_blend_validate
};

/*
 * Build a state object that programs NV40TCL_BLEND_COLOR from the
 * context's blend colour and install it in the NV40_STATE_BCOL slot.
 *
 * The four float channels are converted to bytes and packed with
 * color[3] in bits 31..24, color[0] in 23..16, color[1] in 15..8 and
 * color[2] in 7..0.  Always reports a change.
 */
static boolean
nv40_state_blend_colour_validate(struct nv40_context *nv40)
{
	struct pipe_blend_color *bcol = &nv40->blend_colour;
	struct nouveau_stateobj *so = so_new(1, 1, 0);

	so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
	so_data  (so, ((float_to_ubyte(bcol->color[3]) << 24) |
		       (float_to_ubyte(bcol->color[0]) << 16) |
		       (float_to_ubyte(bcol->color[1]) <<  8) |
		       (float_to_ubyte(bcol->color[2]) <<  0)));

	/* The hw slot takes its own reference; drop the creation one. */
	so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]);
	so_ref(NULL, &so);
	return TRUE;
}

/* Validation entry: runs on NV40_NEW_BCOL, updates NV40_STATE_BCOL. */
struct nv40_state_entry nv40_state_blend_colour = {
	.dirty.pipe = NV40_NEW_BCOL,
	.dirty.hw = NV40_STATE_BCOL,
	.validate = nv40_state_blend_colour_validate
};

+ 0
- 189
src/gallium/drivers/nv40/nv40_state_emit.c 파일 보기

@@ -1,189 +0,0 @@
#include "nv40_context.h"
#include "nv40_state.h"
#include "draw/draw_context.h"

/*
 * NULL-terminated list of state entries passed to nv40_state_do_validate()
 * by nv40_state_validate().  Entries are walked in array order.  This list
 * differs from swtnl_states only in its last entry (nv40_state_vbo).
 */
static struct nv40_state_entry *render_states[] = {
&nv40_state_framebuffer,
&nv40_state_rasterizer,
&nv40_state_scissor,
&nv40_state_stipple,
&nv40_state_fragprog,
&nv40_state_fragtex,
&nv40_state_vertprog,
&nv40_state_blend,
&nv40_state_blend_colour,
&nv40_state_zsa,
&nv40_state_sr,
&nv40_state_viewport,
&nv40_state_vbo,
NULL
};

/*
 * NULL-terminated list of state entries passed to nv40_state_do_validate()
 * by nv40_state_validate_swtnl().  Entries are walked in array order.  This
 * list differs from render_states only in its last entry
 * (nv40_state_vtxfmt).
 */
static struct nv40_state_entry *swtnl_states[] = {
&nv40_state_framebuffer,
&nv40_state_rasterizer,
&nv40_state_scissor,
&nv40_state_stipple,
&nv40_state_fragprog,
&nv40_state_fragtex,
&nv40_state_vertprog,
&nv40_state_blend,
&nv40_state_blend_colour,
&nv40_state_zsa,
&nv40_state_sr,
&nv40_state_viewport,
&nv40_state_vtxfmt,
NULL
};

static void
nv40_state_do_validate(struct nv40_context *nv40,
struct nv40_state_entry **states)
{
while (*states) {
struct nv40_state_entry *e = *states;

if (nv40->dirty & e->dirty.pipe) {
if (e->validate(nv40))
nv40->state.dirty |= (1ULL << e->dirty.hw);
}

states++;
}
nv40->dirty = 0;
}

/*
 * Emit every dirty hardware state object to the channel.
 *
 * If another context was last to emit on this screen, every slot whose
 * state object differs from the screen's cached one is marked dirty first.
 * Each dirty slot is then copied into the screen cache and emitted when
 * non-NULL.  If the fragment program or FRAGTEX0 slot was dirty, two
 * NV40TCL_TEX_CACHE_CTL writes (2, then 1) follow.  Finally the dirty mask
 * is cleared.
 */
void
nv40_state_emit(struct nv40_context *nv40)
{
	struct nv40_screen *screen = nv40->screen;
	struct nv40_state *state = &nv40->state;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *curie = screen->curie;
	uint64_t pending;
	unsigned i;

	/* XXX: race conditions
	 */
	if (nv40 != screen->cur_ctx) {
		for (i = 0; i < NV40_STATE_MAX; i++) {
			if (!state->hw[i])
				continue;
			if (screen->state[i] != state->hw[i])
				state->dirty |= (1ULL << i);
		}

		screen->cur_ctx = nv40;
	}

	/* Walk set bits from bit 0 upwards until none remain. */
	pending = state->dirty;
	for (i = 0; pending; i++) {
		const uint64_t bit = 1ULL << i;

		if (pending & bit) {
			so_ref (state->hw[i], &screen->state[i]);
			if (state->hw[i])
				so_emit(chan, screen->state[i]);
			pending &= ~bit;
		}
	}

	if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
			    (1ULL << NV40_STATE_FRAGTEX0))) {
		BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
		OUT_RING  (chan, 2);
		BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
		OUT_RING  (chan, 1);
	}

	state->dirty = 0;
}

/*
 * Channel flush callback: re-emit relocation markers for the framebuffer,
 * each fragment texture unit named in state->fp_samplers (units 0..15),
 * the fragment program, and - in HW render mode only - the vertex buffers.
 */
void
nv40_state_flush_notify(struct nouveau_channel *chan)
{
	struct nv40_context *nv40 = chan->user_private;
	struct nv40_state *state = &nv40->state;
	unsigned unit, remaining;

	so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]);

	remaining = state->fp_samplers;
	for (unit = 0; unit < 16 && remaining; unit++) {
		if (remaining & (1 << unit)) {
			so_emit_reloc_markers(chan,
				state->hw[NV40_STATE_FRAGTEX0+unit]);
			remaining &= ~(1ULL << unit);
		}
	}

	so_emit_reloc_markers(chan, state->hw[NV40_STATE_FRAGPROG]);

	if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW)
		so_emit_reloc_markers(chan, state->hw[NV40_STATE_VTXBUF]);
}

/*
 * Validate state for the hardware TNL path.
 *
 * When not already in HW mode, a switch back is attempted only if every
 * state flag recorded in fallback_swtnl is dirty again; the switch flushes
 * the pipe and re-dirties viewport, vertex program and arrays.
 *
 * Returns TRUE when the HW path can be used, FALSE when a swtnl or swrast
 * fallback is (still) required.
 */
boolean
nv40_state_validate(struct nv40_context *nv40)
{
	const boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE;

	if (nv40->render_mode != HW) {
		/* Don't even bother trying to go back to hw if none
		 * of the states that caused swtnl previously have changed.
		 */
		if ((nv40->fallback_swtnl & nv40->dirty)
					!= nv40->fallback_swtnl)
			return FALSE;

		/* Attempt to go to hwtnl again */
		nv40->pipe.flush(&nv40->pipe, 0, NULL);
		nv40->dirty |= (NV40_NEW_VIEWPORT |
				NV40_NEW_VERTPROG |
				NV40_NEW_ARRAYS);
		nv40->render_mode = HW;
	}

	nv40_state_do_validate(nv40, render_states);

	if (nv40->fallback_swtnl || nv40->fallback_swrast)
		return FALSE;

	if (was_sw)
		NOUVEAU_ERR("swtnl->hw\n");

	return TRUE;
}

/*
 * Validate state for the software TNL path.
 *
 * Switching from HW mode flushes the pipe and re-dirties viewport, vertex
 * program and arrays.  State flagged in draw_dirty is pushed to the draw
 * module, then the swtnl state list is validated.
 *
 * Returns FALSE (leaving draw_dirty untouched) when a swrast fallback is
 * required, TRUE otherwise.
 */
boolean
nv40_state_validate_swtnl(struct nv40_context *nv40)
{
	struct draw_context *draw = nv40->draw;

	/* Setup for swtnl */
	if (nv40->render_mode == HW) {
		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
		nv40->pipe.flush(&nv40->pipe, 0, NULL);
		nv40->dirty |= (NV40_NEW_VIEWPORT |
				NV40_NEW_VERTPROG |
				NV40_NEW_ARRAYS);
		nv40->render_mode = SWTNL;
	}

	/* Forward whatever changed since the last swtnl validation. */
	if (nv40->draw_dirty & NV40_NEW_VERTPROG) {
		draw_bind_vertex_shader(draw, nv40->vertprog->draw);
	}

	if (nv40->draw_dirty & NV40_NEW_RAST) {
		draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe);
	}

	if (nv40->draw_dirty & NV40_NEW_UCP) {
		draw_set_clip_state(draw, &nv40->clip);
	}

	if (nv40->draw_dirty & NV40_NEW_VIEWPORT) {
		draw_set_viewport_state(draw, &nv40->viewport);
	}

	if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
		draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
		draw_set_vertex_elements(draw, nv40->vtxelt->num_elements,
					 nv40->vtxelt->pipe);
	}

	nv40_state_do_validate(nv40, swtnl_states);

	if (nv40->fallback_swrast) {
		NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
		return FALSE;
	}

	nv40->draw_dirty = 0;
	return TRUE;
}


+ 0
- 175
src/gallium/drivers/nv40/nv40_state_fb.c 파일 보기

@@ -1,175 +0,0 @@
#include "nv40_context.h"
#include "nouveau/nouveau_util.h"

/* Return the pipe_buffer backing the miptree a surface was created from. */
static struct pipe_buffer *
nv40_do_surface_buffer(struct pipe_surface *surface)
{
	return ((struct nv40_miptree *)surface->texture)->buffer;
}

/* Same lookup, passed through nouveau_bo(). */
#define nv40_surface_buffer(ps) nouveau_bo(nv40_do_surface_buffer(ps))

/*
 * Build the framebuffer state object from nv40->framebuffer and install it
 * in the NV40_STATE_FB hardware slot.
 *
 * The object programs the DMA object, offset and pitch of each enabled
 * colour target and of the depth/stencil buffer, the RT enable mask, the
 * render target size/format and the viewport/clip rectangles.
 * Always returns TRUE.
 */
static boolean
nv40_state_framebuffer_validate(struct nv40_context *nv40)
{
struct nouveau_channel *chan = nv40->screen->base.channel;
struct nouveau_grobj *curie = nv40->screen->curie;
struct pipe_framebuffer_state *fb = &nv40->framebuffer;
struct nv04_surface *rt[4], *zeta;
uint32_t rt_enable, rt_format;
int i, colour_format = 0, zeta_format = 0;
struct nouveau_stateobj *so = so_new(18, 24, 10);
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
unsigned w = fb->width;
unsigned h = fb->height;

/*
 * NOTE(review): only the first colour buffer (the one that sets
 * colour_format) records rt[i] and its rt_enable bit; later buffers are
 * only format-checked.  As written rt[1..3] stay uninitialised and the
 * COLOR1..3 / MRT bits are never set by this loop - confirm intent.
 */
rt_enable = 0;
for (i = 0; i < fb->nr_cbufs; i++) {
if (colour_format) {
assert(colour_format == fb->cbufs[i]->format);
} else {
colour_format = fb->cbufs[i]->format;
rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i);
rt[i] = (struct nv04_surface *)fb->cbufs[i];
}
}

/* Any target beyond COLOR0 also needs the MRT bit. */
if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 |
NV40TCL_RT_ENABLE_COLOR3))
rt_enable |= NV40TCL_RT_ENABLE_MRT;

if (fb->zsbuf) {
zeta_format = fb->zsbuf->format;
zeta = (struct nv04_surface *)fb->zsbuf;
}

/*
 * Swizzled targets encode log2 width/height in the format word, hence the
 * power-of-two assertion.
 * NOTE(review): rt[0] is read here regardless of fb->nr_cbufs, and the
 * assert loop below reads rt[i] for i >= 1 (see the note above).
 */
if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
for (i = 1; i < fb->nr_cbufs; i++)
assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));

rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED |
log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT |
log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT;
}
else
rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;

/* colour_format == 0 means no colour buffer was seen; A8R8G8B8 is used. */
switch (colour_format) {
case PIPE_FORMAT_B8G8R8X8_UNORM:
rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
break;
case PIPE_FORMAT_B8G8R8A8_UNORM:
case 0:
rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
break;
case PIPE_FORMAT_B5G6R5_UNORM:
rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5;
break;
default:
assert(0);
}

/* zeta_format == 0 means no depth/stencil buffer; Z24S8 is used. */
switch (zeta_format) {
case PIPE_FORMAT_Z16_UNORM:
rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16;
break;
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
case 0:
rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8;
break;
default:
assert(0);
}

/* COLOR0: pitch word first, then the offset relocation. */
if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
so_method(so, curie, NV40TCL_DMA_COLOR0, 1);
so_reloc (so, nv40_surface_buffer(&rt[0]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, curie, NV40TCL_COLOR0_PITCH, 2);
so_data (so, rt[0]->pitch);
so_reloc (so, nv40_surface_buffer(&rt[0]->base),
rt[0]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
}

/* COLOR1: offset relocation first, then the pitch word. */
if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
so_method(so, curie, NV40TCL_DMA_COLOR1, 1);
so_reloc (so, nv40_surface_buffer(&rt[1]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, curie, NV40TCL_COLOR1_OFFSET, 2);
so_reloc (so, nv40_surface_buffer(&rt[1]->base),
rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
so_data (so, rt[1]->pitch);
}

/* COLOR2 and COLOR3 use separate offset and pitch methods. */
if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
so_method(so, curie, NV40TCL_DMA_COLOR2, 1);
so_reloc (so, nv40_surface_buffer(&rt[2]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, curie, NV40TCL_COLOR2_OFFSET, 1);
so_reloc (so, nv40_surface_buffer(&rt[2]->base),
rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
so_method(so, curie, NV40TCL_COLOR2_PITCH, 1);
so_data (so, rt[2]->pitch);
}

if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
so_method(so, curie, NV40TCL_DMA_COLOR3, 1);
so_reloc (so, nv40_surface_buffer(&rt[3]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, curie, NV40TCL_COLOR3_OFFSET, 1);
so_reloc (so, nv40_surface_buffer(&rt[3]->base),
rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
so_method(so, curie, NV40TCL_COLOR3_PITCH, 1);
so_data (so, rt[3]->pitch);
}

/* zeta is only assigned when fb->zsbuf is set, so key off zeta_format. */
if (zeta_format) {
so_method(so, curie, NV40TCL_DMA_ZETA, 1);
so_reloc (so, nv40_surface_buffer(&zeta->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, curie, NV40TCL_ZETA_OFFSET, 1);
so_reloc (so, nv40_surface_buffer(&zeta->base),
zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
so_method(so, curie, NV40TCL_ZETA_PITCH, 1);
so_data (so, zeta->pitch);
}

/* Sizes are packed as (extent << 16) | origin, with origin 0. */
so_method(so, curie, NV40TCL_RT_ENABLE, 1);
so_data (so, rt_enable);
so_method(so, curie, NV40TCL_RT_HORIZ, 3);
so_data (so, (w << 16) | 0);
so_data (so, (h << 16) | 0);
so_data (so, rt_format);
so_method(so, curie, NV40TCL_VIEWPORT_HORIZ, 2);
so_data (so, (w << 16) | 0);
so_data (so, (h << 16) | 0);
so_method(so, curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
so_data (so, ((w - 1) << 16) | 0);
so_data (so, ((h - 1) << 16) | 0);
/* Unnamed method 0x1d88; its meaning is not documented in this file. */
so_method(so, curie, 0x1d88, 1);
so_data (so, (1 << 12) | h);

/* The hw slot takes its own reference; drop the creation one. */
so_ref(so, &nv40->state.hw[NV40_STATE_FB]);
so_ref(NULL, &so);
return TRUE;
}

/* Validation entry: runs on NV40_NEW_FB, updates NV40_STATE_FB. */
struct nv40_state_entry nv40_state_framebuffer = {
	.dirty.pipe = NV40_NEW_FB,
	.dirty.hw = NV40_STATE_FB,
	.validate = nv40_state_framebuffer_validate
};

+ 0
- 17
src/gallium/drivers/nv40/nv40_state_rasterizer.c 파일 보기

@@ -1,17 +0,0 @@
#include "nv40_context.h"

/*
 * Point the NV40_STATE_RAST hardware slot at the bound rasterizer CSO's
 * prebuilt state object.  Always reports a change.
 */
static boolean
nv40_state_rasterizer_validate(struct nv40_context *nv40)
{
	struct nouveau_stateobj **hw_slot = &nv40->state.hw[NV40_STATE_RAST];

	so_ref(nv40->rasterizer->so, hw_slot);
	return TRUE;
}

/* Validation entry: runs on NV40_NEW_RAST, updates NV40_STATE_RAST. */
struct nv40_state_entry nv40_state_rasterizer = {
	.dirty.pipe = NV40_NEW_RAST,
	.dirty.hw = NV40_STATE_RAST,
	.validate = nv40_state_rasterizer_validate
};

+ 0
- 36
src/gallium/drivers/nv40/nv40_state_scissor.c 파일 보기

@@ -1,36 +0,0 @@
#include "nv40_context.h"

/*
 * Rebuild the scissor state object.
 *
 * Nothing is done (FALSE) when a scissor object already exists and scissor
 * testing is off both in the rasterizer and in the last emitted state.
 * Otherwise NV40TCL_SCISSOR_HORIZ is programmed with either the scissor
 * rectangle as (size << 16) | origin per axis, or 4096 << 16 per axis when
 * scissoring is disabled.
 */
static boolean
nv40_state_scissor_validate(struct nv40_context *nv40)
{
	struct pipe_scissor_state *s = &nv40->scissor;
	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
	struct nouveau_stateobj *so;

	if (nv40->state.hw[NV40_STATE_SCISSOR] &&
	    (rast->scissor == 0 && nv40->state.scissor_enabled == 0))
		return FALSE;

	nv40->state.scissor_enabled = rast->scissor;

	so = so_new(1, 2, 0);
	so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
	if (!nv40->state.scissor_enabled) {
		so_data  (so, 4096 << 16);
		so_data  (so, 4096 << 16);
	} else {
		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx);
		so_data  (so, ((s->maxy - s->miny) << 16) | s->miny);
	}

	/* The hw slot takes its own reference; drop the creation one. */
	so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]);
	so_ref(NULL, &so);
	return TRUE;
}

/*
 * Validation entry: runs on NV40_NEW_SCISSOR or NV40_NEW_RAST, updates
 * NV40_STATE_SCISSOR.
 */
struct nv40_state_entry nv40_state_scissor = {
	.dirty.pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST,
	.dirty.hw = NV40_STATE_SCISSOR,
	.validate = nv40_state_scissor_validate
};

+ 0
- 39
src/gallium/drivers/nv40/nv40_state_stipple.c 파일 보기

@@ -1,39 +0,0 @@
#include "nv40_context.h"

static boolean
nv40_state_stipple_validate(struct nv40_context *nv40)
{
struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
struct nouveau_grobj *curie = nv40->screen->curie;
struct nouveau_stateobj *so;

if (nv40->state.hw[NV40_STATE_STIPPLE] &&
(rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0))
return FALSE;

if (rast->poly_stipple_enable) {
unsigned i;

so = so_new(2, 33, 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 1);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
so_data(so, nv40->stipple[i]);
} else {
so = so_new(1, 1, 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 0);
}

so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]);
return TRUE;
}

/*
 * Validation entry: runs on NV40_NEW_STIPPLE or NV40_NEW_RAST, updates
 * NV40_STATE_STIPPLE.
 */
struct nv40_state_entry nv40_state_stipple = {
	.dirty.pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST,
	.dirty.hw = NV40_STATE_STIPPLE,
	.validate = nv40_state_stipple_validate,
};

+ 0
- 38
src/gallium/drivers/nv40/nv40_state_viewport.c 파일 보기

@@ -1,38 +0,0 @@
#include "nv40_context.h"

/*
 * Rebuild the viewport state object.
 *
 * Nothing is done (FALSE) when a viewport object already exists and
 * NV40_NEW_VIEWPORT is not set.  Otherwise the four translate values
 * followed by the four scale values are written starting at
 * NV40TCL_VIEWPORT_TRANSLATE_X, then method 0x1d78 is set to 1.
 */
static boolean
nv40_state_viewport_validate(struct nv40_context *nv40)
{
	struct pipe_viewport_state *vpt = &nv40->viewport;
	struct nouveau_grobj *curie = nv40->screen->curie;
	struct nouveau_stateobj *so;
	unsigned i;

	if (nv40->state.hw[NV40_STATE_VIEWPORT] &&
	    !(nv40->dirty & NV40_NEW_VIEWPORT))
		return FALSE;

	so = so_new(2, 9, 0);
	so_method(so, curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
	for (i = 0; i < 4; i++)
		so_data(so, fui(vpt->translate[i]));
	for (i = 0; i < 4; i++)
		so_data(so, fui(vpt->scale[i]));
	so_method(so, curie, 0x1d78, 1);
	so_data  (so, 1);

	/* The hw slot takes its own reference; drop the creation one. */
	so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
	so_ref(NULL, &so);
	return TRUE;
}

/*
 * Validation entry: runs on NV40_NEW_VIEWPORT or NV40_NEW_RAST, updates
 * NV40_STATE_VIEWPORT.
 */
struct nv40_state_entry nv40_state_viewport = {
	.dirty.pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST,
	.dirty.hw = NV40_STATE_VIEWPORT,
	.validate = nv40_state_viewport_validate
};

+ 0
- 41
src/gallium/drivers/nv40/nv40_state_zsa.c 파일 보기

@@ -1,41 +0,0 @@
#include "nv40_context.h"

/*
 * Point the NV40_STATE_ZSA hardware slot at the bound depth/stencil/alpha
 * CSO's prebuilt state object.  Always reports a change.
 */
static boolean
nv40_state_zsa_validate(struct nv40_context *nv40)
{
	struct nouveau_stateobj **hw_slot = &nv40->state.hw[NV40_STATE_ZSA];

	so_ref(nv40->zsa->so, hw_slot);
	return TRUE;
}

/* Validation entry: runs on NV40_NEW_ZSA, updates NV40_STATE_ZSA. */
struct nv40_state_entry nv40_state_zsa = {
	.dirty.pipe = NV40_NEW_ZSA,
	.dirty.hw = NV40_STATE_ZSA,
	.validate = nv40_state_zsa_validate
};

/*
 * Build a state object that programs the front and back stencil reference
 * values from nv40->stencil_ref and install it in the NV40_STATE_SR slot.
 * Always reports a change.
 */
static boolean
nv40_state_sr_validate(struct nv40_context *nv40)
{
	struct pipe_stencil_ref *sr = &nv40->stencil_ref;
	struct nouveau_grobj *curie = nv40->screen->curie;
	struct nouveau_stateobj *so = so_new(2, 2, 0);

	so_method(so, curie, NV40TCL_STENCIL_FRONT_FUNC_REF, 1);
	so_data  (so, sr->ref_value[0]);
	so_method(so, curie, NV40TCL_STENCIL_BACK_FUNC_REF, 1);
	so_data  (so, sr->ref_value[1]);

	/* The hw slot takes its own reference; drop the creation one. */
	so_ref(so, &nv40->state.hw[NV40_STATE_SR]);
	so_ref(NULL, &so);
	return TRUE;
}

/* Validation entry: runs on NV40_NEW_SR, updates NV40_STATE_SR. */
struct nv40_state_entry nv40_state_sr = {
	.dirty.pipe = NV40_NEW_SR,
	.dirty.hw = NV40_STATE_SR,
	.validate = nv40_state_sr_validate
};

+ 0
- 64
src/gallium/drivers/nv40/nv40_surface.c 파일 보기

@@ -1,64 +0,0 @@

/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/

#include "pipe/p_defines.h"
#include "util/u_inlines.h"

#include "util/u_tile.h"

#include "nv40_context.h"

static void
nv40_surface_copy(struct pipe_context *pipe,
struct pipe_surface *dest, unsigned destx, unsigned desty,
struct pipe_surface *src, unsigned srcx, unsigned srcy,
unsigned width, unsigned height)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv04_surface_2d *eng2d = nv40->screen->eng2d;

eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
}

static void
nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
unsigned destx, unsigned desty, unsigned width,
unsigned height, unsigned value)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv04_surface_2d *eng2d = nv40->screen->eng2d;

eng2d->fill(eng2d, dest, destx, desty, width, height, value);
}

void
nv40_init_surface_functions(struct nv40_context *nv40)
{
nv40->pipe.surface_copy = nv40_surface_copy;
nv40->pipe.surface_fill = nv40_surface_fill;
}

+ 0
- 181
src/gallium/drivers/nv40/nv40_transfer.c 파일 보기

@@ -1,181 +0,0 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "nouveau/nouveau_winsys.h"
#include "nv40_context.h"
#include "nv40_screen.h"
#include "nv40_state.h"

/*
 * Driver-side transfer object.  `base` is the embedded pipe_transfer that
 * is handed back to callers as &tx->base.
 */
struct nv40_transfer {
struct pipe_transfer base;
/* Surface obtained via get_tex_surface(): of the texture itself when
 * `direct` is set, otherwise of a temporary linear texture. */
struct pipe_surface *surface;
/* Set when the transfer accesses the texture directly, without a
 * temporary copy. */
boolean direct;
};

/*
 * Fill `template` with the description of a temporary texture suitable for
 * staging a transfer of `pt`: same target, format and sample count, the
 * requested width/height, depth 1, a single mip level, and dynamic +
 * linear usage.  All other fields are zeroed.
 */
static void
nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
			     struct pipe_texture *template)
{
	memset(template, 0, sizeof(struct pipe_texture));

	/* Properties inherited from the source texture. */
	template->target = pt->target;
	template->format = pt->format;
	template->nr_samples = pt->nr_samples;

	/* A single 2D slice of the requested size. */
	template->width0 = width;
	template->height0 = height;
	template->depth0 = 1;
	template->last_level = 0;

	template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC |
			      NOUVEAU_TEXTURE_USAGE_LINEAR;
}

static struct pipe_transfer *
nv40_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
enum pipe_transfer_usage usage,
unsigned x, unsigned y, unsigned w, unsigned h)
{
struct pipe_screen *pscreen = pcontext->screen;
struct nv40_miptree *mt = (struct nv40_miptree *)pt;
struct nv40_transfer *tx;
struct pipe_texture tx_tex_template, *tx_tex;

tx = CALLOC_STRUCT(nv40_transfer);
if (!tx)
return NULL;

pipe_texture_reference(&tx->base.texture, pt);
tx->base.x = x;
tx->base.y = y;
tx->base.width = w;
tx->base.height = h;
tx->base.stride = mt->level[level].pitch;
tx->base.usage = usage;
tx->base.face = face;
tx->base.level = level;
tx->base.zslice = zslice;

/* Direct access to texture */
if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC ||
debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) &&
pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)
{
tx->direct = true;
tx->surface = pscreen->get_tex_surface(pscreen, pt,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
return &tx->base;
}

tx->direct = false;

nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template);

tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
{
FREE(tx);
return NULL;
}

tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch;

tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));

pipe_texture_reference(&tx_tex, NULL);

if (!tx->surface)
{
pipe_surface_reference(&tx->surface, NULL);
FREE(tx);
return NULL;
}

if (usage & PIPE_TRANSFER_READ) {
struct nv40_screen *nvscreen = nv40_screen(pscreen);
struct pipe_surface *src;

src = pscreen->get_tex_surface(pscreen, pt,
face, level, zslice,
PIPE_BUFFER_USAGE_GPU_READ);

/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
src, x, y,
w, h);

pipe_surface_reference(&src, NULL);
}

return &tx->base;
}

static void
nv40_transfer_del(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct nv40_transfer *tx = (struct nv40_transfer *)ptx;

if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
struct pipe_screen *pscreen = pcontext->screen;
struct nv40_screen *nvscreen = nv40_screen(pscreen);
struct pipe_surface *dst;

dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);

/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
tx->base.width, tx->base.height);

pipe_surface_reference(&dst, NULL);
}

pipe_surface_reference(&tx->surface, NULL);
pipe_texture_reference(&ptx->texture, NULL);
FREE(ptx);
}

static void *
nv40_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct pipe_screen *pscreen = pcontext->screen;
struct nv40_transfer *tx = (struct nv40_transfer *)ptx;
struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture;
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));

if(!tx->direct)
return map + ns->base.offset;
else
return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}

/* Unmaps the buffer of the texture behind the transfer's surface. */
static void
nv40_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
	struct nv40_transfer *tx = (struct nv40_transfer *)ptx;
	struct nv40_miptree *backing = (struct nv40_miptree *)tx->surface->texture;

	pipe_buffer_unmap(pcontext->screen, backing->buffer);
}

void
nv40_init_transfer_functions(struct nv40_context *nv40)
{
nv40->pipe.get_tex_transfer = nv40_transfer_new;
nv40->pipe.tex_transfer_destroy = nv40_transfer_del;
nv40->pipe.transfer_map = nv40_transfer_map;
nv40->pipe.transfer_unmap = nv40_transfer_unmap;
}

+ 0
- 565
src/gallium/drivers/nv40/nv40_vbo.c 파일 보기

@@ -1,565 +0,0 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "nv40_context.h"
#include "nv40_state.h"

#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
#include "nouveau/nouveau_util.h"

#define FORCE_SWTNL 0

/*
 * Translates a pipe vertex format into the hardware vertex type (*fmt) and
 * component count (*ncomp).
 *
 * Returns 0 on success.  For a format that is not listed it logs an error,
 * leaves both outputs untouched and returns 1.
 */
static INLINE int
nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
{
	unsigned hw_type, components;

	switch (pipe) {
	case PIPE_FORMAT_R32_FLOAT:
		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
		components = 1;
		break;
	case PIPE_FORMAT_R32G32_FLOAT:
		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
		components = 2;
		break;
	case PIPE_FORMAT_R32G32B32_FLOAT:
		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
		components = 3;
		break;
	case PIPE_FORMAT_R32G32B32A32_FLOAT:
		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
		components = 4;
		break;

	case PIPE_FORMAT_R8_UNORM:
		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
		components = 1;
		break;
	case PIPE_FORMAT_R8G8_UNORM:
		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
		components = 2;
		break;
	case PIPE_FORMAT_R8G8B8_UNORM:
		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
		components = 3;
		break;
	case PIPE_FORMAT_R8G8B8A8_UNORM:
		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
		components = 4;
		break;

	case PIPE_FORMAT_R16_SSCALED:
		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
		components = 1;
		break;
	case PIPE_FORMAT_R16G16_SSCALED:
		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
		components = 2;
		break;
	case PIPE_FORMAT_R16G16B16_SSCALED:
		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
		components = 3;
		break;
	case PIPE_FORMAT_R16G16B16A16_SSCALED:
		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
		components = 4;
		break;

	default:
		NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
		return 1;
	}

	*fmt = hw_type;
	*ncomp = components;
	return 0;
}

/*
 * Records the index buffer to use for the next draw.
 *
 * Returns TRUE when `ib` is set up as a hardware index buffer, and FALSE
 * when there is no index buffer, the screen does not report
 * NOUVEAU_CAP_HW_IDXBUF, or the index size is not 2 or 4 bytes.  Passing a
 * NULL buffer clears the stored buffer and poisons the stored format.
 * NV40_NEW_ARRAYS is flagged only when the buffer or its format changed.
 */
static boolean
nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib,
		    unsigned ib_size)
{
	struct pipe_screen *pscreen = &nv40->screen->base.base;
	unsigned hw_type;

	if (ib == NULL) {
		nv40->idxbuf = NULL;
		nv40->idxbuf_format = 0xdeadbeef;
		return FALSE;
	}

	if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF))
		return FALSE;

	if (ib_size == 2)
		hw_type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
	else if (ib_size == 4)
		hw_type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
	else
		return FALSE;

	if (nv40->idxbuf != ib || nv40->idxbuf_format != hw_type) {
		nv40->dirty |= NV40_NEW_ARRAYS;
		nv40->idxbuf = ib;
		nv40->idxbuf_format = hw_type;
	}

	return TRUE;
}

/*
 * Emits a constant (stride 0) vertex attribute into `so` as an immediate
 * NV40TCL_VTX_ATTR_*F method, reading the value from the vertex buffer.
 *
 * Only float formats are handled.  Returns TRUE when the attribute was
 * emitted, and FALSE when the format is unsupported or not float, or when
 * the vertex buffer cannot be mapped.
 */
static boolean
nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
		       int attrib, struct pipe_vertex_element *ve,
		       struct pipe_vertex_buffer *vb)
{
	struct pipe_screen *pscreen = nv40->pipe.screen;
	struct nouveau_grobj *curie = nv40->screen->curie;
	unsigned type, ncomp;
	boolean emitted = TRUE;
	const float *v;
	char *map;

	if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp))
		return FALSE;

	/* Reject non-float data before mapping the buffer at all. */
	if (type != NV40TCL_VTXFMT_TYPE_FLOAT)
		return FALSE;

	map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
	if (!map)
		return FALSE;

	v = (const float *)(map + vb->buffer_offset + ve->src_offset);

	switch (ncomp) {
	case 4:
		so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		so_data  (so, fui(v[3]));
		break;
	case 3:
		so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		break;
	case 2:
		so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		break;
	case 1:
		so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1);
		so_data  (so, fui(v[0]));
		break;
	default:
		emitted = FALSE;
		break;
	}

	pipe_buffer_unmap(pscreen, vb->buffer);

	return emitted;
}

/*
 * Non-indexed draw.  Clears any recorded index buffer, then either falls
 * back to nv40_draw_elements_swtnl() or emits the vertex range on the
 * channel in passes sized by nouveau_vbuf_split(), and finally flushes.
 */
void
nv40_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_screen *screen = nv40->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *curie = screen->curie;
unsigned restart;

/* Array draws use no index buffer. */
nv40_vbo_set_idxbuf(nv40, NULL, 0);
/* Use the swtnl path when forced or when state validation fails. */
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
nv40_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);
return;
}

while (count) {
unsigned vc, nr;

nv40_state_emit(nv40);

/* vc = number of vertices to emit in this pass; presumably bounded by the
 * ring space from AVAIL_RING -- confirm against nouveau_vbuf_split(). */
vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256,
mode, start, count, &restart);
/* Split returned zero vertices: fire the ring and try again. */
if (!vc) {
FIRE_RING(chan);
continue;
}

BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

/* The part that does not fill a 256-vertex batch goes first.
 * Batch word: (count - 1) in bits 31:24, first vertex in the low bits. */
nr = (vc & 0xff);
if (nr) {
BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1);
OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}

/* The rest goes out as full 256-vertex batches, at most 2047 batch
 * words per packet. */
nr = vc >> 8;
while (nr) {
unsigned push = nr > 2047 ? 2047 : nr;

nr -= push;

BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push);
while (push--) {
OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}

/* BEGIN_END with 0 closes the primitive. */
BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, 0);

count -= vc;
/* Continue from the position nouveau_vbuf_split() reported. */
start = restart;
}

pipe->flush(pipe, 0, NULL);
}

/*
 * Emits `count` 8-bit indices, starting at index `start` of the mapped index
 * data `ib`, inline on the channel.  Indices are sent two per data word,
 * with a leading single index when a pass has an odd count.
 */
static INLINE void
nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_screen *screen = nv40->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *curie = screen->curie;

while (count) {
uint8_t *elts = (uint8_t *)ib + start;
unsigned vc, push, restart;

nv40_state_emit(nv40);

/* vc = number of indices to emit in this pass; presumably bounded by the
 * ring space from AVAIL_RING -- confirm against nouveau_vbuf_split(). */
vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2,
mode, start, count, &restart);
/* Split returned zero indices: fire the ring and try again. */
if (vc == 0) {
FIRE_RING(chan);
continue;
}
count -= vc;

BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

/* Odd count: send one index on its own so the rest can go out in pairs. */
if (vc & 1) {
BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}

while (vc) {
unsigned i;

/* At most 2047 data words per packet, two indices per word. */
push = MIN2(vc, 2047 * 2);

BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
/* Second index of each pair in the high 16 bits, first in the low 16. */
for (i = 0; i < push; i+=2)
OUT_RING(chan, (elts[i+1] << 16) | elts[i]);

vc -= push;
elts += push;
}

/* BEGIN_END with 0 closes the primitive. */
BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, 0);

/* Continue from the position nouveau_vbuf_split() reported. */
start = restart;
}
}

/*
 * Emits `count` 16-bit indices, starting at index `start` of the mapped
 * index data `ib`, inline on the channel.  Indices are sent two per data
 * word, with a leading single index when a pass has an odd count.
 */
static INLINE void
nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_screen *screen = nv40->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *curie = screen->curie;

while (count) {
uint16_t *elts = (uint16_t *)ib + start;
unsigned vc, push, restart;

nv40_state_emit(nv40);

/* vc = number of indices to emit in this pass; presumably bounded by the
 * ring space from AVAIL_RING -- confirm against nouveau_vbuf_split(). */
vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2,
mode, start, count, &restart);
/* Split returned zero indices: fire the ring and try again. */
if (vc == 0) {
FIRE_RING(chan);
continue;
}
count -= vc;

BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

/* Odd count: send one index on its own so the rest can go out in pairs. */
if (vc & 1) {
BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}

while (vc) {
unsigned i;

/* At most 2047 data words per packet, two indices per word. */
push = MIN2(vc, 2047 * 2);

BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
/* Second index of each pair in the high 16 bits, first in the low 16. */
for (i = 0; i < push; i+=2)
OUT_RING(chan, (elts[i+1] << 16) | elts[i]);

vc -= push;
elts += push;
}

/* BEGIN_END with 0 closes the primitive. */
BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, 0);

/* Continue from the position nouveau_vbuf_split() reported. */
start = restart;
}
}

/*
 * Emits `count` 32-bit indices, starting at index `start` of the mapped
 * index data `ib`, inline on the channel, one index per data word.
 */
static INLINE void
nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_screen *screen = nv40->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *curie = screen->curie;

while (count) {
uint32_t *elts = (uint32_t *)ib + start;
unsigned vc, push, restart;

nv40_state_emit(nv40);

/* vc = number of indices to emit in this pass; presumably bounded by the
 * ring space from AVAIL_RING -- confirm against nouveau_vbuf_split(). */
vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1,
mode, start, count, &restart);
/* Split returned zero indices: fire the ring and try again. */
if (vc == 0) {
FIRE_RING(chan);
continue;
}
count -= vc;

BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

while (vc) {
/* At most 2047 data words per packet. */
push = MIN2(vc, 2047);

BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push);
OUT_RINGp (chan, elts, push);

vc -= push;
elts += push;
}

/* BEGIN_END with 0 closes the primitive. */
BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, 0);

/* Continue from the position nouveau_vbuf_split() reported. */
start = restart;
}
}

/*
 * Indexed draw that pushes the indices inline on the channel: maps the index
 * buffer for CPU reads and dispatches on the index size (1, 2 or 4 bytes).
 * Logs an error and draws nothing when the buffer cannot be mapped; logs an
 * error for any other index size.
 */
static void
nv40_draw_elements_inline(struct pipe_context *pipe,
			  struct pipe_buffer *ib, unsigned ib_size,
			  unsigned mode, unsigned start, unsigned count)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct pipe_screen *pscreen = pipe->screen;
	void *map;

	map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ);
	/* Check the mapping result, not the buffer handle. */
	if (!map) {
		NOUVEAU_ERR("failed mapping ib\n");
		return;
	}

	switch (ib_size) {
	case 1:
		nv40_draw_elements_u08(nv40, map, mode, start, count);
		break;
	case 2:
		nv40_draw_elements_u16(nv40, map, mode, start, count);
		break;
	case 4:
		nv40_draw_elements_u32(nv40, map, mode, start, count);
		break;
	default:
		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
		break;
	}

	pipe_buffer_unmap(pscreen, ib);
}

/*
 * Indexed draw through NV40TCL_VB_INDEX_BATCH: emits the index range
 * [start, start + count) on the channel in passes sized by
 * nouveau_vbuf_split().
 */
static void
nv40_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_screen *screen = nv40->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *curie = screen->curie;
unsigned restart;

while (count) {
unsigned nr, vc;

nv40_state_emit(nv40);

/* vc = number of indices to emit in this pass; presumably bounded by the
 * ring space from AVAIL_RING -- confirm against nouveau_vbuf_split(). */
vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256,
mode, start, count, &restart);
/* Split returned zero indices: fire the ring and try again. */
if (!vc) {
FIRE_RING(chan);
continue;
}

BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

/* The part that does not fill a 256-index batch goes first.
 * Batch word: (count - 1) in bits 31:24, first index in the low bits. */
nr = (vc & 0xff);
if (nr) {
BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1);
OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}

/* The rest goes out as full 256-index batches, at most 2047 batch
 * words per packet. */
nr = vc >> 8;
while (nr) {
unsigned push = nr > 2047 ? 2047 : nr;

nr -= push;

BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push);
while (push--) {
OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}

/* BEGIN_END with 0 closes the primitive. */
BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
OUT_RING (chan, 0);

count -= vc;
/* Continue from the position nouveau_vbuf_split() reported. */
start = restart;
}
}

/*
 * Indexed draw entry point.
 *
 * Records the index buffer, then validates state.  When swtnl is forced or
 * validation fails, the draw is handed to nv40_draw_elements_swtnl() together
 * with the caller's index buffer so that it stays an indexed draw.
 * Otherwise the indices are either used as a hardware index buffer or pushed
 * inline, and the context is flushed.
 */
void
nv40_draw_elements(struct pipe_context *pipe,
		   struct pipe_buffer *indexBuffer, unsigned indexSize,
		   unsigned mode, unsigned start, unsigned count)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	boolean idxbuf;

	idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
	if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
		/* Forward the indices; passing NULL here would turn the
		 * fallback into a non-indexed draw. */
		nv40_draw_elements_swtnl(pipe, indexBuffer, indexSize,
					 mode, start, count);
		return;
	}

	if (idxbuf) {
		nv40_draw_elements_vbo(pipe, mode, start, count);
	} else {
		nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
					  mode, start, count);
	}

	pipe->flush(pipe, 0, NULL);
}

/*
 * Builds the vertex buffer, vertex format and static attribute state objects
 * from the bound vertex elements and buffers, and stores them in the
 * hardware state table.
 *
 * Elements whose buffer has stride 0 are emitted as immediate attributes
 * when nv40_vbo_static_attrib() accepts them.  If an element has a format
 * the hardware path does not handle, NV40_NEW_ARRAYS is flagged in
 * fallback_swtnl and all state objects built so far are released.
 *
 * Returns FALSE on every path visible here.
 */
static boolean
nv40_vbo_validate(struct nv40_context *nv40)
{
	struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
	struct nouveau_grobj *curie = nv40->screen->curie;
	struct pipe_buffer *ib = nv40->idxbuf;
	unsigned ib_format = nv40->idxbuf_format;
	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
	int hw;

	vtxbuf = so_new(3, 17, 18);
	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt->num_elements);
	vtxfmt = so_new(1, 16, 0);
	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt->num_elements);

	for (hw = 0; hw < nv40->vtxelt->num_elements; hw++) {
		struct pipe_vertex_element *ve;
		struct pipe_vertex_buffer *vb;
		unsigned type, ncomp;

		ve = &nv40->vtxelt->pipe[hw];
		vb = &nv40->vtxbuf[ve->vertex_buffer_index];

		/* Stride 0: try to emit the attribute as an immediate value. */
		if (!vb->stride) {
			if (!sattr)
				sattr = so_new(16, 16 * 4, 0);

			if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
				so_data(vtxbuf, 0);
				so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
				continue;
			}
		}

		if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
			nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
			so_ref(NULL, &vtxbuf);
			so_ref(NULL, &vtxfmt);
			/* sattr may have been allocated by an earlier
			 * element; release it too.  As on the success path
			 * below, it may also still be NULL here. */
			so_ref(NULL, &sattr);
			return FALSE;
		}

		so_reloc(vtxbuf, nouveau_bo(vb->buffer),
			 vb->buffer_offset + ve->src_offset,
			 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
			 0, NV40TCL_VTXBUF_ADDRESS_DMA1);
		so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) |
				  (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type));
	}

	if (ib) {
		struct nouveau_bo *bo = nouveau_bo(ib);

		so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2);
		so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
		so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR,
			  0, NV40TCL_IDXBUF_FORMAT_DMA1);
	}

	/* NOTE(review): method 0x1710 has no symbolic name here; its purpose
	 * is not visible in this file. */
	so_method(vtxbuf, curie, 0x1710, 1);
	so_data  (vtxbuf, 0);

	/* Hand each object to the hw state table, drop the local reference
	 * and mark the slot dirty. */
	so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]);
	so_ref(NULL, &vtxbuf);
	nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF);
	so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]);
	so_ref(NULL, &vtxfmt);
	nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT);
	so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]);
	so_ref(NULL, &sattr);
	nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR);
	return FALSE;
}

/*
 * Pairs nv40_vbo_validate with the NV40_NEW_ARRAYS pipe dirty bit (no hw
 * dirty bits).  Presumably consumed by the state validation loop -- confirm.
 */
struct nv40_state_entry nv40_state_vbo = {
.validate = nv40_vbo_validate,
.dirty = {
.pipe = NV40_NEW_ARRAYS,
.hw = 0,
}
};


+ 1
- 0
src/gallium/drivers/nv50/nv50_vbo.c 파일 보기

@@ -549,6 +549,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
if (nv50->vtxbuf_nr == 0)
return NULL;

nv50->vbo_fifo = 0;
if (nv50->screen->force_push ||
nv50->vertprog->cfg.edgeflag_in < 16)
nv50->vbo_fifo = 0xffff;

+ 32
- 0
src/gallium/drivers/nvfx/Makefile 파일 보기

@@ -0,0 +1,32 @@
TOP = ../../../..
include $(TOP)/configs/current

LIBNAME = nvfx

C_SOURCES = \
nv04_surface_2d.c \
nvfx_context.c \
nvfx_clear.c \
nvfx_draw.c \
nvfx_fragprog.c \
nvfx_fragtex.c \
nv30_fragtex.c \
nv40_fragtex.c \
nvfx_miptree.c \
nvfx_query.c \
nvfx_screen.c \
nvfx_state.c \
nvfx_state_blend.c \
nvfx_state_emit.c \
nvfx_state_fb.c \
nvfx_state_rasterizer.c \
nvfx_state_scissor.c \
nvfx_state_stipple.c \
nvfx_state_viewport.c \
nvfx_state_zsa.c \
nvfx_surface.c \
nvfx_transfer.c \
nvfx_vbo.c \
nvfx_vertprog.c

include ../../Makefile.template

src/gallium/drivers/nouveau/nv04_surface_2d.c → src/gallium/drivers/nvfx/nv04_surface_2d.c 파일 보기

@@ -543,4 +543,3 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d

return temp_ns;
}


src/gallium/drivers/nouveau/nv04_surface_2d.h → src/gallium/drivers/nvfx/nv04_surface_2d.h 파일 보기


src/gallium/drivers/nv30/nv30_fragtex.c → src/gallium/drivers/nvfx/nv30_fragtex.c 파일 보기

@@ -1,7 +1,37 @@
#include "util/u_format.h"

#include "nv30_context.h"
#include "nvfx_context.h"
#include "nouveau/nouveau_util.h"
#include "nvfx_tex.h"

/*
 * Adds the NV30-specific parts of a sampler state to `ps`: the anisotropy
 * level in ps->en, the LOD bias in ps->filt, and the max/min LOD in ps->en.
 * Bits are OR-ed into fields the caller has already initialised.
 */
void
nv30_sampler_state_init(struct pipe_context *pipe,
			struct nvfx_sampler_state *ps,
			const struct pipe_sampler_state *cso)
{
	if (cso->max_anisotropy >= 8) {
		ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
	} else
	if (cso->max_anisotropy >= 4) {
		ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
	} else
	if (cso->max_anisotropy >= 2) {
		ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
	}

	{
		float limit;

		/* Encode the clamped bias: the raw value could exceed the
		 * 13-bit field and wrap. */
		limit = CLAMP(cso->lod_bias, -16.0, 15.0);
		ps->filt |= (int)(limit * 256.0) & 0x1fff;

		limit = CLAMP(cso->max_lod, 0.0, 15.0);
		ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;

		limit = CLAMP(cso->min_lod, 0.0, 15.0);
		ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
	}
}

#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
{ \
@@ -57,11 +87,11 @@ nv30_fragtex_format(uint pipe_format)
}


static struct nouveau_stateobj *
nv30_fragtex_build(struct nv30_context *nv30, int unit)
struct nouveau_stateobj *
nv30_fragtex_build(struct nvfx_context *nvfx, int unit)
{
struct nv30_sampler_state *ps = nv30->tex_sampler[unit];
struct nv30_miptree *nv30mt = nv30->tex_miptree[unit];
struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit];
struct pipe_texture *pt = &nv30mt->base;
struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer);
struct nv30_texture_format *tf;
@@ -101,7 +131,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
txs = tf->swizzle;

so = so_new(1, 8, 2);
so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8);
so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
@@ -115,47 +145,3 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)

return so;
}

/*
 * Rebuilds the per-unit fragment texture state objects.
 *
 * Units in the sampler mask recorded by the last validation that the current
 * fragment program does not use get a state object writing 0 to TX_ENABLE.
 * Units that are both dirty and used by the program are rebuilt with
 * nv30_fragtex_build().  Returns FALSE.
 */
static boolean
nv30_fragtex_validate(struct nv30_context *nv30)
{
	struct nv30_state *state = &nv30->state;
	struct nv30_fragment_program *fp = nv30->fragprog;
	struct nouveau_stateobj *so;
	unsigned pending, unit;

	/* Disable units that are no longer sampled. */
	for (pending = state->fp_samplers & ~fp->samplers; pending;
	     pending &= ~(1 << unit)) {
		unit = ffs(pending) - 1;

		so = so_new(1, 1, 0);
		so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1);
		so_data  (so, 0);
		so_ref(so, &state->hw[NV30_STATE_FRAGTEX0 + unit]);
		so_ref(NULL, &so);
		state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
	}

	/* Rebuild dirty units that the program samples from. */
	for (pending = nv30->dirty_samplers & fp->samplers; pending;
	     pending &= ~(1 << unit)) {
		unit = ffs(pending) - 1;

		so = nv30_fragtex_build(nv30, unit);
		so_ref(so, &state->hw[NV30_STATE_FRAGTEX0 + unit]);
		so_ref(NULL, &so);
		state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
	}

	state->fp_samplers = fp->samplers;
	return FALSE;
}

/*
 * Pairs nv30_fragtex_validate with the NV30_NEW_SAMPLER and
 * NV30_NEW_FRAGPROG pipe dirty bits (no hw dirty bits).  Presumably consumed
 * by the state validation loop -- confirm.
 */
struct nv30_state_entry nv30_state_fragtex = {
.validate = nv30_fragtex_validate,
.dirty = {
.pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG,
.hw = 0
}
};

+ 169
- 0
src/gallium/drivers/nvfx/nv30_vertprog.h 파일 보기

@@ -0,0 +1,169 @@
#ifndef __NV30_SHADER_H__
#define __NV30_SHADER_H__

/* Vertex programs instruction set
*
* 128bit opcodes, split into 4 32-bit ones for ease of use.
*
* Non-native instructions
* ABS - MOV + NV40_VP_INST0_DEST_ABS
* POW - EX2 + MUL + LG2
* SUB - ADD, second source negated
* SWZ - MOV
* XPD -
*
* Register access
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
* - Only one CONST can be accessed per-instruction (move extras into TEMPs)
*
* Relative Addressing
* According to the value returned for
* MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
*
* there are only two address registers available. The destination in the
* ARL instruction is set to TEMP <n> (The temp isn't actually written).
*
* When using vanilla ARB_v_p, the proprietary driver will squish both the
* available ADDRESS regs into the first hardware reg in the X and Y
* components.
*
* To use an address reg as an index into consts, the CONST_SRC is set to
* (const_base + offset) and INDEX_CONST is set.
*
* To access the second address reg use ADDR_REG_SELECT_1. A particular
* component of the address regs is selected with ADDR_SWZ.
*
* Only one address register can be accessed per instruction.
*
* Conditional execution (see NV_vertex_program{2,3} for details) Conditional
* execution of an instruction is enabled by setting COND_TEST_ENABLE, and
* selecting the condition which will allow the test to pass with
* COND_{FL,LT,...}. It is possible to swizzle the values in the condition
* register, which allows for testing against an individual component.
*
* Branching:
*
* The BRA/CAL instructions seem to follow a slightly different opcode
* layout. The destination instruction ID (IADDR) overlaps a source field.
* Instruction IDs seem to be numbered based on the UPLOAD_FROM_ID FIFO
* command, and is incremented automatically on each UPLOAD_INST FIFO
* command.
*
* Conditional branching is achieved by using the condition tests described
* above. There doesn't appear to be dedicated looping instructions, but
* this can be done using a temp reg + conditional branching.
*
* Subroutines may be uploaded before the main program itself, but the first
* executed instruction is determined by the PROGRAM_START_ID FIFO command.
*
*/

/* DWORD 0 */

#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
#define NV30_VP_INST_VEC_RESULT (1 << 20)
#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
#define NV30_VP_INST_COND_SHIFT 11
#define NV30_VP_INST_COND_MASK (0x07 << 11)
#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)

/* DWORD 1 */
#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
#define NV30_VP_INST_CONST_SRC_SHIFT 14
#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/

/* Please note: the IADDR fields overlap other fields because they are used
* only for branch instructions. See Branching: label above
*
* DWORD 2
*/
#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
#define NV30_VP_INST_IADDR_SHIFT 2
#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */

/* DWORD 3 */
#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
#define NV30_VP_INST_DEST_SHIFT 2
#define NV30_VP_INST_DEST_MASK (0x0F << 2)
# define NV30_VP_INST_DEST_POS 0
# define NV30_VP_INST_DEST_BFC0 1
# define NV30_VP_INST_DEST_BFC1 2
# define NV30_VP_INST_DEST_COL0 3
# define NV30_VP_INST_DEST_COL1 4
# define NV30_VP_INST_DEST_FOGC 5
# define NV30_VP_INST_DEST_PSZ 6
/* Destination ID of texture coordinate n; the argument is parenthesized so
 * that expression arguments expand correctly. */
# define NV30_VP_INST_DEST_TC(n) (8+(n))

/* Useful to split the source selection regs into their pieces */
#define NV30_VP_SRC0_HIGH_SHIFT 6
#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
#define NV30_VP_SRC0_LOW_MASK 0x0000003F
#define NV30_VP_SRC2_HIGH_SHIFT 4
#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
#define NV30_VP_SRC2_LOW_MASK 0x0000000F


/* Source-register definition - matches NV20 exactly */
#define NV30_VP_SRC_NEGATE (1<<14)
#define NV30_VP_SRC_SWZ_X_SHIFT 12
#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
#define NV30_VP_SRC_SWZ_Y_SHIFT 10
#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
#define NV30_VP_SRC_SWZ_Z_SHIFT 8
#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
#define NV30_VP_SRC_SWZ_W_SHIFT 6
#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
#define NV30_VP_SRC_REG_TYPE_SHIFT 0
#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
#define NV30_VP_SRC_REG_TYPE_TEMP 1
#define NV30_VP_SRC_REG_TYPE_INPUT 2
#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */

#include "nvfx_shader.h"

#endif

src/gallium/drivers/nv40/nv40_fragtex.c → src/gallium/drivers/nvfx/nv40_fragtex.c 파일 보기

@@ -1,18 +1,63 @@
#include "util/u_format.h"
#include "nvfx_context.h"
#include "nvfx_tex.h"

#include "nv40_context.h"
/*
 * Adds the NV40-specific parts of a sampler state to `ps`: the anisotropy
 * level in ps->en (plus one bit in ps->wrap), the LOD bias in ps->filt, and
 * the max/min LOD in ps->en.  Bits are OR-ed into fields the caller has
 * already initialised.
 */
void
nv40_sampler_state_init(struct pipe_context *pipe,
			struct nvfx_sampler_state *ps,
			const struct pipe_sampler_state *cso)
{
	if (cso->max_anisotropy >= 2) {
		/* no idea, binary driver sets it, works without it.. meh.. */
		ps->wrap |= (1 << 5);

		if (cso->max_anisotropy >= 16) {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
		} else
		if (cso->max_anisotropy >= 12) {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
		} else
		if (cso->max_anisotropy >= 10) {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
		} else
		if (cso->max_anisotropy >= 8) {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
		} else
		if (cso->max_anisotropy >= 6) {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
		} else
		if (cso->max_anisotropy >= 4) {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
		} else {
			ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
		}
	}

	{
		float limit;

		/* Encode the clamped bias: the raw value could exceed the
		 * 13-bit field and wrap. */
		limit = CLAMP(cso->lod_bias, -16.0, 15.0);
		ps->filt |= (int)(limit * 256.0) & 0x1fff;

		limit = CLAMP(cso->max_lod, 0.0, 15.0);
		ps->en |= (int)(limit * 256.0) << 7;

		limit = CLAMP(cso->min_lod, 0.0, 15.0);
		ps->en |= (int)(limit * 256.0) << 19;
	}
}

#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
{ \
TRUE, \
PIPE_FORMAT_##m, \
NV40TCL_TEX_FORMAT_FORMAT_##tf, \
(NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \
NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \
NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \
NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \
((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \
(NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \
(NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \
((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \
(NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \
}

struct nv40_texture_format {
@@ -60,11 +105,11 @@ nv40_fragtex_format(uint pipe_format)
}


static struct nouveau_stateobj *
nv40_fragtex_build(struct nv40_context *nv40, int unit)
struct nouveau_stateobj *
nv40_fragtex_build(struct nvfx_context *nvfx, int unit)
{
struct nv40_sampler_state *ps = nv40->tex_sampler[unit];
struct nv40_miptree *nv40mt = nv40->tex_miptree[unit];
struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit];
struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer);
struct pipe_texture *pt = &nv40mt->base;
struct nv40_texture_format *tf;
@@ -81,20 +126,20 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);

if (1) /* XXX */
txf |= NV40TCL_TEX_FORMAT_NO_BORDER;
txf |= NV34TCL_TX_FORMAT_NO_BORDER;

switch (pt->target) {
case PIPE_TEXTURE_CUBE:
txf |= NV40TCL_TEX_FORMAT_CUBIC;
txf |= NV34TCL_TX_FORMAT_CUBIC;
/* fall-through */
case PIPE_TEXTURE_2D:
txf |= NV40TCL_TEX_FORMAT_DIMS_2D;
txf |= NV34TCL_TX_FORMAT_DIMS_2D;
break;
case PIPE_TEXTURE_3D:
txf |= NV40TCL_TEX_FORMAT_DIMS_3D;
txf |= NV34TCL_TX_FORMAT_DIMS_3D;
break;
case PIPE_TEXTURE_1D:
txf |= NV40TCL_TEX_FORMAT_DIMS_1D;
txf |= NV34TCL_TX_FORMAT_DIMS_1D;
break;
default:
NOUVEAU_ERR("Unknown target %d\n", pt->target);
@@ -111,63 +156,19 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
txs = tf->swizzle;

so = so_new(2, 9, 2);
so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8);
so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1);
NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
so_data (so, ps->wrap);
so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
so_data (so, txs);
so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) |
so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
pt->height0);
so_data (so, ps->bcol);
so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1);
so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);

return so;
}

/*
 * Refresh the per-unit fragment texture state objects.
 *
 * Two passes are made over sampler bitmasks:
 *  - units set in state->fp_samplers but no longer set in the current
 *    fragment program's sampler mask get a small state object that writes 0
 *    to NV40TCL_TEX_ENABLE(unit);
 *  - units that are both in nv40->dirty_samplers and used by the fragment
 *    program get a new object from nv40_fragtex_build().
 * Each touched unit is stored in state.hw[NV40_STATE_FRAGTEX0 + unit] and
 * flagged in state->dirty.  Finally the fragment program's sampler mask is
 * remembered in state.fp_samplers.
 *
 * Always returns FALSE.
 */
static boolean
nv40_fragtex_validate(struct nv40_context *nv40)
{
	struct nv40_fragment_program *fp = nv40->fragprog;
	struct nv40_state *state = &nv40->state;
	struct nouveau_stateobj *so;
	unsigned samplers, unit;

	/* Units that were in use before but are not sampled any more. */
	samplers = state->fp_samplers & ~fp->samplers;
	while (samplers) {
		unit = ffs(samplers) - 1;
		samplers &= ~(1 << unit);

		so = so_new(1, 1, 0);
		so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
		so_data  (so, 0);
		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
		/* Release the local reference once the hw slot holds its own,
		 * exactly as the rebuild loop below does; without this the
		 * object created by so_new() is never released.
		 */
		so_ref(NULL, &so);
		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
	}

	/* Units whose sampler/texture changed and that are still sampled. */
	samplers = nv40->dirty_samplers & fp->samplers;
	while (samplers) {
		unit = ffs(samplers) - 1;
		samplers &= ~(1 << unit);

		so = nv40_fragtex_build(nv40, unit);
		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
		so_ref(NULL, &so);
		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
	}

	nv40->state.fp_samplers = fp->samplers;
	return FALSE;
}

/*
 * State entry pairing nv40_fragtex_validate() with the NV40_NEW_SAMPLER and
 * NV40_NEW_FRAGPROG pipe dirty bits; no hw dirty bits are listed.
 */
struct nv40_state_entry nv40_state_fragtex = {
	.dirty = {
		.hw = 0,
		.pipe = NV40_NEW_FRAGPROG | NV40_NEW_SAMPLER
	},
	.validate = nv40_fragtex_validate
};


+ 177
- 0
src/gallium/drivers/nvfx/nv40_vertprog.h 파일 보기

@@ -0,0 +1,177 @@
#ifndef __NV40_SHADER_H__
#define __NV40_SHADER_H__

/* Vertex programs instruction set
*
* The NV40 instruction set is very similar to NV30. Most fields are in
* a slightly different position in the instruction however.
*
* Merged instructions
* In some cases it is possible to put two instructions into one opcode
* slot. The rules for when this is OK are not entirely clear to me yet.
*
* There are separate writemasks and dest temp register fields for each
* grouping of instructions. There is however only one field with the
* ID of a result register. Writing to temp/result regs is selected by
* setting VEC_RESULT/SCA_RESULT.
*
* Temporary registers
* The source/dest temp register fields have been extended by 1 bit, to
* give a total of 32 temporary registers.
*
* Relative Addressing
* NV40 can use an address register to index into vertex attribute regs.
* This is done by putting the offset value into INPUT_SRC and setting
* the INDEX_INPUT flag.
*
* Conditional execution (see NV_vertex_program{2,3} for details)
* There is a second condition code register on NV40; its use is enabled
* by setting the COND_REG_SELECT_1 flag.
*
* Texture lookup
* TODO
*/

/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
/*
 * Fields of the first instruction dword.  For multi-bit fields, *_SHIFT is
 * the position of the lowest bit and *_MASK is the field mask already
 * shifted into place; single-bit flags are given directly as (1 << bit).
 */
#define NV40_VP_INST_VEC_RESULT (1 << 30)
/* uncertain.. */
#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
/* use address reg as index into attribs */
#define NV40_VP_INST_INDEX_INPUT (1 << 27)
#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
#define NV40_VP_INST_SRC2_ABS (1 << 23)
#define NV40_VP_INST_SRC1_ABS (1 << 22)
#define NV40_VP_INST_SRC0_ABS (1 << 21)
#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
#define NV40_VP_INST_COND_SHIFT 10
#define NV40_VP_INST_COND_MASK (0x7 << 10)
/* Condition swizzle: 2 bits per component, X highest; ALL spans X..W. */
#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
/*
 * OR of the dword-0 fields listed below.  Note that VEC_RESULT and
 * COND_UPDATE_ENABLE are not part of this mask.
 */
#define NV40_VP_INST0_KNOWN ( \
NV40_VP_INST_INDEX_INPUT | \
NV40_VP_INST_COND_REG_SELECT_1 | \
NV40_VP_INST_ADDR_REG_SELECT_1 | \
NV40_VP_INST_SRC2_ABS | \
NV40_VP_INST_SRC1_ABS | \
NV40_VP_INST_SRC0_ABS | \
NV40_VP_INST_VEC_DEST_TEMP_MASK | \
NV40_VP_INST_COND_TEST_ENABLE | \
NV40_VP_INST_COND_MASK | \
NV40_VP_INST_COND_SWZ_ALL_MASK | \
NV40_VP_INST_ADDR_SWZ_MASK)

/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
/*
 * Fields of the second instruction dword.  *_SHIFT is the position of the
 * lowest bit of a field, *_MASK the field mask shifted into place.
 */
#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
/* Reaches bit 31: the constant must be unsigned, shifting a signed int
 * into the sign bit is undefined behaviour.
 */
#define NV40_VP_INST_SCA_OPCODE_MASK (0x1Fu << 27)
#define NV40_VP_INST_CONST_SRC_SHIFT 12
#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
#define NV40_VP_INST_INPUT_SRC_SHIFT 8
#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
#define NV40_VP_INST_SRC0H_SHIFT 0
#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
/* OR of every dword-1 field mask defined above. */
#define NV40_VP_INST1_KNOWN ( \
	NV40_VP_INST_VEC_OPCODE_MASK | \
	NV40_VP_INST_SCA_OPCODE_MASK | \
	NV40_VP_INST_CONST_SRC_MASK | \
	NV40_VP_INST_INPUT_SRC_MASK | \
	NV40_VP_INST_SRC0H_MASK \
	)

/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
/*
 * Fields of the third instruction dword.  SRC2H and IADDRH both start at
 * bit 0 and therefore overlap.
 */
#define NV40_VP_INST_SRC0L_SHIFT 23
/* Reaches bit 31: the constant must be unsigned, shifting a signed int
 * into the sign bit is undefined behaviour.
 */
#define NV40_VP_INST_SRC0L_MASK (0x1FFu << 23)
#define NV40_VP_INST_SRC1_SHIFT 6
#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
#define NV40_VP_INST_SRC2H_SHIFT 0
#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
#define NV40_VP_INST_IADDRH_SHIFT 0
#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)

/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
/*
 * Fields of the fourth instruction dword.  IADDRL and SRC2L both extend up
 * to bit 31 and therefore overlap.  Masks that reach bit 31 use unsigned
 * constants, because shifting a signed int into the sign bit is undefined
 * behaviour.
 */
#define NV40_VP_INST_IADDRL_SHIFT 29
#define NV40_VP_INST_IADDRL_MASK (7u << 29)
#define NV40_VP_INST_SRC2L_SHIFT 21
#define NV40_VP_INST_SRC2L_MASK (0x7FFu << 21)
#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
#define NV40_VP_INST_SCA_RESULT (1 << 12)
#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
#define NV40_VP_INST_DEST_SHIFT 2
#define NV40_VP_INST_DEST_MASK (31 << 2)
/* Values for the DEST field (before shifting by NV40_VP_INST_DEST_SHIFT). */
# define NV40_VP_INST_DEST_POS 0
# define NV40_VP_INST_DEST_COL0 1
# define NV40_VP_INST_DEST_COL1 2
# define NV40_VP_INST_DEST_BFC0 3
# define NV40_VP_INST_DEST_BFC1 4
# define NV40_VP_INST_DEST_FOGC 5
# define NV40_VP_INST_DEST_PSZ 6
# define NV40_VP_INST_DEST_TC0 7
/* The argument is parenthesized so that expressions such as TC(a << b) or
 * TC(c ? x : y) evaluate as 7 + (expression).
 */
# define NV40_VP_INST_DEST_TC(n) (7+(n))
# define NV40_VP_INST_DEST_TEMP 0x1F
#define NV40_VP_INST_INDEX_CONST (1 << 1)
/*
 * OR of the dword-3 fields listed below.  IADDRL is covered only through its
 * overlap with SRC2L; SCA_RESULT is not part of this mask.
 */
#define NV40_VP_INST3_KNOWN ( \
	NV40_VP_INST_SRC2L_MASK |\
	NV40_VP_INST_SCA_WRITEMASK_MASK |\
	NV40_VP_INST_VEC_WRITEMASK_MASK |\
	NV40_VP_INST_SCA_DEST_TEMP_MASK |\
	NV40_VP_INST_DEST_MASK |\
	NV40_VP_INST_INDEX_CONST)

/* Useful to split the source selection regs into their pieces */
/*
 * SRC0 and SRC2 are each stored as a high and a low part in different
 * instruction dwords (the SRC0H/SRC0L and SRC2H/SRC2L fields).  The *_MASK
 * values select the high/low bits of the combined source value and
 * *_HIGH_SHIFT is the position of the lowest bit of the high part.
 */
#define NV40_VP_SRC0_HIGH_SHIFT 9
#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
#define NV40_VP_SRC0_LOW_MASK 0x000001FF
#define NV40_VP_SRC2_HIGH_SHIFT 11
#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
#define NV40_VP_SRC2_LOW_MASK 0x000007FF

/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
/*
 * Layout of one source operand value: negate flag in bit 16, a 2-bit
 * swizzle selector per component in bits 15:8 (X highest), the temp register
 * index in bits 6:2 and the register type in bits 1:0.
 */
#define NV40_VP_SRC_NEGATE (1 << 16)
#define NV40_VP_SRC_SWZ_X_SHIFT 14
#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
#define NV40_VP_SRC_SWZ_Y_SHIFT 12
#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
#define NV40_VP_SRC_SWZ_Z_SHIFT 10
#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
#define NV40_VP_SRC_SWZ_W_SHIFT 8
#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
#define NV40_VP_SRC_REG_TYPE_SHIFT 0
#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
/* Values for the REG_TYPE field. */
# define NV40_VP_SRC_REG_TYPE_UNK0 0
# define NV40_VP_SRC_REG_TYPE_TEMP 1
# define NV40_VP_SRC_REG_TYPE_INPUT 2
# define NV40_VP_SRC_REG_TYPE_CONST 3

#include "nvfx_shader.h"

#endif

src/gallium/drivers/nv40/nv40_clear.c → src/gallium/drivers/nvfx/nvfx_clear.c 파일 보기

@@ -3,12 +3,12 @@
#include "pipe/p_state.h"
#include "util/u_clear.h"

#include "nv40_context.h"
#include "nvfx_context.h"

void
nv40_clear(struct pipe_context *pipe, unsigned buffers,
nvfx_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil)
{
util_clear(pipe, &nv40_context(pipe)->framebuffer, buffers, rgba, depth,
util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth,
stencil);
}

+ 90
- 0
src/gallium/drivers/nvfx/nvfx_context.c 파일 보기

@@ -0,0 +1,90 @@
#include "draw/draw_context.h"
#include "pipe/p_defines.h"

#include "nvfx_context.h"
#include "nvfx_screen.h"

static void
nvfx_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *eng3d = screen->eng3d;

if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING (chan, 2);
BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING (chan, 1);
}

FIRE_RING(chan);
if (fence)
*fence = NULL;
}

/*
 * pipe_context destroy hook: drops every non-NULL hardware state object
 * reference, destroys the draw context if one exists, and frees the
 * context itself.
 */
static void
nvfx_destroy(struct pipe_context *pipe)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nouveau_stateobj **hw = nvfx->state.hw;
	unsigned slot;

	for (slot = 0; slot < NVFX_STATE_MAX; slot++) {
		if (!hw[slot])
			continue;
		so_ref(NULL, &hw[slot]);
	}

	if (nvfx->draw != NULL)
		draw_destroy(nvfx->draw);

	FREE(nvfx);
}

/*
 * Create an nvfx pipe context for the given screen.
 *
 * pscreen: screen the context belongs to.
 * priv:    opaque pointer stored in pipe.priv.
 *
 * Returns the embedded pipe_context, or NULL if the context or its draw
 * module could not be allocated.
 */
struct pipe_context *
nvfx_create(struct pipe_screen *pscreen, void *priv)
{
	struct nvfx_screen *screen = nvfx_screen(pscreen);
	struct pipe_winsys *ws = pscreen->winsys;
	struct nvfx_context *nvfx;
	struct nouveau_winsys *nvws = screen->nvws;

	nvfx = CALLOC(1, sizeof(struct nvfx_context));
	if (!nvfx)
		return NULL;

	/* Allocate the draw module before the channel is pointed at this
	 * context, so that an allocation failure can simply free the context
	 * instead of handing a NULL draw context to the draw_*() calls below.
	 */
	nvfx->draw = draw_create();
	if (!nvfx->draw) {
		FREE(nvfx);
		return NULL;
	}

	nvfx->screen = screen;

	nvfx->nvws = nvws;

	nvfx->pipe.winsys = ws;
	nvfx->pipe.screen = pscreen;
	nvfx->pipe.priv = priv;
	nvfx->pipe.destroy = nvfx_destroy;
	nvfx->pipe.draw_arrays = nvfx_draw_arrays;
	nvfx->pipe.draw_elements = nvfx_draw_elements;
	nvfx->pipe.clear = nvfx_clear;
	nvfx->pipe.flush = nvfx_flush;

	nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced;
	nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;

	/* The channel calls back into this context on flush. */
	screen->base.channel->user_private = nvfx;
	screen->base.channel->flush_notify = nvfx_state_flush_notify;

	nvfx->is_nv4x = screen->is_nv4x;

	nvfx_init_query_functions(nvfx);
	nvfx_init_surface_functions(nvfx);
	nvfx_init_state_functions(nvfx);
	nvfx_init_transfer_functions(nvfx);

	/* Configure and install fallback swtnl path */
	draw_wide_point_threshold(nvfx->draw, 9999999.0);
	draw_wide_line_threshold(nvfx->draw, 9999999.0);
	draw_enable_line_stipple(nvfx->draw, FALSE);
	draw_enable_point_sprites(nvfx->draw, FALSE);
	draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx));

	return &nvfx->pipe;
}

+ 265
- 0
src/gallium/drivers/nvfx/nvfx_context.h 파일 보기

@@ -0,0 +1,265 @@
#ifndef __NVFX_CONTEXT_H__
#define __NVFX_CONTEXT_H__

#include <stdio.h>

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_compiler.h"

#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_inlines.h"

#include "draw/draw_vertex.h"

#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
#include "nouveau/nouveau_stateobj.h"

#include "nvfx_state.h"

/*
 * Diagnostic helpers writing to stderr.  NOUVEAU_ERR prefixes the message
 * with the calling function and line, NOUVEAU_MSG with "nouveau: ".
 *
 * Both expand to a single do { } while (0) statement without a trailing
 * semicolon, so "if (x) NOUVEAU_ERR(...); else ..." parses as intended.
 */
#define NOUVEAU_ERR(fmt, args...) \
	do { \
		fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \
	} while (0)
#define NOUVEAU_MSG(fmt, args...) \
	do { \
		fprintf(stderr, "nouveau: "fmt, ##args); \
	} while (0)

/*
 * Slot numbers of the hardware state objects kept in nvfx_state.hw[].
 * The same numbers are used as bit positions in the 64-bit
 * nvfx_state.dirty mask.  Values are consecutive starting at 0; the
 * comments give the value of the first enumerator of each group.
 */
enum nvfx_state_index {
	NVFX_STATE_FB = 0,
	NVFX_STATE_VIEWPORT,
	NVFX_STATE_BLEND,
	NVFX_STATE_RAST,
	NVFX_STATE_ZSA,
	NVFX_STATE_BCOL,
	NVFX_STATE_CLIP,
	NVFX_STATE_SCISSOR,
	NVFX_STATE_STIPPLE,
	NVFX_STATE_FRAGPROG,
	NVFX_STATE_VERTPROG,
	/* 11: sixteen fragment texture units */
	NVFX_STATE_FRAGTEX0,
	NVFX_STATE_FRAGTEX1,
	NVFX_STATE_FRAGTEX2,
	NVFX_STATE_FRAGTEX3,
	NVFX_STATE_FRAGTEX4,
	NVFX_STATE_FRAGTEX5,
	NVFX_STATE_FRAGTEX6,
	NVFX_STATE_FRAGTEX7,
	NVFX_STATE_FRAGTEX8,
	NVFX_STATE_FRAGTEX9,
	NVFX_STATE_FRAGTEX10,
	NVFX_STATE_FRAGTEX11,
	NVFX_STATE_FRAGTEX12,
	NVFX_STATE_FRAGTEX13,
	NVFX_STATE_FRAGTEX14,
	NVFX_STATE_FRAGTEX15,
	/* 27: four vertex texture units */
	NVFX_STATE_VERTTEX0,
	NVFX_STATE_VERTTEX1,
	NVFX_STATE_VERTTEX2,
	NVFX_STATE_VERTTEX3,
	/* 31: vertex fetch state */
	NVFX_STATE_VTXBUF,
	NVFX_STATE_VTXFMT,
	NVFX_STATE_VTXATTR,
	NVFX_STATE_SR,
	/* 35: number of slots */
	NVFX_STATE_MAX
};

#include "nvfx_screen.h"

/*
 * Pipe-level dirty flags, one bit each (bits 0..13).  They are the values
 * used in nvfx_state_entry.dirty.pipe.
 */
#define NVFX_NEW_BLEND    0x0001
#define NVFX_NEW_RAST     0x0002
#define NVFX_NEW_ZSA      0x0004
#define NVFX_NEW_SAMPLER  0x0008
#define NVFX_NEW_FB       0x0010
#define NVFX_NEW_STIPPLE  0x0020
#define NVFX_NEW_SCISSOR  0x0040
#define NVFX_NEW_VIEWPORT 0x0080
#define NVFX_NEW_BCOL     0x0100
#define NVFX_NEW_VERTPROG 0x0200
#define NVFX_NEW_FRAGPROG 0x0400
#define NVFX_NEW_ARRAYS   0x0800
#define NVFX_NEW_UCP      0x1000
#define NVFX_NEW_SR       0x2000

/* Rasterizer state: the pipe-level description plus a state object pointer
 * (presumably the hardware commands derived from it - confirm at the
 * creation site).
 */
struct nvfx_rasterizer_state {
struct pipe_rasterizer_state pipe;
struct nouveau_stateobj *so;
};

/* Depth/stencil/alpha state: the pipe-level description plus a state object
 * pointer (presumably the hardware commands derived from it - confirm at
 * the creation site).
 */
struct nvfx_zsa_state {
struct pipe_depth_stencil_alpha_state pipe;
struct nouveau_stateobj *so;
};

/* Blend state: the pipe-level description plus a state object pointer
 * (presumably the hardware commands derived from it - confirm at the
 * creation site).
 */
struct nvfx_blend_state {
struct pipe_blend_state pipe;
struct nouveau_stateobj *so;
};


/*
 * Hardware-side state tracking.  hw[] holds one state object per
 * nvfx_state_index slot; dirty is a 64-bit mask with one bit per slot
 * (set as 1ULL << slot by the code that replaces an entry).
 */
struct nvfx_state {
unsigned scissor_enabled;
unsigned stipple_enabled;
/* Bitmask of sampler units; NOTE(review): presumably those used by the
 * last validated fragment program - confirm in the fragtex validation code.
 */
unsigned fp_samplers;

uint64_t dirty;
struct nouveau_stateobj *hw[NVFX_STATE_MAX];
};

/* Vertex element state: up to 16 pipe vertex elements and how many of them
 * are in use.
 */
struct nvfx_vtxelt_state {
struct pipe_vertex_element pipe[16];
unsigned num_elements;
};

/*
 * Driver context.  The embedded pipe_context is the first member:
 * nvfx_context() converts a pipe_context pointer back to this structure
 * with a plain cast, so it must stay first.
 */
struct nvfx_context {
struct pipe_context pipe;

struct nouveau_winsys *nvws;
struct nvfx_screen *screen;

unsigned is_nv4x; /* either 0 or ~0 */

/* Draw module context used by the software TNL fallback. */
struct draw_context *draw;

/* HW state derived from pipe states */
struct nvfx_state state;
/* Software TNL setup: the pass-through vertex program and, for each of the
 * nr_attribs emitted attributes, the hw attribute id (hw[]), the draw
 * shader output index (draw[]) and the emit format (emit[]).
 */
struct {
struct nvfx_vertex_program *vertprog;

unsigned nr_attribs;
unsigned hw[PIPE_MAX_SHADER_INPUTS];
unsigned draw[PIPE_MAX_SHADER_INPUTS];
unsigned emit[PIPE_MAX_SHADER_INPUTS];
} swtnl;

enum {
HW, SWTNL, SWRAST
} render_mode;
unsigned fallback_swtnl;
unsigned fallback_swrast;

/* Context state */
/* dirty holds NVFX_NEW_* bits; NOTE(review): draw_dirty presumably tracks
 * the same bits for the draw module - confirm against the state code.
 */
unsigned dirty, draw_dirty;
struct pipe_scissor_state scissor;
unsigned stipple[32];
struct pipe_clip_state clip;
struct nvfx_vertex_program *vertprog;
struct nvfx_fragment_program *fragprog;
struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
unsigned constbuf_nr[PIPE_SHADER_TYPES];
struct nvfx_rasterizer_state *rasterizer;
struct nvfx_zsa_state *zsa;
struct nvfx_blend_state *blend;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state framebuffer;
struct pipe_buffer *idxbuf;
unsigned idxbuf_format;
struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
struct nvfx_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
unsigned nr_textures;
/* Bitmask of sampler units, one bit per unit. */
unsigned dirty_samplers;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
struct nvfx_vtxelt_state *vtxelt;
};

/*
 * Downcast a pipe_context to the nvfx_context that embeds it.  This is a
 * plain pointer cast and relies on `pipe` being the first member of
 * struct nvfx_context.
 */
static INLINE struct nvfx_context *
nvfx_context(struct pipe_context *pipe)
{
	struct nvfx_context *nvfx = (struct nvfx_context *)pipe;

	return nvfx;
}

/*
 * One entry of the state validation table: a validate callback and the
 * dirty bits associated with it.  dirty.pipe takes NVFX_NEW_* flags.
 * NOTE(review): how dirty.hw and the callback's return value are
 * interpreted is decided by the state emission code, which is not shown
 * here.
 */
struct nvfx_state_entry {
boolean (*validate)(struct nvfx_context *nvfx);
struct {
unsigned pipe;
unsigned hw;
} dirty;
};

extern struct nvfx_state_entry nvfx_state_blend;
extern struct nvfx_state_entry nvfx_state_blend_colour;
extern struct nvfx_state_entry nvfx_state_fragprog;
extern struct nvfx_state_entry nvfx_state_fragtex;
extern struct nvfx_state_entry nvfx_state_framebuffer;
extern struct nvfx_state_entry nvfx_state_rasterizer;
extern struct nvfx_state_entry nvfx_state_scissor;
extern struct nvfx_state_entry nvfx_state_sr;
extern struct nvfx_state_entry nvfx_state_stipple;
extern struct nvfx_state_entry nvfx_state_vbo;
extern struct nvfx_state_entry nvfx_state_vertprog;
extern struct nvfx_state_entry nvfx_state_viewport;
extern struct nvfx_state_entry nvfx_state_vtxfmt;
extern struct nvfx_state_entry nvfx_state_zsa;

extern void nvfx_init_query_functions(struct nvfx_context *nvfx);
extern void nvfx_init_surface_functions(struct nvfx_context *nvfx);

/* nvfx_context.c */
struct pipe_context *
nvfx_create(struct pipe_screen *pscreen, void *priv);

/* nvfx_clear.c */
extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil);

/* nvfx_draw.c */
extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx);
extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf,
unsigned ib_size, unsigned mode,
unsigned start, unsigned count);

/* nvfx_fragprog.c */
extern void nvfx_fragprog_destroy(struct nvfx_context *,
struct nvfx_fragment_program *);

/* nv30_fragtex.c */
extern void
nv30_sampler_state_init(struct pipe_context *pipe,
struct nvfx_sampler_state *ps,
const struct pipe_sampler_state *cso);
extern void nv30_fragtex_bind(struct nvfx_context *);
extern struct nouveau_stateobj *
nv30_fragtex_build(struct nvfx_context *nvfx, int unit);

/* nv40_fragtex.c */
extern void
nv40_sampler_state_init(struct pipe_context *pipe,
struct nvfx_sampler_state *ps,
const struct pipe_sampler_state *cso);
extern void nv40_fragtex_bind(struct nvfx_context *);
extern struct nouveau_stateobj *
nv40_fragtex_build(struct nvfx_context *nvfx, int unit);

/* nvfx_state.c */
extern void nvfx_init_state_functions(struct nvfx_context *nvfx);

/* nvfx_state_emit.c */
extern void nvfx_state_flush_notify(struct nouveau_channel *chan);
extern boolean nvfx_state_validate(struct nvfx_context *nvfx);
extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
extern void nvfx_state_emit(struct nvfx_context *nvfx);

/* nvfx_transfer.c */
extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx);

/* nvfx_vbo.c */
extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
extern void nvfx_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
unsigned count);

/* nvfx_vertprog.c */
extern void nvfx_vertprog_destroy(struct nvfx_context *,
struct nvfx_vertex_program *);

#endif

+ 350
- 0
src/gallium/drivers/nvfx/nvfx_draw.c 파일 보기

@@ -0,0 +1,350 @@
#include "pipe/p_shader_tokens.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_ureg.h"

#include "util/u_pack_color.h"

#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pipe.h"

#include "nvfx_context.h"
#include "nv30_vertprog.h"
#include "nv40_vertprog.h"

/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
* often at all. Uses "quadro style" vertex submission + a fixed vertex
* layout to avoid the need to generate a vertex program or vtxfmt.
*/

/*
 * Final draw-module stage that emits vertices to the hardware.  The
 * draw_stage is the first member so a draw_stage pointer can be cast back
 * (see nvfx_render_stage()).
 */
struct nvfx_render_stage {
struct draw_stage stage;
struct nvfx_context *nvfx;
/* Primitive mode currently open on the hardware
 * (an NV34TCL_VERTEX_BEGIN_END_* value).
 */
unsigned prim;
};

/*
 * Downcast a draw_stage to the nvfx_render_stage that embeds it (plain
 * pointer cast; `stage` is the first member of the structure).
 */
static INLINE struct nvfx_render_stage *
nvfx_render_stage(struct draw_stage *stage)
{
	struct nvfx_render_stage *rs = (struct nvfx_render_stage *)stage;

	return rs;
}

/*
 * Push one vertex to the hardware as immediate attribute methods.
 *
 * For each configured swtnl attribute, the values are taken from
 * v->data[swtnl.draw[i]] and written to hardware attribute swtnl.hw[i]
 * using the format in swtnl.emit[i].  The special format 0xff emits
 * (x/w, y/w, z/w, 1/w).  An unknown format trips an assertion.
 */
static INLINE void
nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
{
	struct nvfx_screen *screen = nvfx->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *eng3d = screen->eng3d;
	unsigned a;

	for (a = 0; a < nvfx->swtnl.nr_attribs; a++) {
		const unsigned slot = nvfx->swtnl.draw[a];
		const unsigned attr = nvfx->swtnl.hw[a];

		switch (nvfx->swtnl.emit[a]) {
		case EMIT_OMIT:
			/* nothing to send for this attribute */
			break;
		case EMIT_1F: {
			const float *f = v->data[slot];

			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(attr), 1);
			OUT_RING  (chan, fui(f[0]));
			break;
		}
		case EMIT_2F: {
			const float *f = v->data[slot];

			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(attr), 2);
			OUT_RING  (chan, fui(f[0]));
			OUT_RING  (chan, fui(f[1]));
			break;
		}
		case EMIT_3F: {
			const float *f = v->data[slot];

			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(attr), 3);
			OUT_RING  (chan, fui(f[0]));
			OUT_RING  (chan, fui(f[1]));
			OUT_RING  (chan, fui(f[2]));
			break;
		}
		case EMIT_4F: {
			const float *f = v->data[slot];

			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(attr), 4);
			OUT_RING  (chan, fui(f[0]));
			OUT_RING  (chan, fui(f[1]));
			OUT_RING  (chan, fui(f[2]));
			OUT_RING  (chan, fui(f[3]));
			break;
		}
		case 0xff: {
			/* divide by w and send 1/w in the last component */
			const float *f = v->data[slot];

			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(attr), 4);
			OUT_RING  (chan, fui(f[0] / f[3]));
			OUT_RING  (chan, fui(f[1] / f[3]));
			OUT_RING  (chan, fui(f[2] / f[3]));
			OUT_RING  (chan, fui(1.0f / f[3]));
			break;
		}
		case EMIT_4UB: {
			const float *f = v->data[slot];

			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(attr), 1);
			OUT_RING  (chan, pack_ub4(float_to_ubyte(f[0]),
						  float_to_ubyte(f[1]),
						  float_to_ubyte(f[2]),
						  float_to_ubyte(f[3])));
			break;
		}
		default:
			assert(0);
			break;
		}
	}
}

/*
 * Emit one primitive of `count` vertices in hardware primitive mode `mode`.
 *
 * If the push buffer has less than count * 20 + 6 dwords available, it is
 * fired and the state re-emitted first (this is reported as an error and
 * asserts when a primitive is still open).  The hardware primitive is
 * switched when `mode` differs from the one currently open, the vertices
 * are emitted, and the primitive is closed again early if space is running
 * low.
 */
static INLINE void
nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
		 unsigned mode, unsigned count)
{
	struct nvfx_render_stage *rs = nvfx_render_stage(stage);
	struct nvfx_context *nvfx = rs->nvfx;
	struct nvfx_screen *screen = nvfx->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *eng3d = screen->eng3d;
	/* room for 4xfloat32 per vertex + potentially 3 begin/end */
	const unsigned reserve = (count * 20) + 6;
	unsigned vtx;

	if (AVAIL_RING(chan) < reserve) {
		if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
			NOUVEAU_ERR("AIII, missed flush\n");
			assert(0);
		}
		FIRE_RING(chan);
		nvfx_state_emit(nvfx);
	}

	/* Close the open primitive (if any) and begin the requested one. */
	if (rs->prim != mode) {
		if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
			BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
			OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
		}

		BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
		OUT_RING  (chan, mode);
		rs->prim = mode;
	}

	for (vtx = 0; vtx < count; vtx++)
		nvfx_render_vertex(nvfx, prim->v[vtx]);

	/* Another primitive of this size would not fit: end this one now so
	 * the next call is free to fire the push buffer.
	 */
	if (AVAIL_RING(chan) < reserve) {
		BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
		OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
		rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
	}
}

/* draw_stage point hook: emits the primitive as one vertex in POINTS mode. */
static void
nvfx_render_point(struct draw_stage *draw, struct prim_header *prim)
{
nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1);
}

/* draw_stage line hook: emits the primitive as two vertices in LINES mode. */
static void
nvfx_render_line(struct draw_stage *draw, struct prim_header *prim)
{
nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2);
}

/* draw_stage tri hook: emits the primitive as three vertices in TRIANGLES
 * mode.
 */
static void
nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3);
}

/*
 * draw_stage flush hook: if a hardware primitive is still open, emit the
 * BEGIN_END stop method and record that none is open.  `flags` is not used.
 */
static void
nvfx_render_flush(struct draw_stage *draw, unsigned flags)
{
	struct nvfx_render_stage *rs = nvfx_render_stage(draw);
	struct nvfx_context *nvfx = rs->nvfx;
	struct nvfx_screen *screen = nvfx->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *eng3d = screen->eng3d;

	/* nothing is open, nothing to close */
	if (rs->prim == NV34TCL_VERTEX_BEGIN_END_STOP)
		return;

	BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
	OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
	rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
}

/* draw_stage reset_stipple_counter hook: intentionally does nothing. */
static void
nvfx_render_reset_stipple_counter(struct draw_stage *draw)
{
}

/* draw_stage destroy hook: frees the stage allocation. */
static void
nvfx_render_destroy(struct draw_stage *draw)
{
FREE(draw);
}

/*
 * Build the pass-through vertex program used by the swtnl path.
 *
 * Each output is a plain MOV from a fixed vertex input: POSITION from
 * input 0, COLOR 0/1 and BCOLOR 0/1 from inputs 3/4, FOG.x from input 5 and
 * GENERIC 0..7 from inputs 8..15.  These input numbers are the hardware
 * attribute ids that nvfx_state_vtxfmt_validate() passes to emit_attrib().
 *
 * Returns the shader created through nvfx->pipe, or NULL if the ureg
 * program could not be created.
 */
static struct nvfx_vertex_program *
nvfx_create_drawvp(struct nvfx_context *nvfx)
{
struct ureg_program *ureg;
uint i;

ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
if (ureg == NULL)
return NULL;

/* Output and input declarations happen as side effects of evaluating the
 * arguments of each call below.
 */
ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0));
ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3));
ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4));
ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3));
ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4));
/* NOTE(review): FOG is declared with semantic index 1 here, while
 * nvfx_state_vtxfmt_validate() looks the draw output up with index 0 -
 * confirm this is intended.
 */
ureg_MOV(ureg,
ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X),
ureg_DECL_vs_input(ureg, 5));
for (i = 0; i < 8; ++i)
ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i));

ureg_END( ureg );

return ureg_create_shader_and_destroy( ureg, &nvfx->pipe );
}

/*
 * Allocate the render stage that terminates the draw pipeline.
 *
 * The swtnl pass-through vertex program is created on first use.  Returns
 * the embedded draw_stage, or NULL if the stage could not be allocated.
 */
struct draw_stage *
nvfx_draw_render_stage(struct nvfx_context *nvfx)
{
	struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage);

	/* Do not write through a failed allocation. */
	if (!render)
		return NULL;

	if (!nvfx->swtnl.vertprog)
		nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx);

	render->nvfx = nvfx;
	render->stage.draw = nvfx->draw;
	render->stage.point = nvfx_render_point;
	render->stage.line = nvfx_render_line;
	render->stage.tri = nvfx_render_tri;
	render->stage.flush = nvfx_render_flush;
	render->stage.reset_stipple_counter = nvfx_render_reset_stipple_counter;
	render->stage.destroy = nvfx_render_destroy;

	return &render->stage;
}

/*
 * Draw through the software TNL fallback.
 *
 * pipe:        context to draw with.
 * idxbuf:      index buffer, or NULL for a non-indexed draw.
 * idxbuf_size: index size handed to the draw module together with the
 *              mapped index buffer.
 * mode/start/count: forwarded unchanged to draw_arrays().
 *
 * Returns without drawing if swtnl state validation fails.
 */
void
nvfx_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf, unsigned idxbuf_size,
unsigned mode, unsigned start, unsigned count)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct pipe_screen *pscreen = pipe->screen;
unsigned i;
void *map;

if (!nvfx_state_validate_swtnl(nvfx))
return;
/* Clear the VTXBUF hw dirty bit before the state is emitted. */
nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF);
nvfx_state_emit(nvfx);

/* Map every vertex buffer for CPU reads and hand it to the draw module. */
for (i = 0; i < nvfx->vtxbuf_nr; i++) {
map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
}

/* Same for the index buffer; without one the element buffer is cleared. */
if (idxbuf) {
map = pipe_buffer_map(pscreen, idxbuf,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map);
} else {
draw_set_mapped_element_buffer(nvfx->draw, 0, NULL);
}

/* Vertex shader constants, when a constant buffer is bound. */
if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];

map = pipe_buffer_map(pscreen,
nvfx->constbuf[PIPE_SHADER_VERTEX],
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
map, nr);
}

draw_arrays(nvfx->draw, mode, start, count);

/* Unmap everything that was mapped above. */
for (i = 0; i < nvfx->vtxbuf_nr; i++)
pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer);

if (idxbuf)
pipe_buffer_unmap(pscreen, idxbuf);

if (nvfx->constbuf[PIPE_SHADER_VERTEX])
pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]);

/* Flush the draw module, then the context. */
draw_flush(nvfx->draw);
pipe->flush(pipe, 0, NULL);
}

/*
 * Append one entry to the swtnl attribute table.
 *
 * hw:             hardware attribute id to write.
 * emit:           emit format for the attribute.
 * semantic/index: shader output looked up in the draw module; its output
 *                 slot is stored as the data source.
 */
static INLINE void
emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit,
	    unsigned semantic, unsigned index)
{
	const unsigned src_slot =
		draw_find_shader_output(nvfx->draw, semantic, index);
	const unsigned entry = nvfx->swtnl.nr_attribs;

	nvfx->swtnl.nr_attribs = entry + 1;

	nvfx->swtnl.draw[entry] = src_slot;
	nvfx->swtnl.emit[entry] = emit;
	nvfx->swtnl.hw[entry] = hw;
}

/*
 * Rebuild the swtnl attribute table from the inputs of the bound fragment
 * program.
 *
 * COLOR inputs 0..1 map to hw attributes 3..4, GENERIC inputs 0..7 to hw
 * attributes 8..15 (all as 4 floats), FOG to hw attribute 5 (1 float).
 * POSITION is always emitted last, to hw attribute 0 with the special 0xff
 * format.  An unexpected input semantic trips an assertion.
 *
 * Always returns FALSE.
 */
static boolean
nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx)
{
	struct nvfx_fragment_program *fp = nvfx->fragprog;
	unsigned colour = 0, texcoords = 0, fog = 0, i;

	/* Collect which inputs the fragment program reads. */
	for (i = 0; i < fp->info.num_inputs; i++) {
		switch (fp->info.input_semantic_name[i]) {
		case TGSI_SEMANTIC_FOG:
			fog = 1;
			break;
		case TGSI_SEMANTIC_GENERIC:
			texcoords |= (1 << fp->info.input_semantic_index[i]);
			break;
		case TGSI_SEMANTIC_COLOR:
			colour |= (1 << fp->info.input_semantic_index[i]);
			break;
		case TGSI_SEMANTIC_POSITION:
			break;
		default:
			assert(0);
		}
	}

	nvfx->swtnl.nr_attribs = 0;

	/* Colours first ... */
	for (i = 0; i < 2; i++) {
		if (colour & (1 << i))
			emit_attrib(nvfx, 3 + i, EMIT_4F,
				    TGSI_SEMANTIC_COLOR, i);
	}

	/* ... then texture coordinates ... */
	for (i = 0; i < 8; i++) {
		if (texcoords & (1 << i))
			emit_attrib(nvfx, 8 + i, EMIT_4F,
				    TGSI_SEMANTIC_GENERIC, i);
	}

	/* ... then fog, and position last. */
	if (fog)
		emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);

	emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0);

	return FALSE;
}

/*
 * State entry pairing nvfx_state_vtxfmt_validate() with the NVFX_NEW_ARRAYS
 * and NVFX_NEW_FRAGPROG pipe dirty bits; no hw dirty bits are listed.
 */
struct nvfx_state_entry nvfx_state_vtxfmt = {
	.dirty = {
		.hw = 0,
		.pipe = NVFX_NEW_FRAGPROG | NVFX_NEW_ARRAYS
	},
	.validate = nvfx_state_vtxfmt_validate
};

src/gallium/drivers/nv40/nv40_fragprog.c → src/gallium/drivers/nvfx/nvfx_fragprog.c 파일 보기

@@ -7,37 +7,20 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

#include "nv40_context.h"

#define SWZ_X 0
#define SWZ_Y 1
#define SWZ_Z 2
#define SWZ_W 3
#define MASK_X 1
#define MASK_Y 2
#define MASK_Z 4
#define MASK_W 8
#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
#define DEF_CTEST NV40_FP_OP_COND_TR
#include "nv40_shader.h"

#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
#define neg(s) nv40_sr_neg((s))
#define abs(s) nv40_sr_abs((s))
#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
#include "nvfx_context.h"
#include "nvfx_shader.h"

#define MAX_CONSTS 128
#define MAX_IMM 32
struct nv40_fpc {
struct nv40_fragment_program *fp;
struct nvfx_fpc {
struct nvfx_fragment_program *fp;

uint attrib_map[PIPE_MAX_SHADER_INPUTS];

unsigned r_temps;
unsigned r_temps_discard;
struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
struct nv40_sreg *r_temp;
struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
struct nvfx_sreg *r_temp;

int num_regs;

@@ -50,35 +33,35 @@ struct nv40_fpc {
} consts[MAX_CONSTS];
int nr_consts;

struct nv40_sreg imm[MAX_IMM];
struct nvfx_sreg imm[MAX_IMM];
unsigned nr_imm;
};

static INLINE struct nv40_sreg
temp(struct nv40_fpc *fpc)
static INLINE struct nvfx_sreg
temp(struct nvfx_fpc *fpc)
{
int idx = ffs(~fpc->r_temps) - 1;

if (idx < 0) {
NOUVEAU_ERR("out of temps!!\n");
assert(0);
return nv40_sr(NV40SR_TEMP, 0);
return nvfx_sr(NVFXSR_TEMP, 0);
}

fpc->r_temps |= (1 << idx);
fpc->r_temps_discard |= (1 << idx);
return nv40_sr(NV40SR_TEMP, idx);
return nvfx_sr(NVFXSR_TEMP, idx);
}

static INLINE void
release_temps(struct nv40_fpc *fpc)
release_temps(struct nvfx_fpc *fpc)
{
fpc->r_temps &= ~fpc->r_temps_discard;
fpc->r_temps_discard = 0;
}

static INLINE struct nv40_sreg
constant(struct nv40_fpc *fpc, int pipe, float vals[4])
static INLINE struct nvfx_sreg
constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
{
int idx;

@@ -89,45 +72,45 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4])
fpc->consts[idx].pipe = pipe;
if (pipe == -1)
memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
return nv40_sr(NV40SR_CONST, idx);
return nvfx_sr(NVFXSR_CONST, idx);
}

#define arith(cc,s,o,d,m,s0,s1,s2) \
nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
(d), (m), (s0), (s1), (s2))
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
(d), (m), (s0), none, none)

static void
grow_insns(struct nv40_fpc *fpc, int size)
grow_insns(struct nvfx_fpc *fpc, int size)
{
struct nv40_fragment_program *fp = fpc->fp;
struct nvfx_fragment_program *fp = fpc->fp;

fp->insn_len += size;
fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
}

static void
emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
{
struct nv40_fragment_program *fp = fpc->fp;
struct nvfx_fragment_program *fp = fpc->fp;
uint32_t *hw = &fp->insn[fpc->inst_offset];
uint32_t sr = 0;

switch (src.type) {
case NV40SR_INPUT:
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
case NVFXSR_INPUT:
sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
break;
case NV40SR_OUTPUT:
sr |= NV40_FP_REG_SRC_HALF;
case NVFXSR_OUTPUT:
sr |= NVFX_FP_REG_SRC_HALF;
/* fall-through */
case NV40SR_TEMP:
sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
case NVFXSR_TEMP:
sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
break;
case NV40SR_CONST:
case NVFXSR_CONST:
if (!fpc->have_const) {
grow_insns(fpc, 4);
fpc->have_const = 1;
@@ -135,7 +118,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)

hw = &fp->insn[fpc->inst_offset];
if (fpc->consts[src.index].pipe >= 0) {
struct nv40_fragment_program_data *fpd;
struct nvfx_fragment_program_data *fpd;

fp->consts = realloc(fp->consts, ++fp->nr_consts *
sizeof(*fpd));
@@ -149,63 +132,63 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
sizeof(uint32_t) * 4);
}

sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
break;
case NV40SR_NONE:
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
case NVFXSR_NONE:
sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
break;
default:
assert(0);
}

if (src.negate)
sr |= NV40_FP_REG_NEGATE;
sr |= NVFX_FP_REG_NEGATE;

if (src.abs)
hw[1] |= (1 << (29 + pos));

sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
(src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
(src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
(src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
(src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
(src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
(src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

hw[pos + 1] |= sr;
}

static void
emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
{
struct nv40_fragment_program *fp = fpc->fp;
struct nvfx_fragment_program *fp = fpc->fp;
uint32_t *hw = &fp->insn[fpc->inst_offset];

switch (dst.type) {
case NV40SR_TEMP:
case NVFXSR_TEMP:
if (fpc->num_regs < (dst.index + 1))
fpc->num_regs = dst.index + 1;
break;
case NV40SR_OUTPUT:
case NVFXSR_OUTPUT:
if (dst.index == 1) {
fp->fp_control |= 0xe;
} else {
hw[0] |= NV40_FP_OP_OUT_REG_HALF;
hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
}
break;
case NV40SR_NONE:
case NVFXSR_NONE:
hw[0] |= (1 << 30);
break;
default:
assert(0);
}

hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}

static void
nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
struct nv40_sreg dst, int mask,
struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
struct nvfx_sreg dst, int mask,
struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
{
struct nv40_fragment_program *fp = fpc->fp;
struct nvfx_fragment_program *fp = fpc->fp;
uint32_t *hw;

fpc->inst_offset = fp->insn_len;
@@ -214,22 +197,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
hw = &fp->insn[fpc->inst_offset];
memset(hw, 0, sizeof(uint32_t) * 4);

if (op == NV40_FP_OP_OPCODE_KIL)
fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
if (op == NVFX_FP_OP_OPCODE_KIL)
fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);

if (sat)
hw[0] |= NV40_FP_OP_OUT_SAT;
hw[0] |= NVFX_FP_OP_OUT_SAT;

if (dst.cc_update)
hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
(dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
(dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
(dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
(dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
(dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
(dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

emit_dst(fpc, dst);
emit_src(fpc, 0, s0);
@@ -238,26 +221,26 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
}

static void
nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
struct nv40_sreg dst, int mask,
struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit,
struct nvfx_sreg dst, int mask,
struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
{
struct nv40_fragment_program *fp = fpc->fp;
struct nvfx_fragment_program *fp = fpc->fp;

nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);

fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
fp->samplers |= (1 << unit);
}

static INLINE struct nv40_sreg
tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
static INLINE struct nvfx_sreg
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
struct nv40_sreg src;
struct nvfx_sreg src;

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
src = nv40_sr(NV40SR_INPUT,
src = nvfx_sr(NVFXSR_INPUT,
fpc->attrib_map[fsrc->Register.Index]);
break;
case TGSI_FILE_CONSTANT:
@@ -288,18 +271,18 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
return src;
}

static INLINE struct nv40_sreg
tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
static INLINE struct nvfx_sreg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
return fpc->r_result[fdst->Register.Index];
case TGSI_FILE_TEMPORARY:
return fpc->r_temp[fdst->Register.Index];
case TGSI_FILE_NULL:
return nv40_sr(NV40SR_NONE, 0);
return nvfx_sr(NVFXSR_NONE, 0);
default:
NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
return nv40_sr(NV40SR_NONE, 0);
return nvfx_sr(NVFXSR_NONE, 0);
}
}

@@ -308,52 +291,19 @@ tgsi_mask(uint tgsi)
{
int mask = 0;

if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
return mask;
}

static boolean
src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
struct nv40_sreg *src)
{
const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
uint mask = 0;
uint c;

for (c = 0; c < 4; c++) {
switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
case TGSI_SWIZZLE_X:
case TGSI_SWIZZLE_Y:
case TGSI_SWIZZLE_Z:
case TGSI_SWIZZLE_W:
mask |= (1 << c);
break;
default:
assert(0);
}
}

if (mask == MASK_ALL)
return TRUE;

*src = temp(fpc);

if (mask)
arith(fpc, 0, MOV, *src, mask, tgsi, none, none);

return FALSE;
}

static boolean
nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
const struct tgsi_full_instruction *finst)
{
const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
struct nv40_sreg src[3], dst, tmp;
const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
struct nvfx_sreg src[3], dst, tmp;
int mask, sat, unit;
int ai = -1, ci = -1, ii = -1;
int i;
@@ -375,17 +325,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,

fsrc = &finst->Src[i];

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
if (!src_native_swz(fpc, fsrc, &src[i]))
continue;
break;
default:
break;
}

switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
if (ai == -1 || ai == fsrc->Register.Index) {
@@ -393,7 +332,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
arith(fpc, 0, MOV, src[i], MASK_ALL,
arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
tgsi_src(fpc, fsrc), none, none);
}
break;
@@ -404,7 +343,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
arith(fpc, 0, MOV, src[i], MASK_ALL,
arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
tgsi_src(fpc, fsrc), none, none);
}
break;
@@ -415,7 +354,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
arith(fpc, 0, MOV, src[i], MASK_ALL,
arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
tgsi_src(fpc, fsrc), none, none);
}
break;
@@ -445,25 +384,25 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
tmp = nv40_sr(NV40SR_NONE, 0);
tmp = nvfx_sr(NVFXSR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
dst.cc_test = NV40_VP_INST_COND_GE;
dst.cc_test = NVFX_COND_GE;
arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV40_VP_INST_COND_LT;
dst.cc_test = NVFX_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
case TGSI_OPCODE_COS:
arith(fpc, sat, COS, dst, mask, src[0], none, none);
break;
case TGSI_OPCODE_DDX:
if (mask & (MASK_Z | MASK_W)) {
if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
tmp = temp(fpc);
arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y,
arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
swz(src[0], Z, W, Z, W), none, none);
arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
swz(tmp, X, Y, X, Y), none, none);
arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0],
arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
none, none);
arith(fpc, 0, MOV, dst, mask, tmp, none, none);
} else {
@@ -471,13 +410,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
}
break;
case TGSI_OPCODE_DDY:
if (mask & (MASK_Z | MASK_W)) {
if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
tmp = temp(fpc);
arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y,
arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
swz(src[0], Z, W, Z, W), none, none);
arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
swz(tmp, X, Y, X, Y), none, none);
arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0],
arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
none, none);
arith(fpc, 0, MOV, dst, mask, tmp, none, none);
} else {
@@ -492,7 +431,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
break;
case TGSI_OPCODE_DPH:
tmp = temp(fpc);
arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none);
arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
swz(src[1], W, W, W, W), none);
break;
@@ -512,10 +451,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
arith(fpc, 0, KIL, none, 0, none, none, none);
break;
case TGSI_OPCODE_KIL:
dst = nv40_sr(NV40SR_NONE, 0);
dst = nvfx_sr(NVFXSR_NONE, 0);
dst.cc_update = 1;
arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none);
dst.cc_update = 0; dst.cc_test = NVFX_COND_LT;
arith(fpc, 0, KIL, dst, 0, none, none, none);
break;
case TGSI_OPCODE_LG2:
@@ -523,9 +462,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
break;
// case TGSI_OPCODE_LIT:
case TGSI_OPCODE_LRP:
tmp = temp(fpc);
arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
if(!nvfx->is_nv4x)
arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
else {
tmp = temp(fpc);
arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
}
break;
case TGSI_OPCODE_MAD:
arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
@@ -543,13 +486,17 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_POW:
tmp = temp(fpc);
arith(fpc, 0, LG2, tmp, MASK_X,
swz(src[0], X, X, X, X), none, none);
arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
swz(src[1], X, X, X, X), none);
arith(fpc, sat, EX2, dst, mask,
swz(tmp, X, X, X, X), none, none);
if(!nvfx->is_nv4x)
arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
else {
tmp = temp(fpc);
arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X,
swz(src[0], X, X, X, X), none, none);
arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X),
swz(src[1], X, X, X, X), none);
arith(fpc, sat, EX2, dst, mask,
swz(tmp, X, X, X, X), none, none);
}
break;
case TGSI_OPCODE_RCP:
arith(fpc, sat, RCP, dst, mask, src[0], none, none);
@@ -558,42 +505,50 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
assert(0);
break;
case TGSI_OPCODE_RFL:
tmp = temp(fpc);
arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
arith(fpc, sat, MAD, dst, mask,
swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
if(!nvfx->is_nv4x)
arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
else {
tmp = temp(fpc);
arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none);
arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none);
arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z,
swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
arith(fpc, sat, MAD, dst, mask,
swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
}
break;
case TGSI_OPCODE_RSQ:
tmp = temp(fpc);
arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
abs(swz(src[0], X, X, X, X)), none, none);
arith(fpc, sat, EX2, dst, mask,
neg(swz(tmp, X, X, X, X)), none, none);
if(!nvfx->is_nv4x)
arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
else {
tmp = temp(fpc);
arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X,
abs(swz(src[0], X, X, X, X)), none, none);
arith(fpc, sat, EX2, dst, mask,
neg(swz(tmp, X, X, X, X)), none, none);
}
break;
case TGSI_OPCODE_SCS:
/* avoid overwriting the source */
if(src[0].swz[SWZ_X] != SWZ_X)
if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
{
if (mask & MASK_X) {
arith(fpc, sat, COS, dst, MASK_X,
if (mask & NVFX_FP_MASK_X) {
arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
swz(src[0], X, X, X, X), none, none);
}
if (mask & MASK_Y) {
arith(fpc, sat, SIN, dst, MASK_Y,
if (mask & NVFX_FP_MASK_Y) {
arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
swz(src[0], X, X, X, X), none, none);
}
}
else
{
if (mask & MASK_Y) {
arith(fpc, sat, SIN, dst, MASK_Y,
if (mask & NVFX_FP_MASK_Y) {
arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
swz(src[0], X, X, X, X), none, none);
}
if (mask & MASK_X) {
arith(fpc, sat, COS, dst, MASK_X,
if (mask & NVFX_FP_MASK_X) {
arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
swz(src[0], X, X, X, X), none, none);
}
}
@@ -641,7 +596,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
tmp = temp(fpc);
arith(fpc, 0, MUL, tmp, mask,
swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W),
swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
neg(tmp));
break;
@@ -655,32 +610,32 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
}

static boolean
nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
int hw;

switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = NV40_FP_OP_INPUT_SRC_POSITION;
hw = NVFX_FP_OP_INPUT_SRC_POSITION;
break;
case TGSI_SEMANTIC_COLOR:
if (fdec->Semantic.Index == 0) {
hw = NV40_FP_OP_INPUT_SRC_COL0;
hw = NVFX_FP_OP_INPUT_SRC_COL0;
} else
if (fdec->Semantic.Index == 1) {
hw = NV40_FP_OP_INPUT_SRC_COL1;
hw = NVFX_FP_OP_INPUT_SRC_COL1;
} else {
NOUVEAU_ERR("bad colour semantic index\n");
return FALSE;
}
break;
case TGSI_SEMANTIC_FOG:
hw = NV40_FP_OP_INPUT_SRC_FOGC;
hw = NVFX_FP_OP_INPUT_SRC_FOGC;
break;
case TGSI_SEMANTIC_GENERIC:
if (fdec->Semantic.Index <= 7) {
hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
Index);
} else {
NOUVEAU_ERR("bad generic semantic index\n");
@@ -697,7 +652,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
}

static boolean
nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
unsigned idx = fdec->Range.First;
@@ -708,12 +663,14 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
hw = 1;
break;
case TGSI_SEMANTIC_COLOR:
hw = ~0;
switch (fdec->Semantic.Index) {
case 0: hw = 0; break;
case 1: hw = 2; break;
case 2: hw = 3; break;
case 3: hw = 4; break;
default:
}
if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
NOUVEAU_ERR("bad rcol index\n");
return FALSE;
}
@@ -723,13 +680,13 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
return FALSE;
}

fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
fpc->r_temps |= (1 << hw);
return TRUE;
}

static boolean
nv40_fragprog_prepare(struct nv40_fpc *fpc)
nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
{
struct tgsi_parse_context p;
int high_temp = -1, i;
@@ -746,11 +703,11 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
fdec = &p.FullToken.FullDeclaration;
switch (fdec->Declaration.File) {
case TGSI_FILE_INPUT:
if (!nv40_fragprog_parse_decl_attrib(fpc, fdec))
if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec))
goto out_err;
break;
case TGSI_FILE_OUTPUT:
if (!nv40_fragprog_parse_decl_output(fpc, fdec))
if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
goto out_err;
break;
case TGSI_FILE_TEMPORARY:
@@ -787,7 +744,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
tgsi_parse_free(&p);

if (++high_temp) {
fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
for (i = 0; i < high_temp; i++)
fpc->r_temp[i] = temp(fpc);
fpc->r_temps_discard = 0;
@@ -803,19 +760,19 @@ out_err:
}

static void
nv40_fragprog_translate(struct nv40_context *nv40,
struct nv40_fragment_program *fp)
nvfx_fragprog_translate(struct nvfx_context *nvfx,
struct nvfx_fragment_program *fp)
{
struct tgsi_parse_context parse;
struct nv40_fpc *fpc = NULL;
struct nvfx_fpc *fpc = NULL;

fpc = CALLOC(1, sizeof(struct nv40_fpc));
fpc = CALLOC(1, sizeof(struct nvfx_fpc));
if (!fpc)
return;
fpc->fp = fp;
fpc->num_regs = 2;

if (!nv40_fragprog_prepare(fpc)) {
if (!nvfx_fragprog_prepare(nvfx, fpc)) {
FREE(fpc);
return;
}
@@ -831,7 +788,7 @@ nv40_fragprog_translate(struct nv40_context *nv40,
const struct tgsi_full_instruction *finst;

finst = &parse.FullToken.FullInstruction;
if (!nv40_fragprog_parse_instruction(fpc, finst))
if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
goto out_err;
}
break;
@@ -840,7 +797,10 @@ nv40_fragprog_translate(struct nv40_context *nv40,
}
}

fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
if(!nvfx->is_nv4x)
fp->fp_control |= (fpc->num_regs-1)/2;
else
fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;

/* Terminate final instruction */
fp->insn[fpc->inst_offset] |= 0x00000001;
@@ -862,10 +822,10 @@ out_err:
}

static void
nv40_fragprog_upload(struct nv40_context *nv40,
struct nv40_fragment_program *fp)
nvfx_fragprog_upload(struct nvfx_context *nvfx,
struct nvfx_fragment_program *fp)
{
struct pipe_screen *pscreen = nv40->pipe.screen;
struct pipe_screen *pscreen = nvfx->pipe.screen;
const uint32_t le = 1;
uint32_t *map;
int i;
@@ -896,12 +856,12 @@ nv40_fragprog_upload(struct nv40_context *nv40,
}

static boolean
nv40_fragprog_validate(struct nv40_context *nv40)
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
struct nv40_fragment_program *fp = nv40->fragprog;
struct nvfx_fragment_program *fp = nvfx->fragprog;
struct pipe_buffer *constbuf =
nv40->constbuf[PIPE_SHADER_FRAGMENT];
struct pipe_screen *pscreen = nv40->pipe.screen;
nvfx->constbuf[PIPE_SHADER_FRAGMENT];
struct pipe_screen *pscreen = nvfx->pipe.screen;
struct nouveau_stateobj *so;
boolean new_consts = FALSE;
int i;
@@ -909,24 +869,31 @@ nv40_fragprog_validate(struct nv40_context *nv40)
if (fp->translated)
goto update_constants;

nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG;
nv40_fragprog_translate(nv40, fp);
nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG;
nvfx_fragprog_translate(nvfx, fp);
if (!fp->translated) {
nv40->fallback_swrast |= NV40_NEW_FRAGPROG;
nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG;
return FALSE;
}

fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
nv40_fragprog_upload(nv40, fp);
nvfx_fragprog_upload(nvfx, fp);

so = so_new(2, 2, 1);
so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
so = so_new(4, 4, 1);
so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1);
so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
NV40TCL_FP_ADDRESS_DMA1);
so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1);
NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1);
so_data (so, fp->fp_control);
if(!nvfx->is_nv4x) {
so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1);
so_data (so, (1<<16)|0x4);
so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1);
so_data (so, fp->samplers);
}

so_ref(so, &fp->so);
so_ref(NULL, &so);

@@ -937,7 +904,7 @@ update_constants:
map = pipe_buffer_map(pscreen, constbuf,
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < fp->nr_consts; i++) {
struct nv40_fragment_program_data *fpd = &fp->consts[i];
struct nvfx_fragment_program_data *fpd = &fp->consts[i];
uint32_t *p = &fp->insn[fpd->offset];
uint32_t *cb = (uint32_t *)&map[fpd->index * 4];

@@ -949,11 +916,11 @@ update_constants:
pipe_buffer_unmap(pscreen, constbuf);

if (new_consts)
nv40_fragprog_upload(nv40, fp);
nvfx_fragprog_upload(nvfx, fp);
}

if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) {
so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]);
if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) {
so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]);
return TRUE;
}

@@ -961,8 +928,8 @@ update_constants:
}

void
nv40_fragprog_destroy(struct nv40_context *nv40,
struct nv40_fragment_program *fp)
nvfx_fragprog_destroy(struct nvfx_context *nvfx,
struct nvfx_fragment_program *fp)
{
if (fp->buffer)
pipe_buffer_reference(&fp->buffer, NULL);
@@ -974,11 +941,10 @@ nv40_fragprog_destroy(struct nv40_context *nv40,
FREE(fp->insn);
}

struct nv40_state_entry nv40_state_fragprog = {
.validate = nv40_fragprog_validate,
struct nvfx_state_entry nvfx_state_fragprog = {
.validate = nvfx_fragprog_validate,
.dirty = {
.pipe = NV40_NEW_FRAGPROG,
.hw = NV40_STATE_FRAGPROG
.pipe = NVFX_NEW_FRAGPROG,
.hw = NVFX_STATE_FRAGPROG
}
};


+ 49
- 0
src/gallium/drivers/nvfx/nvfx_fragtex.c 파일 보기

@@ -0,0 +1,49 @@
#include "nvfx_context.h"

/*
 * Rebuild the per-unit fragment texture state objects.
 *
 * Pass 1: for every unit set in the previously recorded sampler mask
 *         (state.fp_samplers) but not in the current fragment program's
 *         mask, emit NV34TCL_TX_ENABLE(unit) with data 0.
 * Pass 2: for every unit that is both dirty and used by the current
 *         fragment program, build a fresh state object with the
 *         chipset-specific builder (nv30 or nv40).
 *
 * Each touched unit gets its bit set in state.dirty.  The function always
 * returns FALSE; it marks the per-unit hw state dirty itself.
 */
static boolean
nvfx_fragtex_validate(struct nvfx_context *nvfx)
{
	struct nvfx_state *hw_state = &nvfx->state;
	struct nvfx_fragment_program *prog = nvfx->fragprog;
	unsigned pending;

	/* Units recorded on the last validation that the program no longer samples. */
	for (pending = hw_state->fp_samplers & ~prog->samplers; pending; ) {
		const unsigned u = ffs(pending) - 1;
		struct nouveau_stateobj *off = so_new(1, 1, 0);

		pending &= ~(1 << u);

		so_method(off, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(u), 1);
		so_data  (off, 0);
		so_ref(off, &hw_state->hw[NVFX_STATE_FRAGTEX0 + u]);
		so_ref(NULL, &off);
		hw_state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + u));
	}

	/* Dirty units that the current program does sample. */
	for (pending = nvfx->dirty_samplers & prog->samplers; pending; ) {
		const unsigned u = ffs(pending) - 1;
		struct nouveau_stateobj *tex;

		pending &= ~(1 << u);

		tex = nvfx->is_nv4x ? nv40_fragtex_build(nvfx, u)
				    : nv30_fragtex_build(nvfx, u);

		so_ref(tex, &hw_state->hw[NVFX_STATE_FRAGTEX0 + u]);
		so_ref(NULL, &tex);
		hw_state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + u));
	}

	/* Remember which units this program uses for the next validation. */
	hw_state->fp_samplers = prog->samplers;
	return FALSE;
}

/*
 * State-table entry for fragment texture units.
 *
 * .validate  - nvfx_fragtex_validate()
 * .dirty.pipe - NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG; presumably the flags
 *               that cause the state validator to call .validate
 *               (NOTE(review): confirm against the validator loop).
 * .dirty.hw  - 0; nvfx_fragtex_validate() sets the per-unit
 *              NVFX_STATE_FRAGTEX0 + unit bits in state.dirty itself.
 */
struct nvfx_state_entry nvfx_state_fragtex = {
.validate = nvfx_fragtex_validate,
.dirty = {
.pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG,
.hw = 0
}
};

src/gallium/drivers/nv40/nv40_miptree.c → src/gallium/drivers/nvfx/nvfx_miptree.c 파일 보기

@@ -4,13 +4,13 @@
#include "util/u_format.h"
#include "util/u_math.h"

#include "nv40_context.h"
#include "../nouveau/nv04_surface_2d.h"
#include "nvfx_context.h"
#include "nv04_surface_2d.h"



static void
nv40_miptree_layout(struct nv40_miptree *mt)
nvfx_miptree_layout(struct nvfx_miptree *mt)
{
struct pipe_texture *pt = &mt->base;
uint width = pt->width0;
@@ -62,13 +62,13 @@ nv40_miptree_layout(struct nv40_miptree *mt)
}

static struct pipe_texture *
nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
{
struct nv40_miptree *mt;
struct nvfx_miptree *mt;
unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL |
NOUVEAU_BUFFER_USAGE_TEXTURE;

mt = MALLOC(sizeof(struct nv40_miptree));
mt = MALLOC(sizeof(struct nvfx_miptree));
if (!mt)
return NULL;
mt->base = *pt;
@@ -89,6 +89,18 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
else {
switch (pt->format) {
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
/* TODO: we can actually swizzle these formats on nv40, we
are just preserving the pre-unification behavior.
The whole 2D code is going to be rewritten anyway. */
if(nvfx_screen(pscreen)->is_nv4x) {
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
break;
}
/* TODO: Figure out which formats can be swizzled */
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
@@ -112,7 +124,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;

nv40_miptree_layout(mt);
nvfx_miptree_layout(mt);

mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size);
if (!mt->buffer) {
@@ -124,17 +136,17 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
}

static struct pipe_texture *
nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
nvfx_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
const unsigned *stride, struct pipe_buffer *pb)
{
struct nv40_miptree *mt;
struct nvfx_miptree *mt;

/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
pt->depth0 != 1)
return NULL;

mt = CALLOC_STRUCT(nv40_miptree);
mt = CALLOC_STRUCT(nvfx_miptree);
if (!mt)
return NULL;

@@ -153,9 +165,9 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
}

static void
nv40_miptree_destroy(struct pipe_texture *pt)
nvfx_miptree_destroy(struct pipe_texture *pt)
{
struct nv40_miptree *mt = (struct nv40_miptree *)pt;
struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
int l;

pipe_buffer_reference(&mt->buffer, NULL);
@@ -168,11 +180,11 @@ nv40_miptree_destroy(struct pipe_texture *pt)
}

static struct pipe_surface *
nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
unsigned flags)
{
struct nv40_miptree *mt = (struct nv40_miptree *)pt;
struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
struct nv04_surface *ns;

ns = CALLOC_STRUCT(nv04_surface);
@@ -202,21 +214,21 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
* Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
* ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base;
return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base;

return &ns->base;
}

static void
nv40_miptree_surface_del(struct pipe_surface *ps)
nvfx_miptree_surface_del(struct pipe_surface *ps)
{
struct nv04_surface* ns = (struct nv04_surface*)ps;
if(ns->backing)
{
struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen;
struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen;
if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
nv40_miptree_surface_del(&ns->backing->base);
nvfx_miptree_surface_del(&ns->backing->base);
}

pipe_texture_reference(&ps->texture, NULL);
@@ -224,13 +236,12 @@ nv40_miptree_surface_del(struct pipe_surface *ps)
}

void
nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
nvfx_screen_init_miptree_functions(struct pipe_screen *pscreen)
{
pscreen->texture_create = nv40_miptree_create;
pscreen->texture_destroy = nv40_miptree_destroy;
pscreen->get_tex_surface = nv40_miptree_surface_new;
pscreen->tex_surface_destroy = nv40_miptree_surface_del;
pscreen->texture_create = nvfx_miptree_create;
pscreen->texture_destroy = nvfx_miptree_destroy;
pscreen->get_tex_surface = nvfx_miptree_surface_new;
pscreen->tex_surface_destroy = nvfx_miptree_surface_del;

nouveau_screen(pscreen)->texture_blanket = nv40_miptree_blanket;
nouveau_screen(pscreen)->texture_blanket = nvfx_miptree_blanket;
}


+ 127
- 0
src/gallium/drivers/nvfx/nvfx_query.c 파일 보기

@@ -0,0 +1,127 @@
#include "pipe/p_context.h"

#include "nvfx_context.h"

/* Driver-private query object; handed to callers as an opaque pipe_query. */
struct nvfx_query {
/* Slot allocated from screen->query_heap in begin_query; freed (and
 * reset to NULL) once the result has been read back or on destroy. */
struct nouveau_resource *object;
/* PIPE_QUERY_* type given at creation; the begin/result paths assert
 * that it is PIPE_QUERY_OCCLUSION_COUNTER. */
unsigned type;
/* TRUE once 'result' holds the value read back from the notifier. */
boolean ready;
/* Cached query result, valid when 'ready' is TRUE. */
uint64_t result;
};

/* Downcast an opaque pipe_query handle to the driver's query structure. */
static INLINE struct nvfx_query *
nvfx_query(struct pipe_query *pipe)
{
	struct nvfx_query *q = (struct nvfx_query *)pipe;

	return q;
}

static struct pipe_query *
nvfx_query_create(struct pipe_context *pipe, unsigned query_type)
{
struct nvfx_query *q;

q = CALLOC(1, sizeof(struct nvfx_query));
q->type = query_type;

return (struct pipe_query *)q;
}

static void
nvfx_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nvfx_query *q = nvfx_query(pq);

if (q->object)
nouveau_resource_free(&q->object);
FREE(q);
}

static void
nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_query *q = nvfx_query(pq);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *eng3d = screen->eng3d;

assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);

/* Happens when end_query() is called, then another begin_query()
* without querying the result in-between. For now we'll wait for
* the existing query to notify completion, but it could be better.
*/
if (q->object) {
uint64_t tmp;
pipe->get_query_result(pipe, pq, 1, &tmp);
}

if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object))
assert(0);
nouveau_notifier_reset(nvfx->screen->query, q->object->start);

BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1);
OUT_RING (chan, 1);

q->ready = FALSE;
}

static void
nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *eng3d = screen->eng3d;
struct nvfx_query *q = nvfx_query(pq);

BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1);
OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
FIRE_RING(chan);
}

/*
 * Fetch the result of an occlusion query.
 *
 * pipe    - owning context.
 * pq      - query handle.
 * wait    - if FALSE and the notifier has not completed, return FALSE
 *           immediately; otherwise block until it completes.
 * result  - receives the query value on success.
 *
 * Returns TRUE when *result has been written, FALSE when the result is not
 * yet available and 'wait' is FALSE.
 *
 * The first successful read caches the value in q->result, sets q->ready
 * and frees the query-heap slot; later calls return the cached value.
 */
static boolean
nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq,
		  boolean wait, uint64_t *result)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_query *q = nvfx_query(pq);

	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
	/* The slot is released once the result is cached, so it is only
	 * required while the query is not yet ready.  Requiring it
	 * unconditionally would trip on any repeated read of a finished query.
	 */
	assert(q->ready || q->object);

	if (!q->ready) {
		unsigned status;

		status = nouveau_notifier_status(nvfx->screen->query,
						 q->object->start);
		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
			if (wait == FALSE)
				return FALSE;

			nouveau_notifier_wait_status(nvfx->screen->query,
					q->object->start,
					NV_NOTIFY_STATE_STATUS_COMPLETED, 0);
		}

		q->result = nouveau_notifier_return_val(nvfx->screen->query,
							q->object->start);
		q->ready = TRUE;
		nouveau_resource_free(&q->object);
	}

	*result = q->result;
	return TRUE;
}

void
nvfx_init_query_functions(struct nvfx_context *nvfx)
{
nvfx->pipe.create_query = nvfx_query_create;
nvfx->pipe.destroy_query = nvfx_query_destroy;
nvfx->pipe.begin_query = nvfx_query_begin;
nvfx->pipe.end_query = nvfx_query_end;
nvfx->pipe.get_query_result = nvfx_query_result;
}

src/gallium/drivers/nv30/nv30_screen.c → src/gallium/drivers/nvfx/nvfx_screen.c 파일 보기

@@ -3,18 +3,18 @@

#include "nouveau/nouveau_screen.h"

#include "nv30_context.h"
#include "nv30_screen.h"
#include "nvfx_context.h"
#include "nvfx_screen.h"

#define NV30TCL_CHIPSET_3X_MASK 0x00000003
#define NV34TCL_CHIPSET_3X_MASK 0x00000010
#define NV35TCL_CHIPSET_3X_MASK 0x000001e0

/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
* to get the pointer to the context front buffer, so I copied nouveau_winsys here.
* nv30_screen_surface_format_supported() can then use it to enforce creating fbo
* with same number of bits everywhere.
*/
* to get the pointer to the context front buffer, so I copied nouveau_winsys here.
* nv30_screen_surface_format_supported() can then use it to enforce creating fbo
* with same number of bits everywhere.
*/
struct nouveau_winsys {
struct pipe_winsys base;

@@ -22,15 +22,21 @@ struct nouveau_winsys {

struct pipe_surface *front;
};
#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
#define NV6X_GRCLASS4497_CHIPSETS 0x00000088

static int
nv30_screen_get_param(struct pipe_screen *pscreen, int param)
nvfx_screen_get_param(struct pipe_screen *pscreen, int param)
{
struct nvfx_screen *screen = nvfx_screen(pscreen);

switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 8;
/* TODO: check this */
return screen->is_nv4x ? 16 : 8;
case PIPE_CAP_NPOT_TEXTURES:
return 0;
return !!screen->is_nv4x;
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
case PIPE_CAP_GLSL:
@@ -40,7 +46,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_POINT_SPRITE:
return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 2;
return screen->is_nv4x ? 4 : 2;
case PIPE_CAP_OCCLUSION_QUERY:
return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
@@ -52,21 +58,26 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 13;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
return 0;
return !!screen->is_nv4x;
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
return 0;
return 0; /* We have 4 on nv40 - but unsupported currently */
case PIPE_CAP_TGSI_CONT_SUPPORTED:
return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 0;
return !!screen->is_nv4x;
case NOUVEAU_CAP_HW_VTXBUF:
/* TODO: this is almost surely wrong */
return !!screen->is_nv4x;
case NOUVEAU_CAP_HW_IDXBUF:
return 1;
/* TODO: this is also almost surely wrong */
return screen->is_nv4x && screen->eng3d->grclass == NV40TCL;
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 16;
case PIPE_CAP_INDEP_BLEND_ENABLE:
/* TODO: on nv40 we have separate color masks */
/* TODO: nv40 mrt blending is probably broken */
return 0;
case PIPE_CAP_INDEP_BLEND_FUNC:
return 0;
@@ -83,8 +94,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param)
}

static float
nv30_screen_get_paramf(struct pipe_screen *pscreen, int param)
nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param)
{
struct nvfx_screen *screen = nvfx_screen(pscreen);

switch (param) {
case PIPE_CAP_MAX_LINE_WIDTH:
case PIPE_CAP_MAX_LINE_WIDTH_AA:
@@ -93,9 +106,9 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param)
case PIPE_CAP_MAX_POINT_WIDTH_AA:
return 64.0;
case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
return 8.0;
return screen->is_nv4x ? 16.0 : 8.0;
case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
return 4.0;
return screen->is_nv4x ? 16.0 : 4.0;
default:
NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
return 0.0;
@@ -103,11 +116,12 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param)
}

static boolean
nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned tex_usage, unsigned geom_flags)
{
struct nvfx_screen *screen = nvfx_screen(pscreen);
struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;

if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
@@ -125,9 +139,9 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_X8Z24_UNORM:
return TRUE;
case PIPE_FORMAT_Z16_UNORM:
if (front) {
/* TODO: this nv30 limitation probably does not exist */
if (!screen->is_nv4x && front)
return (front->format == PIPE_FORMAT_B5G6R5_UNORM);
}
return TRUE;
default:
break;
@@ -144,7 +158,14 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_Z16_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
return TRUE;
/* TODO: does nv30 support this? */
case PIPE_FORMAT_R16_SNORM:
return !!screen->is_nv4x;
default:
break;
}
@@ -154,20 +175,20 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
}

static struct pipe_buffer *
nv30_surface_buffer(struct pipe_surface *surf)
nvfx_surface_buffer(struct pipe_surface *surf)
{
struct nv30_miptree *mt = (struct nv30_miptree *)surf->texture;
struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture;

return mt->buffer;
}

static void
nv30_screen_destroy(struct pipe_screen *pscreen)
nvfx_screen_destroy(struct pipe_screen *pscreen)
{
struct nv30_screen *screen = nv30_screen(pscreen);
struct nvfx_screen *screen = nvfx_screen(pscreen);
unsigned i;

for (i = 0; i < NV30_STATE_MAX; i++) {
for (i = 0; i < NVFX_STATE_MAX; i++) {
if (screen->state[i])
so_ref(NULL, &screen->state[i]);
}
@@ -177,7 +198,7 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
nouveau_resource_destroy(&screen->query_heap);
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->rankine);
nouveau_grobj_free(&screen->eng3d);
nv04_surface_2d_takedown(&screen->eng2d);

nouveau_screen_fini(&screen->base);
@@ -185,59 +206,155 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
FREE(pscreen);
}

/*
 * Append the nv30-specific static initialisation methods to the state
 * object `so`, all targeting screen->eng3d. Most method offsets are raw hex
 * values without symbolic names; the emission order is kept exactly as is.
 */
static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so)
{
    struct nouveau_grobj *eng3d = screen->eng3d;
    int n;

    /* TODO: perhaps we should do some of this on nv40 too? */
    /* Zero the horizontal/vertical viewport clip entries 1..7. */
    for (n = 1; n < 8; ++n) {
        so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(n), 1);
        so_data  (so, 0);
        so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_VERT(n), 1);
        so_data  (so, 0);
    }

    so_method(so, eng3d, 0x220, 1);
    so_data  (so, 1);

    so_method(so, eng3d, 0x03b0, 1);
    so_data  (so, 0x00100000);
    so_method(so, eng3d, 0x1454, 1);
    so_data  (so, 0);
    so_method(so, eng3d, 0x1d80, 1);
    so_data  (so, 3);
    so_method(so, eng3d, 0x1450, 1);
    so_data  (so, 0x00030004);

    /* NEW */
    so_method(so, eng3d, 0x1e98, 1);
    so_data  (so, 0);
    so_method(so, eng3d, 0x17e0, 3);
    so_data  (so, fui(0.0));
    so_data  (so, fui(0.0));
    so_data  (so, fui(1.0));

    /* 16 data words: only word 8 is non-zero. */
    so_method(so, eng3d, 0x1f80, 16);
    n = 0;
    while (n < 16) {
        so_data(so, (n == 8) ? 0x0000ffff : 0);
        ++n;
    }

    /* Three data words: 0, 1, 2. */
    so_method(so, eng3d, 0x120, 3);
    for (n = 0; n < 3; ++n)
        so_data(so, n);

    so_method(so, eng3d, 0x1d88, 1);
    so_data  (so, 0x00001200);

    so_method(so, eng3d, NV34TCL_RC_ENABLE, 1);
    so_data  (so, 0);

    so_method(so, eng3d, NV34TCL_DEPTH_RANGE_NEAR, 2);
    so_data  (so, fui(0.0));
    so_data  (so, fui(1.0));

    so_method(so, eng3d, NV34TCL_MULTISAMPLE_CONTROL, 1);
    so_data  (so, 0xffff0000);

    /* enables use of vp rather than fixed-function somehow */
    so_method(so, eng3d, 0x1e94, 1);
    so_data  (so, 0x13);
}

static void nv40_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so)
{
so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2);
so_data (so, screen->base.channel->vram->handle);
so_data (so, screen->base.channel->vram->handle);

so_method(so, screen->eng3d, 0x1ea4, 3);
so_data (so, 0x00000010);
so_data (so, 0x01000100);
so_data (so, 0xff800006);

/* vtxprog output routing */
so_method(so, screen->eng3d, 0x1fc4, 1);
so_data (so, 0x06144321);
so_method(so, screen->eng3d, 0x1fc8, 2);
so_data (so, 0xedcba987);
so_data (so, 0x00000021);
so_method(so, screen->eng3d, 0x1fd0, 1);
so_data (so, 0x00171615);
so_method(so, screen->eng3d, 0x1fd4, 1);
so_data (so, 0x001b1a19);

so_method(so, screen->eng3d, 0x1ef8, 1);
so_data (so, 0x0020ffff);
so_method(so, screen->eng3d, 0x1d64, 1);
so_data (so, 0x00d30000);
so_method(so, screen->eng3d, 0x1e94, 1);
so_data (so, 0x00000001);
}

struct pipe_screen *
nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen);
struct nouveau_channel *chan;
struct pipe_screen *pscreen;
struct nouveau_stateobj *so;
unsigned rankine_class = 0;
int ret, i;
unsigned eng3d_class = 0;
int ret;

if (!screen)
return NULL;

pscreen = &screen->base.base;

ret = nouveau_screen_init(&screen->base, dev);
if (ret) {
nv30_screen_destroy(pscreen);
nvfx_screen_destroy(pscreen);
return NULL;
}
chan = screen->base.channel;

pscreen->winsys = ws;
pscreen->destroy = nv30_screen_destroy;
pscreen->get_param = nv30_screen_get_param;
pscreen->get_paramf = nv30_screen_get_paramf;
pscreen->is_format_supported = nv30_screen_surface_format_supported;
pscreen->context_create = nv30_create;

nv30_screen_init_miptree_functions(pscreen);
pscreen->destroy = nvfx_screen_destroy;
pscreen->get_param = nvfx_screen_get_param;
pscreen->get_paramf = nvfx_screen_get_paramf;
pscreen->is_format_supported = nvfx_screen_surface_format_supported;
pscreen->context_create = nvfx_create;

/* 3D object */
switch (dev->chipset & 0xf0) {
case 0x30:
if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
rankine_class = 0x0397;
else
if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
rankine_class = 0x0697;
else
if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
rankine_class = 0x0497;
eng3d_class = 0x0397;
else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
eng3d_class = 0x0697;
else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
eng3d_class = 0x0497;
break;
default:
case 0x40:
if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
eng3d_class = NV40TCL;
else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
eng3d_class = NV44TCL;
screen->is_nv4x = ~0;
break;
case 0x60:
if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
eng3d_class = NV44TCL;
screen->is_nv4x = ~0;
break;
}

if (!rankine_class) {
NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", dev->chipset);
if (!eng3d_class) {
NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset);
return NULL;
}

ret = nouveau_grobj_alloc(chan, 0xbeef3097, rankine_class,
&screen->rankine);
nvfx_screen_init_miptree_functions(pscreen);

ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d);
if (ret) {
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
@@ -245,13 +362,13 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)

/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
screen->eng2d->buf = nv30_surface_buffer;
screen->eng2d->buf = nvfx_surface_buffer;

/* Notifier for sync purposes */
ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
if (ret) {
NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
nv30_screen_destroy(pscreen);
nvfx_screen_destroy(pscreen);
return NULL;
}

@@ -259,99 +376,54 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query);
if (ret) {
NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
nv30_screen_destroy(pscreen);
nvfx_screen_destroy(pscreen);
return NULL;
}

ret = nouveau_resource_init(&screen->query_heap, 0, 32);
if (ret) {
NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
nv30_screen_destroy(pscreen);
nvfx_screen_destroy(pscreen);
return NULL;
}

/* Vtxprog resources */
if (nouveau_resource_init(&screen->vp_exec_heap, 0, 256) ||
if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) ||
nouveau_resource_init(&screen->vp_data_heap, 0, 256)) {
nv30_screen_destroy(pscreen);
nvfx_screen_destroy(pscreen);
return NULL;
}

/* Static rankine initialisation */
so = so_new(36, 60, 0);
so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
/* Static eng3d initialisation */
/* make the so big and don't worry about exact values
since it will be thrown away immediately after use */
so = so_new(256, 256, 0);
so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2);
so_data (so, chan->vram->handle);
so_data (so, chan->gart->handle);
so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1);
so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1);
so_data (so, chan->vram->handle);
so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2);
so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2);
so_data (so, chan->vram->handle);
so_data (so, chan->vram->handle);
so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2);
so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2);
so_data (so, chan->vram->handle);
so_data (so, chan->gart->handle);
/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2);
so_data (so, 0);
so_data (so, screen->query->handle);*/
so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1);
so_data (so, chan->vram->handle);
so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1);
so_data (so, chan->vram->handle);

for (i=1; i<8; i++) {
so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1);
so_data (so, 0);
so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1);
so_data (so, 0);
}

so_method(so, screen->rankine, 0x220, 1);
so_data (so, 1);

so_method(so, screen->rankine, 0x03b0, 1);
so_data (so, 0x00100000);
so_method(so, screen->rankine, 0x1454, 1);
so_data (so, 0);
so_method(so, screen->rankine, 0x1d80, 1);
so_data (so, 3);
so_method(so, screen->rankine, 0x1450, 1);
so_data (so, 0x00030004);

/* NEW */
so_method(so, screen->rankine, 0x1e98, 1);
so_data (so, 0);
so_method(so, screen->rankine, 0x17e0, 3);
so_data (so, fui(0.0));
so_data (so, fui(0.0));
so_data (so, fui(1.0));
so_method(so, screen->rankine, 0x1f80, 16);
for (i=0; i<16; i++) {
so_data (so, (i==8) ? 0x0000ffff : 0);
}

so_method(so, screen->rankine, 0x120, 3);
so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2);
so_data (so, 0);
so_data (so, 1);
so_data (so, 2);
so_data (so, screen->query->handle);

so_method(so, screen->rankine, 0x1d88, 1);
so_data (so, 0x00001200);

so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1);
so_data (so, 0);

so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2);
so_data (so, fui(0.0));
so_data (so, fui(1.0));

so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
so_data (so, 0xffff0000);
so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 2);
so_data (so, chan->vram->handle);
so_data (so, chan->vram->handle);

/* enables use of vp rather than fixed-function somehow */
so_method(so, screen->rankine, 0x1e94, 1);
so_data (so, 0x13);
if(!screen->is_nv4x)
nv30_screen_init(screen, so);
else
nv40_screen_init(screen, so);

so_emit(chan, so);
so_ref(NULL, &so);

src/gallium/drivers/nv30/nv30_screen.h → src/gallium/drivers/nvfx/nvfx_screen.h 파일 보기

@@ -1,20 +1,21 @@
#ifndef __NV30_SCREEN_H__
#define __NV30_SCREEN_H__
#ifndef __NVFX_SCREEN_H__
#define __NVFX_SCREEN_H__

#include "nouveau/nouveau_screen.h"
#include "nv04_surface_2d.h"

#include "nouveau/nv04_surface_2d.h"

struct nv30_screen {
struct nvfx_screen {
struct nouveau_screen base;

struct nouveau_winsys *nvws;

struct nv30_context *cur_ctx;
struct nvfx_context *cur_ctx;

unsigned is_nv4x; /* either 0 or ~0 */

/* HW graphics objects */
struct nv04_surface_2d *eng2d;
struct nouveau_grobj *rankine;
struct nouveau_grobj *eng3d;
struct nouveau_notifier *sync;

/* Query object resources */
@@ -26,13 +27,13 @@ struct nv30_screen {
struct nouveau_resource *vp_data_heap;

/* Current 3D state of channel */
struct nouveau_stateobj *state[NV30_STATE_MAX];
struct nouveau_stateobj *state[NVFX_STATE_MAX];
};

static INLINE struct nv30_screen *
nv30_screen(struct pipe_screen *screen)
static INLINE struct nvfx_screen *
nvfx_screen(struct pipe_screen *screen)
{
return (struct nv30_screen *)screen;
return (struct nvfx_screen *)screen;
}

#endif

+ 429
- 0
src/gallium/drivers/nvfx/nvfx_shader.h 파일 보기

@@ -0,0 +1,429 @@
#ifndef __NVFX_SHADER_H__
#define __NVFX_SHADER_H__

/* this will resolve to either the NV30 or the NV40 version
* depending on the current hardware */
/* unusual, but very fast and compact method */
#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c))))

#define NVFX_VP_INST_SLOT_VEC 0
#define NVFX_VP_INST_SLOT_SCA 1

#define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
#define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
#define NVFX_VP_INST_IN_NORMAL 2
#define NVFX_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
#define NVFX_VP_INST_IN_COL1 4
#define NVFX_VP_INST_IN_FOGC 5
#define NVFX_VP_INST_IN_TC0 8
/* Vertex input slot of texture coordinate n; the argument is parenthesized so
 * that expression arguments (e.g. "i << 1") are added as a whole. */
#define NVFX_VP_INST_IN_TC(n) (8 + (n))

#define NVFX_VP_INST_SCA_OP_NOP 0x00
#define NVFX_VP_INST_SCA_OP_MOV 0x01
#define NVFX_VP_INST_SCA_OP_RCP 0x02
#define NVFX_VP_INST_SCA_OP_RCC 0x03
#define NVFX_VP_INST_SCA_OP_RSQ 0x04
#define NVFX_VP_INST_SCA_OP_EXP 0x05
#define NVFX_VP_INST_SCA_OP_LOG 0x06
#define NVFX_VP_INST_SCA_OP_LIT 0x07
#define NVFX_VP_INST_SCA_OP_BRA 0x09
#define NVFX_VP_INST_SCA_OP_CAL 0x0B
#define NVFX_VP_INST_SCA_OP_RET 0x0C
#define NVFX_VP_INST_SCA_OP_LG2 0x0D
#define NVFX_VP_INST_SCA_OP_EX2 0x0E
#define NVFX_VP_INST_SCA_OP_SIN 0x0F
#define NVFX_VP_INST_SCA_OP_COS 0x10

#define NV40_VP_INST_SCA_OP_PUSHA 0x13
#define NV40_VP_INST_SCA_OP_POPA 0x14

#define NVFX_VP_INST_VEC_OP_NOP 0x00
#define NVFX_VP_INST_VEC_OP_MOV 0x01
#define NVFX_VP_INST_VEC_OP_MUL 0x02
#define NVFX_VP_INST_VEC_OP_ADD 0x03
#define NVFX_VP_INST_VEC_OP_MAD 0x04
#define NVFX_VP_INST_VEC_OP_DP3 0x05
#define NVFX_VP_INST_VEC_OP_DPH 0x06
#define NVFX_VP_INST_VEC_OP_DP4 0x07
#define NVFX_VP_INST_VEC_OP_DST 0x08
#define NVFX_VP_INST_VEC_OP_MIN 0x09
#define NVFX_VP_INST_VEC_OP_MAX 0x0A
#define NVFX_VP_INST_VEC_OP_SLT 0x0B
#define NVFX_VP_INST_VEC_OP_SGE 0x0C
#define NVFX_VP_INST_VEC_OP_ARL 0x0D
#define NVFX_VP_INST_VEC_OP_FRC 0x0E
#define NVFX_VP_INST_VEC_OP_FLR 0x0F
#define NVFX_VP_INST_VEC_OP_SEQ 0x10
#define NVFX_VP_INST_VEC_OP_SFL 0x11
#define NVFX_VP_INST_VEC_OP_SGT 0x12
#define NVFX_VP_INST_VEC_OP_SLE 0x13
#define NVFX_VP_INST_VEC_OP_SNE 0x14
#define NVFX_VP_INST_VEC_OP_STR 0x15
#define NVFX_VP_INST_VEC_OP_SSG 0x16
#define NVFX_VP_INST_VEC_OP_ARR 0x17
#define NVFX_VP_INST_VEC_OP_ARA 0x18

#define NV40_VP_INST_VEC_OP_TXL 0x19

/* DWORD 3 */
#define NVFX_VP_INST_LAST (1 << 0)

/*
* Each fragment program opcode appears to be comprised of 4 32-bit values.
*
* 0 - Opcode, output reg/mask, ATTRIB source
* 1 - Source 0
* 2 - Source 1
* 3 - Source 2
*
* There appears to be no special difference between result regs and temp regs.
* result.color == R0.xyzw
* result.depth == R1.z
* When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
* otherwise it is set to 1.
*
* Constants are inserted directly after the instruction that uses them.
*
* It appears that it's not possible to use two input registers in one
* instruction as the input sourcing is done in the instruction dword
* and not the source selection dwords. As such instructions such as:
*
* ADD result.color, fragment.color, fragment.texcoord[0];
*
* must be split into two MOV's and then an ADD (nvidia does this) but
* I'm not sure why it's not just one MOV and then source the second input
* in the ADD instruction..
*
* Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
* negation requires multiplication with a const.
*
* Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
* The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
* is implemented simply by not writing to the relevant components of the destination.
*
* Conditional execution
* TODO
*
* Non-native instructions:
* LIT
* LRP - MAD+MAD
* SUB - ADD, negate second source
* RSQ - LG2 + EX2
* POW - LG2 + MUL + EX2
* SCS - COS + SIN
* XPD
*
* NV40 Looping
* Loops appear to be fairly expensive on NV40 at least, the proprietary
* driver goes to a lot of effort to avoid using the native looping
* instructions. If the total number of *executed* instructions between
* REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
* The maximum loop count is 255.
*
*/

//== Opcode / Destination selection ==
#define NVFX_FP_OP_PROGRAM_END (1 << 0)
#define NVFX_FP_OP_OUT_REG_SHIFT 1
#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
#define NV40_FP_OP_OUT_REG_MASK (63 << 1)
/* Needs to be set when writing outputs to get expected result.. */
#define NVFX_FP_OP_OUT_REG_HALF (1 << 7)
#define NVFX_FP_OP_COND_WRITE_ENABLE (1 << 8)
#define NVFX_FP_OP_OUTMASK_SHIFT 9
#define NVFX_FP_OP_OUTMASK_MASK (0xF << 9)
# define NVFX_FP_OP_OUT_X (1<<9)
# define NVFX_FP_OP_OUT_Y (1<<10)
# define NVFX_FP_OP_OUT_Z (1<<11)
# define NVFX_FP_OP_OUT_W (1<<12)
/* Uncertain about these, especially the input_src values.. it's possible that
* they can be dynamically changed.
*/
#define NVFX_FP_OP_INPUT_SRC_SHIFT 13
#define NVFX_FP_OP_INPUT_SRC_MASK (15 << 13)
# define NVFX_FP_OP_INPUT_SRC_POSITION 0x0
# define NVFX_FP_OP_INPUT_SRC_COL0 0x1
# define NVFX_FP_OP_INPUT_SRC_COL1 0x2
# define NVFX_FP_OP_INPUT_SRC_FOGC 0x3
# define NVFX_FP_OP_INPUT_SRC_TC0 0x4
/* Input source selector of texture coordinate n; the argument is
 * parenthesized so that expression arguments are added as a whole. */
# define NVFX_FP_OP_INPUT_SRC_TC(n) (0x4 + (n))
# define NV40_FP_OP_INPUT_SRC_FACING 0xE
#define NVFX_FP_OP_TEX_UNIT_SHIFT 17
#define NVFX_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
#define NVFX_FP_OP_PRECISION_SHIFT 22
#define NVFX_FP_OP_PRECISION_MASK (3 << 22)
# define NVFX_FP_PRECISION_FP32 0
# define NVFX_FP_PRECISION_FP16 1
# define NVFX_FP_PRECISION_FX12 2
#define NVFX_FP_OP_OPCODE_SHIFT 24
#define NVFX_FP_OP_OPCODE_MASK (0x3F << 24)
/* NV30/NV40 fragment program opcodes */
#define NVFX_FP_OP_OPCODE_NOP 0x00
#define NVFX_FP_OP_OPCODE_MOV 0x01
#define NVFX_FP_OP_OPCODE_MUL 0x02
#define NVFX_FP_OP_OPCODE_ADD 0x03
#define NVFX_FP_OP_OPCODE_MAD 0x04
#define NVFX_FP_OP_OPCODE_DP3 0x05
#define NVFX_FP_OP_OPCODE_DP4 0x06
#define NVFX_FP_OP_OPCODE_DST 0x07
#define NVFX_FP_OP_OPCODE_MIN 0x08
#define NVFX_FP_OP_OPCODE_MAX 0x09
#define NVFX_FP_OP_OPCODE_SLT 0x0A
#define NVFX_FP_OP_OPCODE_SGE 0x0B
#define NVFX_FP_OP_OPCODE_SLE 0x0C
#define NVFX_FP_OP_OPCODE_SGT 0x0D
#define NVFX_FP_OP_OPCODE_SNE 0x0E
#define NVFX_FP_OP_OPCODE_SEQ 0x0F
#define NVFX_FP_OP_OPCODE_FRC 0x10
#define NVFX_FP_OP_OPCODE_FLR 0x11
#define NVFX_FP_OP_OPCODE_KIL 0x12
#define NVFX_FP_OP_OPCODE_PK4B 0x13
#define NVFX_FP_OP_OPCODE_UP4B 0x14
#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
#define NVFX_FP_OP_OPCODE_TEX 0x17
#define NVFX_FP_OP_OPCODE_TXP 0x18
#define NVFX_FP_OP_OPCODE_TXD 0x19
#define NVFX_FP_OP_OPCODE_RCP 0x1A
#define NVFX_FP_OP_OPCODE_EX2 0x1C
#define NVFX_FP_OP_OPCODE_LG2 0x1D
#define NVFX_FP_OP_OPCODE_STR 0x20
#define NVFX_FP_OP_OPCODE_SFL 0x21
#define NVFX_FP_OP_OPCODE_COS 0x22
#define NVFX_FP_OP_OPCODE_SIN 0x23
#define NVFX_FP_OP_OPCODE_PK2H 0x24
#define NVFX_FP_OP_OPCODE_UP2H 0x25
#define NVFX_FP_OP_OPCODE_PK4UB 0x27
#define NVFX_FP_OP_OPCODE_UP4UB 0x28
#define NVFX_FP_OP_OPCODE_PK2US 0x29
#define NVFX_FP_OP_OPCODE_UP2US 0x2A
#define NVFX_FP_OP_OPCODE_DP2A 0x2E
#define NVFX_FP_OP_OPCODE_TXB 0x31
#define NVFX_FP_OP_OPCODE_DIV 0x3A

/* NV30 only fragment program opcodes */
#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B
#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E
#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F
#define NVFX_FP_OP_OPCODE_POW_NV30 0x26
#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36

/* NV40 only fragment program opcodes */
#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31
/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
#define NV40_FP_OP_BRA_OPCODE_BRK 0x0
#define NV40_FP_OP_BRA_OPCODE_CAL 0x1
#define NV40_FP_OP_BRA_OPCODE_IF 0x2
#define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
#define NV40_FP_OP_BRA_OPCODE_REP 0x4
#define NV40_FP_OP_BRA_OPCODE_RET 0x5

/* Bit 31; unsigned literal because shifting 1 into the sign bit of int is
 * undefined behaviour. */
#define NVFX_FP_OP_OUT_SAT (1u << 31)

/* high order bits of SRC0 */
#define NVFX_FP_OP_OUT_ABS (1 << 29)
#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27
#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27)
#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25
#define NVFX_FP_OP_COND_SWZ_Z_MASK (3 << 25)
#define NVFX_FP_OP_COND_SWZ_Y_SHIFT 23
#define NVFX_FP_OP_COND_SWZ_Y_MASK (3 << 23)
#define NVFX_FP_OP_COND_SWZ_X_SHIFT 21
#define NVFX_FP_OP_COND_SWZ_X_MASK (3 << 21)
#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT 21
#define NVFX_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
#define NVFX_FP_OP_COND_SHIFT 18
#define NVFX_FP_OP_COND_MASK (0x07 << 18)
# define NVFX_FP_OP_COND_FL 0
# define NVFX_FP_OP_COND_LT 1
# define NVFX_FP_OP_COND_EQ 2
# define NVFX_FP_OP_COND_LE 3
# define NVFX_FP_OP_COND_GT 4
# define NVFX_FP_OP_COND_NE 5
# define NVFX_FP_OP_COND_GE 6
# define NVFX_FP_OP_COND_TR 7

/* high order bits of SRC1 */
/* Bit 31; unsigned literal because shifting 1 into the sign bit of int is
 * undefined behaviour. */
#define NV40_FP_OP_OPCODE_IS_BRANCH (1u << 31)
#define NVFX_FP_OP_DST_SCALE_SHIFT 28
#define NVFX_FP_OP_DST_SCALE_MASK (3 << 28)
#define NVFX_FP_OP_DST_SCALE_1X 0
#define NVFX_FP_OP_DST_SCALE_2X 1
#define NVFX_FP_OP_DST_SCALE_4X 2
#define NVFX_FP_OP_DST_SCALE_8X 3
#define NVFX_FP_OP_DST_SCALE_INV_2X 5
#define NVFX_FP_OP_DST_SCALE_INV_4X 6
#define NVFX_FP_OP_DST_SCALE_INV_8X 7

/* SRC1 LOOP */
#define NV40_FP_OP_LOOP_INCR_SHIFT 19
#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)

/* SRC1 IF */
#define NV40_FP_OP_ELSE_ID_SHIFT 2
#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)

/* SRC1 CAL */
#define NV40_FP_OP_IADDR_SHIFT 2
#define NV40_FP_OP_IADDR_MASK (0xFF << 2)

/* SRC1 REP
* I have no idea why there are 3 count values here.. but they
* have always been filled with the same value in my tests so
* far..
*/
#define NV40_FP_OP_REP_COUNT1_SHIFT 2
#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
#define NV40_FP_OP_REP_COUNT2_SHIFT 10
#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
#define NV40_FP_OP_REP_COUNT3_SHIFT 19
#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)

/* SRC2 REP/IF */
#define NV40_FP_OP_END_ID_SHIFT 2
#define NV40_FP_OP_END_ID_MASK (0xFF << 2)

/* high order bits of SRC2 */
#define NVFX_FP_OP_INDEX_INPUT (1 << 30)
#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)

//== Register selection ==
#define NVFX_FP_REG_TYPE_SHIFT 0
#define NVFX_FP_REG_TYPE_MASK (3 << 0)
# define NVFX_FP_REG_TYPE_TEMP 0
# define NVFX_FP_REG_TYPE_INPUT 1
# define NVFX_FP_REG_TYPE_CONST 2
#define NVFX_FP_REG_SRC_SHIFT 2
#define NV30_FP_REG_SRC_MASK (31 << 2)
#define NV40_FP_REG_SRC_MASK (63 << 2)
#define NVFX_FP_REG_SRC_HALF (1 << 8)
#define NVFX_FP_REG_SWZ_ALL_SHIFT 9
#define NVFX_FP_REG_SWZ_ALL_MASK (255 << 9)
#define NVFX_FP_REG_SWZ_X_SHIFT 9
#define NVFX_FP_REG_SWZ_X_MASK (3 << 9)
#define NVFX_FP_REG_SWZ_Y_SHIFT 11
#define NVFX_FP_REG_SWZ_Y_MASK (3 << 11)
#define NVFX_FP_REG_SWZ_Z_SHIFT 13
#define NVFX_FP_REG_SWZ_Z_MASK (3 << 13)
#define NVFX_FP_REG_SWZ_W_SHIFT 15
#define NVFX_FP_REG_SWZ_W_MASK (3 << 15)
# define NVFX_FP_SWIZZLE_X 0
# define NVFX_FP_SWIZZLE_Y 1
# define NVFX_FP_SWIZZLE_Z 2
# define NVFX_FP_SWIZZLE_W 3
#define NVFX_FP_REG_NEGATE (1 << 17)

#define NVFXSR_NONE 0
#define NVFXSR_OUTPUT 1
#define NVFXSR_INPUT 2
#define NVFXSR_TEMP 3
#define NVFXSR_CONST 4

#define NVFX_COND_FL 0
#define NVFX_COND_LT 1
#define NVFX_COND_EQ 2
#define NVFX_COND_LE 3
#define NVFX_COND_GT 4
#define NVFX_COND_NE 5
#define NVFX_COND_GE 6
#define NVFX_COND_TR 7

/* Yes, these are ordered differently... */

#define NVFX_VP_MASK_X 8
#define NVFX_VP_MASK_Y 4
#define NVFX_VP_MASK_Z 2
#define NVFX_VP_MASK_W 1
#define NVFX_VP_MASK_ALL 0xf

#define NVFX_FP_MASK_X 1
#define NVFX_FP_MASK_Y 2
#define NVFX_FP_MASK_Z 4
#define NVFX_FP_MASK_W 8
#define NVFX_FP_MASK_ALL 0xf

#define NVFX_SWZ_X 0
#define NVFX_SWZ_Y 1
#define NVFX_SWZ_Z 2
#define NVFX_SWZ_W 3

#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w)
#define neg(s) nvfx_sr_neg((s))
#define abs(s) nvfx_sr_abs((s))
#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)

/*
 * Software description of a shader register operand. Instances are created
 * with nvfx_sr() and then adjusted by value through nvfx_sr_swz(),
 * nvfx_sr_neg(), nvfx_sr_abs() and nvfx_sr_scale().
 */
struct nvfx_sreg {
int type; /* presumably one of the NVFXSR_* kinds -- TODO confirm against callers */
int index;

int dst_scale; /* set by nvfx_sr_scale(); the scale() macro passes NVFX_FP_OP_DST_SCALE_* */

int negate; /* toggled by nvfx_sr_neg() */
int abs; /* set to 1 by nvfx_sr_abs() */
int swz[4]; /* per-component swizzle; nvfx_sr() starts with { 0, 1, 2, 3 } */

int cc_update; /* nvfx_sr() initialises to 0 */
int cc_update_reg; /* nvfx_sr() initialises to 0 */
int cc_test; /* nvfx_sr() initialises to NVFX_COND_TR */
int cc_test_reg; /* nvfx_sr() initialises to 0 */
int cc_swz[4]; /* nvfx_sr() starts with { 0, 1, 2, 3 } */
};

static INLINE struct nvfx_sreg
nvfx_sr(int type, int index)
{
struct nvfx_sreg temp = {
.type = type,
.index = index,
.dst_scale = 0,
.abs = 0,
.negate = 0,
.swz = { 0, 1, 2, 3 },
.cc_update = 0,
.cc_update_reg = 0,
.cc_test = NVFX_COND_TR,
.cc_test_reg = 0,
.cc_swz = { 0, 1, 2, 3 },
};
return temp;
}

/*
 * Return a copy of `src` whose swizzle is composed with (x, y, z, w): each
 * output component takes the selector that `src` currently holds at the
 * requested position. All other fields are carried over unchanged.
 */
static INLINE struct nvfx_sreg
nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w)
{
    /* Read all four selectors before overwriting any of them. */
    const int picked[4] = { src.swz[x], src.swz[y], src.swz[z], src.swz[w] };

    src.swz[NVFX_SWZ_X] = picked[0];
    src.swz[NVFX_SWZ_Y] = picked[1];
    src.swz[NVFX_SWZ_Z] = picked[2];
    src.swz[NVFX_SWZ_W] = picked[3];

    return src;
}

static INLINE struct nvfx_sreg
nvfx_sr_neg(struct nvfx_sreg src)
{
src.negate = !src.negate;
return src;
}

static INLINE struct nvfx_sreg
nvfx_sr_abs(struct nvfx_sreg src)
{
src.abs = 1;
return src;
}

static INLINE struct nvfx_sreg
nvfx_sr_scale(struct nvfx_sreg src, int scale)
{
src.dst_scale = scale;
return src;
}

#endif

+ 652
- 0
src/gallium/drivers/nvfx/nvfx_state.c 파일 보기

@@ -0,0 +1,652 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"

#include "draw/draw_context.h"

#include "tgsi/tgsi_parse.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_tex.h"

/*
 * Create a blend CSO: translate `cso` into a state object holding the blend
 * function/equation, colour mask, logic-op and dither methods for eng3d, and
 * keep a copy of the pipe state alongside it.
 *
 * Only render target 0 of `cso` is consulted. Chipsets below 0x40 get the
 * single NV34TCL blend equation; others get the NV40TCL form that also
 * carries the alpha equation in the upper 16 bits.
 *
 * Returns the new nvfx_blend_state as an opaque pointer, or NULL if an
 * allocation fails.
 */
static void *
nvfx_blend_state_create(struct pipe_context *pipe,
                        const struct pipe_blend_state *cso)
{
    struct nvfx_context *nvfx = nvfx_context(pipe);
    struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
    struct nvfx_blend_state *bso;
    struct nouveau_stateobj *so;

    bso = CALLOC(1, sizeof(*bso));
    if (!bso)
        return NULL;

    /* Worst case below is 5 methods and 8 data words. */
    so = so_new(5, 8, 0);
    if (!so) {
        /* NOTE(review): so_new() failure behaviour is not visible here;
         * the check is harmless if it never returns NULL. */
        FREE(bso);
        return NULL;
    }

    if (cso->rt[0].blend_enable) {
        so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3);
        so_data  (so, 1);
        so_data  (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) |
                       nvgl_blend_func(cso->rt[0].rgb_src_factor));
        so_data  (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 |
                      nvgl_blend_func(cso->rt[0].rgb_dst_factor));
        if(nvfx->screen->base.device->chipset < 0x40) {
            so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1);
            so_data  (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
        } else {
            so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1);
            so_data  (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 |
                          nvgl_blend_eqn(cso->rt[0].rgb_func));
        }
    } else {
        so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1);
        so_data  (so, 0);
    }

    /* One enable bit per channel: A at bit 24, R at 16, G at 8, B at 0. */
    so_method(so, eng3d, NV34TCL_COLOR_MASK, 1);
    so_data  (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
                   ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
                   ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) |
                   ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 <<  0) : 0)));

    /* TODO: add NV40 MRT color mask */

    if (cso->logicop_enable) {
        so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
        so_data  (so, 1);
        so_data  (so, nvgl_logicop_func(cso->logicop_func));
    } else {
        so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1);
        so_data  (so, 0);
    }

    so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1);
    so_data  (so, cso->dither ? 1 : 0);

    /* Hand the state object to bso, then drop the local reference. */
    so_ref(so, &bso->so);
    so_ref(NULL, &so);
    bso->pipe = *cso;
    return (void *)bso;
}

/* Make `hwcso` the context's current blend state and flag blend state as dirty. */
static void
nvfx_blend_state_bind(struct pipe_context *pipe, void *hwcso)
{
    struct nvfx_context *ctx = nvfx_context(pipe);

    ctx->dirty |= NVFX_NEW_BLEND;
    ctx->blend = hwcso;
}

/*
 * Destroy a blend CSO created by nvfx_blend_state_create(): drop its
 * reference on the state object, then free the wrapper itself.
 */
static void
nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso)
{
    struct nvfx_blend_state *blend = hwcso;

    (void)pipe;

    so_ref(NULL, &blend->so);
    FREE(blend);
}

static void *
nvfx_sampler_state_create(struct pipe_context *pipe,
const struct pipe_sampler_state *cso)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_sampler_state *ps;

ps = MALLOC(sizeof(struct nvfx_sampler_state));

/* on nv30, we use this as an internal flag */
ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT;
ps->en = 0;
ps->filt = nvfx_tex_filter(cso);
ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
(nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
(nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) |
nvfx_tex_wrap_compare_mode(cso);
ps->bcol = nvfx_tex_border_color(cso->border_color);

if(nvfx->is_nv4x)
nv40_sampler_state_init(pipe, ps, cso);
else
nv30_sampler_state_init(pipe, ps, cso);

return (void *)ps;
}

/*
 * Bind `nr` fragment sampler states.
 *
 * Slots [0, nr) take the supplied pointers; slots that were bound before
 * but lie at or beyond `nr` are cleared.  Every touched slot is marked in
 * dirty_samplers.
 */
static void
nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	const unsigned previous = nvfx->nr_samplers;
	const unsigned limit = (nr > previous) ? nr : previous;
	unsigned slot;

	for (slot = 0; slot < limit; slot++) {
		nvfx->tex_sampler[slot] = (slot < nr) ? sampler[slot] : NULL;
		nvfx->dirty_samplers |= (1 << slot);
	}

	nvfx->dirty |= NVFX_NEW_SAMPLER;
	nvfx->nr_samplers = nr;
}

/* Release a sampler state object; the context argument is not needed. */
static void
nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
	(void)pipe;

	FREE(hwcso);
}

static void
nvfx_set_fragment_sampler_views(struct pipe_context *pipe,
unsigned nr,
struct pipe_sampler_view **views)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
unsigned unit;

for (unit = 0; unit < nr; unit++) {
pipe_sampler_view_reference(&nv30->fragment_sampler_views[unit],
views[unit]);
pipe_texture_reference((struct pipe_texture **)
&nvfx->tex_miptree[unit], miptree[unit]);
nvfx->dirty_samplers |= (1 << unit);
}

for (unit = nr; unit < nvfx->nr_textures; unit++) {
pipe_sampler_view_reference(&nv30->fragment_sampler_views[unit],
NULL);
pipe_texture_reference((struct pipe_texture **)
&nvfx->tex_miptree[unit], NULL);
nvfx->dirty_samplers |= (1 << unit);
}

nvfx->nr_textures = nr;
nvfx->dirty |= NVFX_NEW_SAMPLER;
}


/*
 * Create a sampler view as a copy of `templ` that holds its own reference
 * on `texture` and starts with a reference count of one.
 * Returns NULL when the allocation fails.
 */
static struct pipe_sampler_view *
nv30_create_sampler_view(struct pipe_context *pipe,
			 struct pipe_texture *texture,
			 const struct pipe_sampler_view *templ)
{
	struct pipe_sampler_view *sv = CALLOC_STRUCT(pipe_sampler_view);

	if (!sv)
		return NULL;

	*sv = *templ;
	sv->reference.count = 1;
	/* Clear the copied pointer so the reference helper has nothing to drop. */
	sv->texture = NULL;
	pipe_texture_reference(&sv->texture, texture);
	sv->context = pipe;

	return sv;
}


/* Destroy a sampler view: release its texture reference, then free it. */
static void
nv30_sampler_view_destroy(struct pipe_context *pipe,
			  struct pipe_sampler_view *view)
{
	(void)pipe;

	pipe_texture_reference(&view->texture, NULL);
	FREE(view);
}

/*
 * Build the rasterizer state object for a pipe_rasterizer_state.
 *
 * Records, in order, the shade model, line width/smooth, line stipple,
 * point size, polygon mode/cull/front-face, polygon stipple enable,
 * polygon offset and point sprite methods into a new nouveau_stateobj.
 * The wrapper returned holds a reference to that object plus a copy of
 * the template.
 *
 * NOTE(review): neither the CALLOC nor the so_new result is checked.
 */
static void *
nvfx_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
/* Sized for the 9 methods and 19 data words emitted below. */
struct nouveau_stateobj *so = so_new(9, 19, 0);
struct nouveau_grobj *eng3d = nvfx->screen->eng3d;

/*XXX: ignored:
* light_twoside
* point_smooth -nohw
* multisample
*/

so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT :
NV34TCL_SHADE_MODEL_SMOOTH);

so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2);
/* Line width is scaled by 8 and truncated to a byte. */
so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
so_data (so, cso->line_smooth ? 1 : 0);
so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2);
so_data (so, cso->line_stipple_enable ? 1 : 0);
so_data (so, (cso->line_stipple_pattern << 16) |
cso->line_stipple_factor);

so_method(so, eng3d, NV34TCL_POINT_SIZE, 1);
so_data (so, fui(cso->point_size));

/*
 * Six consecutive words starting at POLYGON_MODE_FRONT: two polygon
 * modes, the cull face, the front-face winding, then poly_smooth and
 * the cull enable.  Which fill mode and cull face are written depends
 * on the front winding.
 */
so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6);
if (cso->front_winding == PIPE_WINDING_CCW) {
so_data(so, nvgl_polygon_mode(cso->fill_ccw));
so_data(so, nvgl_polygon_mode(cso->fill_cw));
switch (cso->cull_mode) {
case PIPE_WINDING_CCW:
so_data(so, NV34TCL_CULL_FACE_FRONT);
break;
case PIPE_WINDING_CW:
so_data(so, NV34TCL_CULL_FACE_BACK);
break;
case PIPE_WINDING_BOTH:
so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
so_data(so, NV34TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV34TCL_FRONT_FACE_CCW);
} else {
so_data(so, nvgl_polygon_mode(cso->fill_cw));
so_data(so, nvgl_polygon_mode(cso->fill_ccw));
switch (cso->cull_mode) {
case PIPE_WINDING_CCW:
so_data(so, NV34TCL_CULL_FACE_BACK);
break;
case PIPE_WINDING_CW:
so_data(so, NV34TCL_CULL_FACE_FRONT);
break;
case PIPE_WINDING_BOTH:
so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
so_data(so, NV34TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV34TCL_FRONT_FACE_CW);
}
so_data(so, cso->poly_smooth ? 1 : 0);
so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);

so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, cso->poly_stipple_enable ? 1 : 0);

/*
 * Three enable words (point, line, fill): each is set when either
 * winding has offset enabled and uses that fill mode.
 */
so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
(cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
so_data(so, 1);
else
so_data(so, 0);
if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
(cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
so_data(so, 1);
else
so_data(so, 0);
if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
(cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
so_data(so, 1);
else
so_data(so, 0);
/* Factor and units are only emitted when some offset is enabled. */
if (cso->offset_cw || cso->offset_ccw) {
so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2);
so_data (so, fui(cso->offset_scale));
so_data (so, fui(cso->offset_units * 2));
}

so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1);
if (cso->point_quad_rasterization) {
/* Bit 0 is always set here; bits 8..15 mirror sprite_coord_enable bits 0..7. */
unsigned psctl = (1 << 0), i;

for (i = 0; i < 8; i++) {
if ((cso->sprite_coord_enable >> i) & 1)
psctl |= (1 << (8 + i));
}

so_data(so, psctl);
} else {
so_data(so, 0);
}

/* Hand the reference over to the wrapper and drop the local one. */
so_ref(so, &rsso->so);
so_ref(NULL, &so);
rsso->pipe = *cso;
return (void *)rsso;
}

/*
 * Make hwcso the current rasterizer state and flag it dirty for both the
 * hardware path and the draw module path.
 */
static void
nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->draw_dirty |= NVFX_NEW_RAST;
	ctx->dirty |= NVFX_NEW_RAST;
	ctx->rasterizer = hwcso;
}

/*
 * Destroy a rasterizer state object: drop its reference on the hardware
 * state object, then free the wrapper.
 */
static void
nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_rasterizer_state *rast = hwcso;

	so_ref(NULL, &rast->so);
	FREE(rast);
}

/*
 * Build the depth/stencil/alpha state object for a
 * pipe_depth_stencil_alpha_state.
 *
 * Records the depth test, alpha test and front/back stencil methods into
 * a new nouveau_stateobj.  The wrapper returned holds a reference to that
 * object plus a copy of the template.
 *
 * NOTE(review): neither the CALLOC nor the so_new result is checked.
 */
static void *
nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
/* Sized for the worst case below: 6 methods and 20 data words. */
struct nouveau_stateobj *so = so_new(6, 20, 0);
struct nouveau_grobj *eng3d = nvfx->screen->eng3d;

/* Depth: compare function, write mask, enable. */
so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3);
so_data (so, nvgl_comparison_op(cso->depth.func));
so_data (so, cso->depth.writemask ? 1 : 0);
so_data (so, cso->depth.enabled ? 1 : 0);

/* Alpha test: enable, compare function, reference value as a byte. */
so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3);
so_data (so, cso->alpha.enabled ? 1 : 0);
so_data (so, nvgl_comparison_op(cso->alpha.func));
so_data (so, float_to_ubyte(cso->alpha.ref_value));

/* Front stencil: full setup when enabled, otherwise just the disable word. */
if (cso->stencil[0].enabled) {
so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3);
so_data (so, cso->stencil[0].enabled ? 1 : 0);
so_data (so, cso->stencil[0].writemask);
so_data (so, nvgl_comparison_op(cso->stencil[0].func));
so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4);
so_data (so, cso->stencil[0].valuemask);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
} else {
so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}

/* Back stencil: same layout as the front face, using stencil[1]. */
if (cso->stencil[1].enabled) {
so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3);
so_data (so, cso->stencil[1].enabled ? 1 : 0);
so_data (so, cso->stencil[1].writemask);
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4);
so_data (so, cso->stencil[1].valuemask);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
} else {
so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}

/* Hand the reference over to the wrapper and drop the local one. */
so_ref(so, &zsaso->so);
so_ref(NULL, &so);
zsaso->pipe = *cso;
return (void *)zsaso;
}

/*
 * Make hwcso the current depth/stencil/alpha state and flag it as needing
 * re-validation.
 */
static void
nvfx_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_ZSA;
	ctx->zsa = hwcso;
}

/*
 * Destroy a depth/stencil/alpha state object: drop its reference on the
 * hardware state object, then free the wrapper.
 */
static void
nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_zsa_state *zsa = hwcso;

	so_ref(NULL, &zsa->so);
	FREE(zsa);
}

static void *
nvfx_vp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_vertex_program *vp;

vp = CALLOC(1, sizeof(struct nvfx_vertex_program));
vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);

return (void *)vp;
}

/*
 * Make hwcso the current vertex program and flag it dirty for both the
 * hardware path and the draw module path.
 */
static void
nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->draw_dirty |= NVFX_NEW_VERTPROG;
	ctx->dirty |= NVFX_NEW_VERTPROG;
	ctx->vertprog = hwcso;
}

/*
 * Destroy a vertex program: delete its draw-module shader, tear down the
 * driver-side program, then free the token copy and the object.
 */
static void
nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_vertex_program *prog = hwcso;
	struct nvfx_context *ctx = nvfx_context(pipe);

	draw_delete_vertex_shader(ctx->draw, prog->draw);
	nvfx_vertprog_destroy(ctx, prog);
	FREE((void*)prog->pipe.tokens);
	FREE(prog);
}

static void *
nvfx_fp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
struct nvfx_fragment_program *fp;

fp = CALLOC(1, sizeof(struct nvfx_fragment_program));
fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);

tgsi_scan_shader(fp->pipe.tokens, &fp->info);

return (void *)fp;
}

/* Make hwcso the current fragment program and flag it for re-validation. */
static void
nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_FRAGPROG;
	ctx->fragprog = hwcso;
}

/*
 * Destroy a fragment program: tear down the driver-side program, then
 * free the token copy and the object.
 */
static void
nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_fragment_program *prog = hwcso;
	struct nvfx_context *ctx = nvfx_context(pipe);

	nvfx_fragprog_destroy(ctx, prog);
	FREE((void*)prog->pipe.tokens);
	FREE(prog);
}

/* Copy the blend colour into the context and flag it for re-validation. */
static void
nvfx_set_blend_color(struct pipe_context *pipe,
		     const struct pipe_blend_color *bcol)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_BCOL;
	ctx->blend_colour = *bcol;
}

/* Copy the stencil reference values into the context and flag them dirty. */
static void
nvfx_set_stencil_ref(struct pipe_context *pipe,
		     const struct pipe_stencil_ref *sr)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_SR;
	ctx->stencil_ref = *sr;
}

/*
 * Copy the user clip state into the context and flag it dirty for both
 * the hardware path and the draw module path.
 */
static void
nvfx_set_clip_state(struct pipe_context *pipe,
		    const struct pipe_clip_state *clip)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->draw_dirty |= NVFX_NEW_UCP;
	ctx->dirty |= NVFX_NEW_UCP;
	ctx->clip = *clip;
}

/*
 * Bind (or unbind) the constant buffer for a shader stage.
 *
 * The buffer pointer is stored as-is and the number of vec4 constants is
 * derived from its size.  A NULL `buf` unbinds the buffer and records
 * zero constants instead of dereferencing the NULL pointer.  The program
 * of the affected stage is flagged for re-validation.
 */
static void
nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
			 struct pipe_buffer *buf )
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	nvfx->constbuf[shader] = buf;
	/* One constant is four floats. */
	nvfx->constbuf_nr[shader] = buf ? buf->size / (4 * sizeof(float)) : 0;

	if (shader == PIPE_SHADER_VERTEX) {
		nvfx->dirty |= NVFX_NEW_VERTPROG;
	} else
	if (shader == PIPE_SHADER_FRAGMENT) {
		nvfx->dirty |= NVFX_NEW_FRAGPROG;
	}
}

/* Copy the framebuffer description into the context and flag it dirty. */
static void
nvfx_set_framebuffer_state(struct pipe_context *pipe,
			   const struct pipe_framebuffer_state *fb)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_FB;
	ctx->framebuffer = *fb;
}

/*
 * Copy the polygon stipple pattern (4 * 32 bytes) into the context and
 * flag it for re-validation.
 */
static void
nvfx_set_polygon_stipple(struct pipe_context *pipe,
			 const struct pipe_poly_stipple *stipple)
{
	struct nvfx_context *ctx = nvfx_context(pipe);
	const size_t pattern_bytes = 4 * 32;

	memcpy(ctx->stipple, stipple->stipple, pattern_bytes);
	ctx->dirty |= NVFX_NEW_STIPPLE;
}

/* Copy the scissor rectangle into the context and flag it dirty. */
static void
nvfx_set_scissor_state(struct pipe_context *pipe,
		       const struct pipe_scissor_state *s)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_SCISSOR;
	ctx->scissor = *s;
}

/*
 * Copy the viewport transform into the context and flag it dirty for both
 * the hardware path and the draw module path.
 */
static void
nvfx_set_viewport_state(struct pipe_context *pipe,
			const struct pipe_viewport_state *vpt)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->draw_dirty |= NVFX_NEW_VIEWPORT;
	ctx->dirty |= NVFX_NEW_VIEWPORT;
	ctx->viewport = *vpt;
}

/*
 * Copy `count` vertex buffer descriptors into the context and flag the
 * vertex arrays dirty for both the hardware and draw module paths.
 */
static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
			const struct pipe_vertex_buffer *vb)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	memcpy(ctx->vtxbuf, vb, sizeof(*vb) * count);
	ctx->vtxbuf_nr = count;

	ctx->draw_dirty |= NVFX_NEW_ARRAYS;
	ctx->dirty |= NVFX_NEW_ARRAYS;
}

/*
 * Create a vertex-elements state object holding a copy of `elements`.
 *
 * Returns the new nvfx_vtxelt_state, or NULL when allocation fails.
 */
static void *
nvfx_vtxelts_state_create(struct pipe_context *pipe,
			  unsigned num_elements,
			  const struct pipe_vertex_element *elements)
{
	struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);

	if (!cso)
		return NULL;	/* do not write through a failed allocation */

	assert(num_elements < 16); /* not doing fallbacks yet */
	cso->num_elements = num_elements;
	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));

/*	nvfx_vtxelt_construct(cso);*/

	return (void *)cso;
}

/* Release a vertex-elements state object; the context is not needed. */
static void
nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
	(void)pipe;

	FREE(hwcso);
}

/*
 * Make hwcso the current vertex-elements state and flag the vertex arrays
 * for re-validation on the hardware path.
 */
static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *ctx = nvfx_context(pipe);

	ctx->dirty |= NVFX_NEW_ARRAYS;
	ctx->vtxelt = hwcso;
	/*ctx->draw_dirty |= NVFX_NEW_ARRAYS;*/
}

void
nvfx_init_state_functions(struct nvfx_context *nvfx)
{
nvfx->pipe.create_blend_state = nvfx_blend_state_create;
nvfx->pipe.bind_blend_state = nvfx_blend_state_bind;
nvfx->pipe.delete_blend_state = nvfx_blend_state_delete;

nvfx->pipe.create_sampler_state = nvfx_sampler_state_create;
nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind;
nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete;
nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture;

nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create;
nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind;
nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete;

nvfx->pipe.create_depth_stencil_alpha_state =
nvfx_depth_stencil_alpha_state_create;
nvfx->pipe.bind_depth_stencil_alpha_state =
nvfx_depth_stencil_alpha_state_bind;
nvfx->pipe.delete_depth_stencil_alpha_state =
nvfx_depth_stencil_alpha_state_delete;

nvfx->pipe.create_vs_state = nvfx_vp_state_create;
nvfx->pipe.bind_vs_state = nvfx_vp_state_bind;
nvfx->pipe.delete_vs_state = nvfx_vp_state_delete;

nvfx->pipe.create_fs_state = nvfx_fp_state_create;
nvfx->pipe.bind_fs_state = nvfx_fp_state_bind;
nvfx->pipe.delete_fs_state = nvfx_fp_state_delete;

nvfx->pipe.set_blend_color = nvfx_set_blend_color;
nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref;
nvfx->pipe.set_clip_state = nvfx_set_clip_state;
nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer;
nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state;
nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple;
nvfx->pipe.set_scissor_state = nvfx_set_scissor_state;
nvfx->pipe.set_viewport_state = nvfx_set_viewport_state;

nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;

nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
}

src/gallium/drivers/nv40/nv40_state.h → src/gallium/drivers/nvfx/nvfx_state.h 파일 보기

@@ -1,29 +1,21 @@
#ifndef __NV40_STATE_H__
#define __NV40_STATE_H__
#ifndef __NVFX_STATE_H__
#define __NVFX_STATE_H__

#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"

struct nv40_sampler_state {
uint32_t fmt;
uint32_t wrap;
uint32_t en;
uint32_t filt;
uint32_t bcol;
};

struct nv40_vertex_program_exec {
struct nvfx_vertex_program_exec {
uint32_t data[4];
boolean has_branch_offset;
int const_index;
};

struct nv40_vertex_program_data {
struct nvfx_vertex_program_data {
int index; /* immediates == -1 */
float value[4];
};

struct nv40_vertex_program {
struct nvfx_vertex_program {
struct pipe_shader_state pipe;

struct draw_vertex_shader *draw;
@@ -32,9 +24,9 @@ struct nv40_vertex_program {

struct pipe_clip_state ucp;

struct nv40_vertex_program_exec *insns;
struct nvfx_vertex_program_exec *insns;
unsigned nr_insns;
struct nv40_vertex_program_data *consts;
struct nvfx_vertex_program_data *consts;
unsigned nr_consts;

struct nouveau_resource *exec;
@@ -49,12 +41,12 @@ struct nv40_vertex_program {
struct nouveau_stateobj *so;
};

struct nv40_fragment_program_data {
struct nvfx_fragment_program_data {
unsigned offset;
unsigned index;
};

struct nv40_fragment_program {
struct nvfx_fragment_program {
struct pipe_shader_state pipe;
struct tgsi_shader_info info;

@@ -64,7 +56,7 @@ struct nv40_fragment_program {
uint32_t *insn;
int insn_len;

struct nv40_fragment_program_data *consts;
struct nvfx_fragment_program_data *consts;
unsigned nr_consts;

struct pipe_buffer *buffer;
@@ -73,9 +65,9 @@ struct nv40_fragment_program {
struct nouveau_stateobj *so;
};

#define NV40_MAX_TEXTURE_LEVELS 16
#define NVFX_MAX_TEXTURE_LEVELS 16

struct nv40_miptree {
struct nvfx_miptree {
struct pipe_texture base;
struct nouveau_bo *bo;

@@ -85,7 +77,7 @@ struct nv40_miptree {
struct {
uint pitch;
uint *image_offset;
} level[NV40_MAX_TEXTURE_LEVELS];
} level[NVFX_MAX_TEXTURE_LEVELS];
};

#endif

+ 41
- 0
src/gallium/drivers/nvfx/nvfx_state_blend.c 파일 보기

@@ -0,0 +1,41 @@
#include "nvfx_context.h"

/*
 * Publish the bound blend state object in the hardware state table.
 * Always reports that the slot changed.
 */
static boolean
nvfx_state_blend_validate(struct nvfx_context *nvfx)
{
	struct nouveau_stateobj *blend_so = nvfx->blend->so;

	so_ref(blend_so, &nvfx->state.hw[NVFX_STATE_BLEND]);
	return TRUE;
}

/* State entry: NVFX_NEW_BLEND triggers validation into slot NVFX_STATE_BLEND. */
struct nvfx_state_entry nvfx_state_blend = {
	.dirty = {
		.hw = NVFX_STATE_BLEND,
		.pipe = NVFX_NEW_BLEND
	},
	.validate = nvfx_state_blend_validate
};

/*
 * Build a one-method state object carrying the blend colour and publish
 * it in the hardware state table.
 *
 * The colour is packed as bytes: color[3] in bits 24..31, color[0] in
 * 16..23, color[1] in 8..15 and color[2] in 0..7.
 */
static boolean
nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
{
	struct pipe_blend_color *colour = &nvfx->blend_colour;
	struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
	struct nouveau_stateobj *obj = so_new(1, 1, 0);

	so_method(obj, eng3d, NV34TCL_BLEND_COLOR, 1);
	so_data(obj, ((float_to_ubyte(colour->color[3]) << 24) |
		      (float_to_ubyte(colour->color[0]) << 16) |
		      (float_to_ubyte(colour->color[1]) <<  8) |
		      (float_to_ubyte(colour->color[2]) <<  0)));

	/* The table takes its own reference; drop the local one. */
	so_ref(obj, &nvfx->state.hw[NVFX_STATE_BCOL]);
	so_ref(NULL, &obj);
	return TRUE;
}

/* State entry: NVFX_NEW_BCOL triggers validation into slot NVFX_STATE_BCOL. */
struct nvfx_state_entry nvfx_state_blend_colour = {
	.dirty = {
		.hw = NVFX_STATE_BCOL,
		.pipe = NVFX_NEW_BCOL
	},
	.validate = nvfx_state_blend_colour_validate
};

+ 179
- 0
src/gallium/drivers/nvfx/nvfx_state_emit.c 파일 보기

@@ -0,0 +1,179 @@
#include "nvfx_context.h"
#include "nvfx_state.h"
#include "draw/draw_context.h"

/*
 * RENDER_STATES(name, vbo) defines a static, NULL-terminated array named
 * <name>render_states listing the state entries to validate.  The lists
 * differ only in their last entry, nvfx_state_<vbo>.
 */
#define RENDER_STATES(name, vbo) \
static struct nvfx_state_entry *name##render_states[] = { \
&nvfx_state_framebuffer, \
&nvfx_state_rasterizer, \
&nvfx_state_scissor, \
&nvfx_state_stipple, \
&nvfx_state_fragprog, \
&nvfx_state_fragtex, \
&nvfx_state_vertprog, \
&nvfx_state_blend, \
&nvfx_state_blend_colour, \
&nvfx_state_zsa, \
&nvfx_state_sr, \
&nvfx_state_viewport, \
&nvfx_state_##vbo, \
NULL \
}

/* render_states ends with nvfx_state_vbo; swtnl_render_states with nvfx_state_vtxfmt. */
RENDER_STATES(, vbo);
RENDER_STATES(swtnl_, vtxfmt);

/*
 * Walk a NULL-terminated list of state entries.  Each entry whose pipe
 * dirty mask intersects nvfx->dirty is validated; when its validate hook
 * returns true the entry's hardware slot is flagged in state.dirty.
 * The pipe dirty mask is cleared afterwards.
 */
static void
nvfx_state_do_validate(struct nvfx_context *nvfx,
		       struct nvfx_state_entry **states)
{
	struct nvfx_state_entry **cursor;

	for (cursor = states; *cursor; cursor++) {
		struct nvfx_state_entry *entry = *cursor;

		if (!(nvfx->dirty & entry->dirty.pipe))
			continue;

		if (entry->validate(nvfx))
			nvfx->state.dirty |= (1ULL << entry->dirty.hw);
	}

	nvfx->dirty = 0;
}

/*
 * Emit every dirty hardware state object of this context to the channel.
 *
 * When another context was last active on the screen, every slot whose
 * object differs from the screen's copy is marked dirty first.  Each
 * dirty slot is then referenced into the screen's table and, if non-NULL,
 * emitted.  On NV4x two TEX_CACHE_CTL writes follow when the fragment
 * program or the first fragment texture slot was dirty.
 */
void
nvfx_state_emit(struct nvfx_context *nvfx)
{
	struct nvfx_screen *screen = nvfx->screen;
	struct nvfx_state *state = &nvfx->state;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *eng3d = screen->eng3d;
	uint64_t pending;
	unsigned slot;

	/* XXX: race conditions
	 */
	if (screen->cur_ctx != nvfx) {
		for (slot = 0; slot < NVFX_STATE_MAX; slot++) {
			if (!state->hw[slot])
				continue;
			if (screen->state[slot] != state->hw[slot])
				state->dirty |= (1ULL << slot);
		}

		screen->cur_ctx = nvfx;
	}

	/* Consume the dirty bits from the lowest slot upwards. */
	pending = state->dirty;
	for (slot = 0; pending; slot++) {
		if (!(pending & (1ULL << slot)))
			continue;

		so_ref(state->hw[slot], &screen->state[slot]);
		if (state->hw[slot])
			so_emit(chan, screen->state[slot]);
		pending &= ~(1ULL << slot);
	}

	/* TODO: could nv30 need this or something similar too? */
	if (nvfx->is_nv4x) {
		const uint64_t tex_bits = (1ULL << NVFX_STATE_FRAGPROG) |
					  (1ULL << NVFX_STATE_FRAGTEX0);

		if (state->dirty & tex_bits) {
			BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1);
			OUT_RING  (chan, 2);
			BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1);
			OUT_RING  (chan, 1);
		}
	}

	state->dirty = 0;
}

/*
 * Channel flush callback: re-emit relocation markers for the state
 * objects that carry buffer references - the framebuffer, each fragment
 * texture slot set in fp_samplers, the fragment program, and the vertex
 * buffers when rendering in HW mode.
 */
void
nvfx_state_flush_notify(struct nouveau_channel *chan)
{
	struct nvfx_context *nvfx = chan->user_private;
	struct nvfx_state *state = &nvfx->state;
	unsigned unit, remaining;

	so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]);

	remaining = state->fp_samplers;
	for (unit = 0; unit < 16 && remaining; unit++) {
		if (remaining & (1 << unit)) {
			so_emit_reloc_markers(chan,
					      state->hw[NVFX_STATE_FRAGTEX0+unit]);
			remaining &= ~(1ULL << unit);
		}
	}

	so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]);

	if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW)
		so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]);
}

/*
 * Validate state for the hardware TnL path.
 *
 * When not currently in HW mode, a switch back is only attempted if every
 * state flagged in fallback_swtnl is dirty again; the pipe is then
 * flushed and viewport, vertex program and arrays are forced dirty.
 *
 * Returns TRUE when hardware rendering can proceed, FALSE when a swtnl or
 * swrast fallback is (still) required.
 */
boolean
nvfx_state_validate(struct nvfx_context *nvfx)
{
	const boolean came_from_sw = nvfx->fallback_swtnl ? TRUE : FALSE;

	if (nvfx->render_mode != HW) {
		const boolean all_changed =
			(nvfx->fallback_swtnl & nvfx->dirty) ==
			nvfx->fallback_swtnl;

		/* Don't even bother trying to go back to hw if none
		 * of the states that caused swtnl previously have changed.
		 */
		if (!all_changed)
			return FALSE;

		/* Attempt to go to hwtnl again */
		nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
		nvfx->dirty |= (NVFX_NEW_VIEWPORT |
				NVFX_NEW_VERTPROG |
				NVFX_NEW_ARRAYS);
		nvfx->render_mode = HW;
	}

	nvfx_state_do_validate(nvfx, render_states);

	if (nvfx->fallback_swtnl || nvfx->fallback_swrast)
		return FALSE;

	if (came_from_sw)
		NOUVEAU_ERR("swtnl->hw\n");

	return TRUE;
}

/*
 * Validate state for the software TnL path.
 *
 * On entry from HW mode the pipe is flushed and viewport, vertex program
 * and arrays are forced dirty.  State flagged in draw_dirty is pushed to
 * the draw module, then the swtnl state list is validated.
 *
 * Returns FALSE when a swrast fallback is required (draw_dirty is left
 * untouched in that case), TRUE otherwise.
 */
boolean
nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
{
	struct draw_context *dctx = nvfx->draw;

	/* Setup for swtnl */
	if (nvfx->render_mode == HW) {
		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
		nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
		nvfx->dirty |= (NVFX_NEW_VIEWPORT |
				NVFX_NEW_VERTPROG |
				NVFX_NEW_ARRAYS);
		nvfx->render_mode = SWTNL;
	}

	if (nvfx->draw_dirty & NVFX_NEW_VERTPROG)
		draw_bind_vertex_shader(dctx, nvfx->vertprog->draw);

	if (nvfx->draw_dirty & NVFX_NEW_RAST)
		draw_set_rasterizer_state(dctx, &nvfx->rasterizer->pipe);

	if (nvfx->draw_dirty & NVFX_NEW_UCP)
		draw_set_clip_state(dctx, &nvfx->clip);

	if (nvfx->draw_dirty & NVFX_NEW_VIEWPORT)
		draw_set_viewport_state(dctx, &nvfx->viewport);

	if (nvfx->draw_dirty & NVFX_NEW_ARRAYS) {
		draw_set_vertex_buffers(dctx, nvfx->vtxbuf_nr, nvfx->vtxbuf);
		draw_set_vertex_elements(dctx, nvfx->vtxelt->num_elements,
					 nvfx->vtxelt->pipe);
	}

	nvfx_state_do_validate(nvfx, swtnl_render_states);

	if (nvfx->fallback_swrast) {
		NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast);
		return FALSE;
	}

	nvfx->draw_dirty = 0;
	return TRUE;
}

+ 234
- 0
src/gallium/drivers/nvfx/nvfx_state_fb.c 파일 보기

@@ -0,0 +1,234 @@
#include "nvfx_context.h"
#include "nouveau/nouveau_util.h"

/* Return the pipe_buffer of the miptree that backs a surface's texture. */
static struct pipe_buffer *
nvfx_do_surface_buffer(struct pipe_surface *surface)
{
	return ((struct nvfx_miptree *)surface->texture)->buffer;
}

/* Convenience wrapper: the nouveau_bo behind a surface. */
#define nvfx_surface_buffer(ps) nouveau_bo(nvfx_do_surface_buffer(ps))

static boolean
nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
{
struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
struct nouveau_channel *chan = nvfx->screen->base.channel;
struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nv04_surface *rt[4], *zeta = NULL;
uint32_t rt_enable = 0, rt_format = 0;
int i, colour_format = 0, zeta_format = 0;
int depth_only = 0;
struct nouveau_stateobj *so = so_new(18, 24, 10);
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
unsigned w = fb->width;
unsigned h = fb->height;
int colour_bits = 32, zeta_bits = 32;

if(!nvfx->is_nv4x)
assert(fb->nr_cbufs <= 2);
else
assert(fb->nr_cbufs <= 4);

for (i = 0; i < fb->nr_cbufs; i++) {
if (colour_format) {
assert(colour_format == fb->cbufs[i]->format);
} else {
colour_format = fb->cbufs[i]->format;
rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
rt[i] = (struct nv04_surface *)fb->cbufs[i];
}
}

if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |
NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))
rt_enable |= NV34TCL_RT_ENABLE_MRT;

if (fb->zsbuf) {
zeta_format = fb->zsbuf->format;
zeta = (struct nv04_surface *)fb->zsbuf;
}

if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 |
NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) {
/* Render to at least a colour buffer */
if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
for (i = 1; i < fb->nr_cbufs; i++)
assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));

rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
(log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
(log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
}
else
rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
} else if (fb->zsbuf) {
depth_only = 1;

/* Render to depth buffer only */
if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));

rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
(log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
(log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
}
else
rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
} else {
return FALSE;
}

switch (colour_format) {
case PIPE_FORMAT_B8G8R8X8_UNORM:
rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
break;
case PIPE_FORMAT_B8G8R8A8_UNORM:
case 0:
rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
break;
case PIPE_FORMAT_B5G6R5_UNORM:
rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
colour_bits = 16;
break;
default:
assert(0);
}

switch (zeta_format) {
case PIPE_FORMAT_Z16_UNORM:
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
zeta_bits = 16;
break;
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
case 0:
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
break;
default:
assert(0);
}

if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) {
/* TODO: does this limitation really exist?
TODO: can it be worked around somehow? */
return FALSE;
}

if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0)
|| ((!nvfx->is_nv4x) && depth_only)) {
struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]);
uint32_t pitch = rt0->pitch;

if(!nvfx->is_nv4x)
{
if (zeta) {
pitch |= (zeta->pitch << 16);
} else {
pitch |= (pitch << 16);
}
}

so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1);
so_reloc (so, nvfx_surface_buffer(&rt0->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2);
so_data (so, pitch);
so_reloc (so, nvfx_surface_buffer(&rt[0]->base),
rt0->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
}

if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1);
so_reloc (so, nvfx_surface_buffer(&rt[1]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2);
so_reloc (so, nvfx_surface_buffer(&rt[1]->base),
rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
so_data (so, rt[1]->pitch);
}

if(nvfx->is_nv4x)
{
if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1);
so_reloc (so, nvfx_surface_buffer(&rt[2]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1);
so_reloc (so, nvfx_surface_buffer(&rt[2]->base),
rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1);
so_data (so, rt[2]->pitch);
}

if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1);
so_reloc (so, nvfx_surface_buffer(&rt[3]->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1);
so_reloc (so, nvfx_surface_buffer(&rt[3]->base),
rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1);
so_data (so, rt[3]->pitch);
}
}

if (zeta_format) {
so_method(so, eng3d, NV34TCL_DMA_ZETA, 1);
so_reloc (so, nvfx_surface_buffer(&zeta->base), 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1);
/* TODO: reverse engineer LMA */
so_reloc (so, nvfx_surface_buffer(&zeta->base),
zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
if(nvfx->is_nv4x) {
so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1);
so_data (so, zeta->pitch);
}
}

so_method(so, eng3d, NV34TCL_RT_ENABLE, 1);
so_data (so, rt_enable);
so_method(so, eng3d, NV34TCL_RT_HORIZ, 3);
so_data (so, (w << 16) | 0);
so_data (so, (h << 16) | 0);
so_data (so, rt_format);
so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2);
so_data (so, (w << 16) | 0);
so_data (so, (h << 16) | 0);
so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
so_data (so, ((w - 1) << 16) | 0);
so_data (so, ((h - 1) << 16) | 0);
so_method(so, eng3d, 0x1d88, 1);
so_data (so, (1 << 12) | h);

if(!nvfx->is_nv4x) {
/* Wonder why this is needed, context should all be set to zero on init */
/* TODO: we can most likely remove this, after putting it in context init */
so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
so_data (so, 0);
}

so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]);
so_ref(NULL, &so);
return TRUE;
}

/* State entry: NVFX_NEW_FB triggers validation into slot NVFX_STATE_FB. */
struct nvfx_state_entry nvfx_state_framebuffer = {
	.dirty = {
		.hw = NVFX_STATE_FB,
		.pipe = NVFX_NEW_FB
	},
	.validate = nvfx_state_framebuffer_validate
};

+ 17
- 0
src/gallium/drivers/nvfx/nvfx_state_rasterizer.c 파일 보기

@@ -0,0 +1,17 @@
#include "nvfx_context.h"

/*
 * Publish the bound rasterizer state object in the hardware state table.
 * Always reports that the slot changed.
 */
static boolean
nvfx_state_rasterizer_validate(struct nvfx_context *nvfx)
{
	struct nouveau_stateobj *rast_so = nvfx->rasterizer->so;

	so_ref(rast_so, &nvfx->state.hw[NVFX_STATE_RAST]);
	return TRUE;
}

/* State entry: NVFX_NEW_RAST triggers validation into slot NVFX_STATE_RAST. */
struct nvfx_state_entry nvfx_state_rasterizer = {
	.dirty = {
		.hw = NVFX_STATE_RAST,
		.pipe = NVFX_NEW_RAST
	},
	.validate = nvfx_state_rasterizer_validate
};

+ 36
- 0
src/gallium/drivers/nvfx/nvfx_state_scissor.c 파일 보기

@@ -0,0 +1,36 @@
#include "nvfx_context.h"

/*
 * Build the scissor state object.
 *
 * Nothing is rebuilt when a scissor object already exists and scissoring
 * is disabled both now and at the previous validation.  With scissoring
 * enabled the rectangle is emitted as (size << 16) | origin per axis;
 * otherwise a 4096-wide window at origin 0 is emitted.
 *
 * Returns TRUE when a new object was stored, FALSE when skipped.
 */
static boolean
nvfx_state_scissor_validate(struct nvfx_context *nvfx)
{
	struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
	struct pipe_scissor_state *rect = &nvfx->scissor;
	struct nouveau_stateobj *obj;

	if (nvfx->state.hw[NVFX_STATE_SCISSOR] &&
	    rast->scissor == 0 &&
	    nvfx->state.scissor_enabled == 0)
		return FALSE;

	nvfx->state.scissor_enabled = rast->scissor;

	obj = so_new(1, 2, 0);
	so_method(obj, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2);
	if (!nvfx->state.scissor_enabled) {
		so_data(obj, 4096 << 16);
		so_data(obj, 4096 << 16);
	} else {
		so_data(obj, ((rect->maxx - rect->minx) << 16) | rect->minx);
		so_data(obj, ((rect->maxy - rect->miny) << 16) | rect->miny);
	}

	/* The table takes its own reference; drop the local one. */
	so_ref(obj, &nvfx->state.hw[NVFX_STATE_SCISSOR]);
	so_ref(NULL, &obj);
	return TRUE;
}

/*
 * State entry: a scissor or rasterizer change triggers validation into
 * slot NVFX_STATE_SCISSOR (the scissor enable lives in the rasterizer).
 */
struct nvfx_state_entry nvfx_state_scissor = {
	.dirty = {
		.hw = NVFX_STATE_SCISSOR,
		.pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST
	},
	.validate = nvfx_state_scissor_validate
};

+ 40
- 0
src/gallium/drivers/nvfx/nvfx_state_stipple.c 파일 보기

@@ -0,0 +1,40 @@
#include "nvfx_context.h"

/*
 * Build the polygon stipple state object.
 *
 * Nothing is rebuilt when a stipple object already exists and stippling
 * is disabled both now and at the previous validation.  With stippling
 * enabled the enable word and the 32-word pattern are emitted; otherwise
 * only the disable word.
 *
 * state.stipple_enabled is now updated on every rebuild, mirroring what
 * the scissor validator does with scissor_enabled.  Previously it was
 * read but never written, so after stippling had been enabled once a
 * later disable was skipped and the stale "enabled" object stayed in the
 * hardware state table.
 *
 * Returns TRUE when a new object was stored, FALSE when skipped.
 */
static boolean
nvfx_state_stipple_validate(struct nvfx_context *nvfx)
{
	struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
	struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
	struct nouveau_stateobj *so;

	if (nvfx->state.hw[NVFX_STATE_STIPPLE] &&
	   (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0))
		return FALSE;

	/* Remember what this object encodes so the skip test above is meaningful. */
	nvfx->state.stipple_enabled = rast->poly_stipple_enable;

	if (rast->poly_stipple_enable) {
		unsigned i;

		so = so_new(2, 33, 0);
		so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
		so_data  (so, 1);
		so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
		for (i = 0; i < 32; i++)
			so_data(so, nvfx->stipple[i]);
	} else {
		so = so_new(1, 1, 0);
		so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
		so_data  (so, 0);
	}

	/* The table takes its own reference; drop the local one. */
	so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]);
	so_ref(NULL, &so);
	return TRUE;
}

/*
 * State entry: a stipple-pattern or rasterizer change triggers validation
 * into slot NVFX_STATE_STIPPLE (the stipple enable lives in the rasterizer).
 */
struct nvfx_state_entry nvfx_state_stipple = {
	.dirty = {
		.hw = NVFX_STATE_STIPPLE,
		.pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST
	},
	.validate = nvfx_state_stipple_validate
};

+ 51
- 0
src/gallium/drivers/nvfx/nvfx_state_viewport.c 파일 보기

@@ -0,0 +1,51 @@
#include "nvfx_context.h"

/*
 * Build the viewport state object.
 *
 * In HW render mode the four translate and four scale components of the
 * pipe viewport are emitted, followed by 1 to method 0x1d78.  In any
 * other mode an identity transform (translate 0, scale 1) is emitted and
 * method 0x1d78 gets 0x110 on NV4x or 1 otherwise.
 *
 * Returns FALSE without rebuilding when an object already exists and the
 * viewport is not dirty; TRUE when a new object was stored.
 */
static boolean
nvfx_state_viewport_validate(struct nvfx_context *nvfx)
{
	struct pipe_viewport_state *vpt = &nvfx->viewport;
	struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
	struct nouveau_stateobj *obj;
	unsigned c;

	if (nvfx->state.hw[NVFX_STATE_VIEWPORT] &&
	    !(nvfx->dirty & NVFX_NEW_VIEWPORT))
		return FALSE;

	obj = so_new(2, 9, 0);
	so_method(obj, eng3d, NV34TCL_VIEWPORT_TRANSLATE_X, 8);
	if (nvfx->render_mode == HW) {
		for (c = 0; c < 4; c++)
			so_data(obj, fui(vpt->translate[c]));
		for (c = 0; c < 4; c++)
			so_data(obj, fui(vpt->scale[c]));
		so_method(obj, eng3d, 0x1d78, 1);
		so_data(obj, 1);
	} else {
		for (c = 0; c < 4; c++)
			so_data(obj, fui(0.0f));
		for (c = 0; c < 4; c++)
			so_data(obj, fui(1.0f));
		so_method(obj, eng3d, 0x1d78, 1);
		so_data(obj, nvfx->is_nv4x ? 0x110 : 1);
	}

	/* The table takes its own reference; drop the local one. */
	so_ref(obj, &nvfx->state.hw[NVFX_STATE_VIEWPORT]);
	so_ref(NULL, &obj);
	return TRUE;
}

/*
 * State-table entry for the viewport transform: ties the validate callback to
 * the NVFX_NEW_VIEWPORT pipe dirty bit and to the NVFX_STATE_VIEWPORT slot of
 * state.hw[] that the callback fills.
 * NOTE(review): how the validation loop consumes these fields is not visible
 * in this file -- confirm against the state validation code.
 */
struct nvfx_state_entry nvfx_state_viewport = {
.validate = nvfx_state_viewport_validate,
.dirty = {
.pipe = NVFX_NEW_VIEWPORT,
.hw = NVFX_STATE_VIEWPORT
}
};

+ 41
- 0
src/gallium/drivers/nvfx/nvfx_state_zsa.c 파일 보기

@@ -0,0 +1,41 @@
#include "nvfx_context.h"

/**
 * Point the NVFX_STATE_ZSA hw state slot at the state object carried by the
 * currently bound zsa CSO (nvfx->zsa->so). Always returns TRUE.
 */
static boolean
nvfx_state_zsa_validate(struct nvfx_context *nvfx)
{
	struct nouveau_stateobj **hw_slot = &nvfx->state.hw[NVFX_STATE_ZSA];

	so_ref(nvfx->zsa->so, hw_slot);
	return TRUE;
}

/*
 * State-table entry for the zsa state object: ties the validate callback to
 * the NVFX_NEW_ZSA pipe dirty bit and to the NVFX_STATE_ZSA slot of
 * state.hw[] that the callback fills.
 * NOTE(review): how the validation loop consumes these fields is not visible
 * in this file -- confirm against the state validation code.
 */
struct nvfx_state_entry nvfx_state_zsa = {
.validate = nvfx_state_zsa_validate,
.dirty = {
.pipe = NVFX_NEW_ZSA,
.hw = NVFX_STATE_ZSA
}
};

/**
 * Build a state object carrying the front and back stencil reference values
 * from nvfx->stencil_ref and store it in state.hw[NVFX_STATE_SR].
 * Always returns TRUE.
 */
static boolean
nvfx_state_sr_validate(struct nvfx_context *nvfx)
{
	/* Two methods, one data word each. */
	struct nouveau_stateobj *stateobj = so_new(2, 2, 0);
	struct pipe_stencil_ref *ref = &nvfx->stencil_ref;
	struct nouveau_grobj *eng3d = nvfx->screen->eng3d;

	/* ref_value[0] goes to the front-face register, [1] to the back-face one. */
	so_method(stateobj, eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1);
	so_data(stateobj, ref->ref_value[0]);

	so_method(stateobj, eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1);
	so_data(stateobj, ref->ref_value[1]);

	/* Publish to the hw state slot, then release the local reference. */
	so_ref(stateobj, &nvfx->state.hw[NVFX_STATE_SR]);
	so_ref(NULL, &stateobj);
	return TRUE;
}

/*
 * State-table entry for the stencil reference values: ties the validate
 * callback to the NVFX_NEW_SR pipe dirty bit and to the NVFX_STATE_SR slot of
 * state.hw[] that the callback fills.
 * NOTE(review): how the validation loop consumes these fields is not visible
 * in this file -- confirm against the state validation code.
 */
struct nvfx_state_entry nvfx_state_sr = {
.validate = nvfx_state_sr_validate,
.dirty = {
.pipe = NVFX_NEW_SR,
.hw = NVFX_STATE_SR
}
};

src/gallium/drivers/nv30/nv30_surface.c → src/gallium/drivers/nvfx/nvfx_surface.c 파일 보기

@@ -1,9 +1,9 @@

/**************************************************************************
*
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -11,11 +11,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -23,40 +23,40 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*
**************************************************************************/

#include "nv30_context.h"
#include "nvfx_context.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_tile.h"

static void
nv30_surface_copy(struct pipe_context *pipe,
nvfx_surface_copy(struct pipe_context *pipe,
struct pipe_surface *dest, unsigned destx, unsigned desty,
struct pipe_surface *src, unsigned srcx, unsigned srcy,
unsigned width, unsigned height)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv04_surface_2d *eng2d = nv30->screen->eng2d;
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;

eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
}

static void
nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
unsigned destx, unsigned desty, unsigned width,
unsigned height, unsigned value)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv04_surface_2d *eng2d = nv30->screen->eng2d;
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;

eng2d->fill(eng2d, dest, destx, desty, width, height, value);
}

void
nv30_init_surface_functions(struct nv30_context *nv30)
nvfx_init_surface_functions(struct nvfx_context *nvfx)
{
nv30->pipe.surface_copy = nv30_surface_copy;
nv30->pipe.surface_fill = nv30_surface_fill;
nvfx->pipe.surface_copy = nvfx_surface_copy;
nvfx->pipe.surface_fill = nvfx_surface_fill;
}

+ 133
- 0
src/gallium/drivers/nvfx/nvfx_tex.h 파일 보기

@@ -0,0 +1,133 @@
#ifndef NVFX_TEX_H_
#define NVFX_TEX_H_

/**
 * Translate a gallium PIPE_TEX_WRAP_* mode into the hardware wrap encoding.
 *
 * The lookup uses the S-axis register constants and the result is shifted
 * right by NV34TCL_TX_WRAP_S_SHIFT before being returned.
 * An unrecognised mode is reported through NOUVEAU_ERR and treated as REPEAT.
 */
static inline unsigned
nvfx_tex_wrap_mode(unsigned wrap) {
	unsigned ret;

	switch (wrap) {
	case PIPE_TEX_WRAP_REPEAT:
		ret = NV34TCL_TX_WRAP_S_REPEAT;
		break;
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
		break;
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
		break;
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
		break;
	case PIPE_TEX_WRAP_CLAMP:
		ret = NV34TCL_TX_WRAP_S_CLAMP;
		break;
	/* The mirror-clamp variants only have NV40TCL_* encodings here. */
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
		break;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
		break;
	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
		break;
	default:
		/* wrap is unsigned, so it must be printed with %u, not %d. */
		NOUVEAU_ERR("unknown wrap mode: %u\n", wrap);
		ret = NV34TCL_TX_WRAP_S_REPEAT;
		break;
	}

	return ret >> NV34TCL_TX_WRAP_S_SHIFT;
}

/**
 * Map the sampler's compare function to the NV34TCL_TX_WRAP_RCOMP_* value.
 *
 * Returns 0 when compare_mode is not PIPE_TEX_COMPARE_R_TO_TEXTURE or when
 * compare_func is not one of the PIPE_FUNC_* values handled below.
 */
static inline unsigned
nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso)
{
	unsigned rcomp = 0;

	/* Comparison disabled: no RCOMP bits at all. */
	if (cso->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE)
		return rcomp;

	switch (cso->compare_func) {
	case PIPE_FUNC_NEVER:
		rcomp = NV34TCL_TX_WRAP_RCOMP_NEVER;
		break;
	case PIPE_FUNC_LESS:
		rcomp = NV34TCL_TX_WRAP_RCOMP_LESS;
		break;
	case PIPE_FUNC_EQUAL:
		rcomp = NV34TCL_TX_WRAP_RCOMP_EQUAL;
		break;
	case PIPE_FUNC_LEQUAL:
		rcomp = NV34TCL_TX_WRAP_RCOMP_LEQUAL;
		break;
	case PIPE_FUNC_GREATER:
		rcomp = NV34TCL_TX_WRAP_RCOMP_GREATER;
		break;
	case PIPE_FUNC_NOTEQUAL:
		rcomp = NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
		break;
	case PIPE_FUNC_GEQUAL:
		rcomp = NV34TCL_TX_WRAP_RCOMP_GEQUAL;
		break;
	case PIPE_FUNC_ALWAYS:
		rcomp = NV34TCL_TX_WRAP_RCOMP_ALWAYS;
		break;
	default:
		/* Unknown function: leave rcomp at 0. */
		break;
	}

	return rcomp;
}

/**
 * Compute the NV34TCL_TX_FILTER_* magnify and minify bits for a sampler.
 *
 * Any mag/min image filter other than PIPE_TEX_FILTER_LINEAR is treated as
 * NEAREST, and any mip filter other than NEAREST/LINEAR is treated as NONE.
 * Returns the OR of the selected magnify and minify values.
 */
static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso)
{
	unsigned mag_bits;
	unsigned min_bits;

	if (cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
		mag_bits = NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
	else
		mag_bits = NV34TCL_TX_FILTER_MAGNIFY_NEAREST;

	if (cso->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
		/* Linear within a level; pick the mip selection variant. */
		if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST)
			min_bits = NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
		else if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
			min_bits = NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
		else
			min_bits = NV34TCL_TX_FILTER_MINIFY_LINEAR;
	} else {
		/* Nearest within a level; pick the mip selection variant. */
		if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST)
			min_bits = NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
		else if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
			min_bits = NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
		else
			min_bits = NV34TCL_TX_FILTER_MINIFY_NEAREST;
	}

	return mag_bits | min_bits;
}

/**
 * Pack a four-float border colour into one 32-bit word.
 *
 * Each component is converted with float_to_ubyte() and placed as:
 *   border_color[3] -> bits 31..24
 *   border_color[0] -> bits 23..16
 *   border_color[1] -> bits 15..8
 *   border_color[2] -> bits  7..0
 * NOTE(review): presumably the input is RGBA, making this an ARGB word --
 * confirm against the caller.
 */
static inline unsigned nvfx_tex_border_color(const float* border_color)
{
	/*
	 * Widen to unsigned before shifting. float_to_ubyte() presumably yields
	 * an 8-bit value that would otherwise promote to signed int, and
	 * shifting a byte >= 0x80 left by 24 in signed int is undefined.
	 */
	const unsigned c3 = float_to_ubyte(border_color[3]);
	const unsigned c0 = float_to_ubyte(border_color[0]);
	const unsigned c1 = float_to_ubyte(border_color[1]);
	const unsigned c2 = float_to_ubyte(border_color[2]);

	return (c3 << 24) | (c0 << 16) | (c1 << 8) | (c2 << 0);
}

/*
 * Driver-side sampler state: five 32-bit words.
 * NOTE(review): the field names suggest precomputed texture-unit register
 * values (format, wrap, enable, filter, border colour), but the code that
 * fills and emits them is not visible here -- confirm before relying on it.
 */
struct nvfx_sampler_state {
uint32_t fmt;
uint32_t wrap;
uint32_t en;
uint32_t filt;
uint32_t bcol;
};

#endif /* NVFX_TEX_H_ */

src/gallium/drivers/nv30/nv30_transfer.c → src/gallium/drivers/nvfx/nvfx_transfer.c 파일 보기

@@ -5,18 +5,18 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "nouveau/nouveau_winsys.h"
#include "nv30_context.h"
#include "nv30_screen.h"
#include "nv30_state.h"
#include "nvfx_context.h"
#include "nvfx_screen.h"
#include "nvfx_state.h"

struct nv30_transfer {
struct nvfx_transfer {
struct pipe_transfer base;
struct pipe_surface *surface;
boolean direct;
};

static void
nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
nvfx_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
@@ -33,17 +33,17 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned h
}

static struct pipe_transfer *
nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
nvfx_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
enum pipe_transfer_usage usage,
unsigned x, unsigned y, unsigned w, unsigned h)
{
struct pipe_screen *pscreen = pcontext->screen;
struct nv30_miptree *mt = (struct nv30_miptree *)pt;
struct nv30_transfer *tx;
struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
struct nvfx_transfer *tx;
struct pipe_texture tx_tex_template, *tx_tex;

tx = CALLOC_STRUCT(nv30_transfer);
tx = CALLOC_STRUCT(nvfx_transfer);
if (!tx)
return NULL;

@@ -72,7 +72,7 @@ nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,

tx->direct = false;

nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template);
nvfx_compatible_transfer_tex(pt, w, h, &tx_tex_template);

tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -81,7 +81,7 @@ nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
return NULL;
}

tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch;
tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch;

tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
@@ -97,7 +97,7 @@ nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
}

if (usage & PIPE_TRANSFER_READ) {
struct nv30_screen *nvscreen = nv30_screen(pscreen);
struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
struct pipe_surface *src;

src = pscreen->get_tex_surface(pscreen, pt,
@@ -118,14 +118,14 @@ nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
}

static void
nv30_transfer_del(struct pipe_context *pcontext,
nvfx_transfer_del(struct pipe_context *pcontext,
struct pipe_transfer *ptx)
{
struct nv30_transfer *tx = (struct nv30_transfer *)ptx;
struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;

if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
struct pipe_screen *pscreen = pcontext->screen;
struct nv30_screen *nvscreen = nv30_screen(pscreen);
struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
struct pipe_surface *dst;

dst = pscreen->get_tex_surface(pscreen, ptx->texture,
@@ -147,12 +147,12 @@ nv30_transfer_del(struct pipe_context *pcontext,
}

static void *
nv30_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
nvfx_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct pipe_screen *pscreen = pcontext->screen;
struct nv30_transfer *tx = (struct nv30_transfer *)ptx;
struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture;
struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));

@@ -163,20 +163,20 @@ nv30_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
}

static void
nv30_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
nvfx_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct pipe_screen *pscreen = pcontext->screen;
struct nv30_transfer *tx = (struct nv30_transfer *)ptx;
struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture;
struct pipe_screen *pscreen = pcontext->screen;
struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;

pipe_buffer_unmap(pscreen, mt->buffer);
}

void
nv30_init_transfer_functions(struct nv30_context *nv30)
nvfx_init_transfer_functions(struct nvfx_context *nvfx)
{
nv30->pipe.get_tex_transfer = nv30_transfer_new;
nv30->pipe.tex_transfer_destroy = nv30_transfer_del;
nv30->pipe.transfer_map = nv30_transfer_map;
nv30->pipe.transfer_unmap = nv30_transfer_unmap;
nvfx->pipe.get_tex_transfer = nvfx_transfer_new;
nvfx->pipe.tex_transfer_destroy = nvfx_transfer_del;
nvfx->pipe.transfer_map = nvfx_transfer_map;
nvfx->pipe.transfer_unmap = nvfx_transfer_unmap;
}

src/gallium/drivers/nv30/nv30_vbo.c → src/gallium/drivers/nvfx/nvfx_vbo.c 파일 보기

@@ -3,17 +3,24 @@
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "nv30_context.h"
#include "nv30_state.h"
#include "nvfx_context.h"
#include "nvfx_state.h"

#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
#include "nouveau/nouveau_util.h"

#define FORCE_SWTNL 0
/**
 * Report whether the NOUVEAU_SWTNL debug option is set.
 *
 * The option is read through debug_get_bool_option() (default 0) on the first
 * call and the result is cached in a function-local static for later calls.
 * The nvfx argument is not used.
 */
static boolean
nvfx_force_swtnl(struct nvfx_context *nvfx)
{
	/* -1 means "not queried yet". */
	static int cached_option = -1;

	if (cached_option >= 0)
		return cached_option;

	cached_option = debug_get_bool_option("NOUVEAU_SWTNL", 0);
	return cached_option;
}

static INLINE int
nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
{
switch (pipe) {
case PIPE_FORMAT_R32_FLOAT:
@@ -69,15 +76,15 @@ nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
}

static boolean
nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib,
nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib,
unsigned ib_size)
{
struct pipe_screen *pscreen = &nv30->screen->base.base;
struct pipe_screen *pscreen = &nvfx->screen->base.base;
unsigned type;

if (!ib) {
nv30->idxbuf = NULL;
nv30->idxbuf_format = 0xdeadbeef;
nvfx->idxbuf = NULL;
nvfx->idxbuf_format = 0xdeadbeef;
return FALSE;
}

@@ -95,27 +102,27 @@ nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib,
return FALSE;
}

if (ib != nv30->idxbuf ||
type != nv30->idxbuf_format) {
nv30->dirty |= NV30_NEW_ARRAYS;
nv30->idxbuf = ib;
nv30->idxbuf_format = type;
if (ib != nvfx->idxbuf ||
type != nvfx->idxbuf_format) {
nvfx->dirty |= NVFX_NEW_ARRAYS;
nvfx->idxbuf = ib;
nvfx->idxbuf_format = type;
}

return TRUE;
}

static boolean
nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
nvfx_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so,
int attrib, struct pipe_vertex_element *ve,
struct pipe_vertex_buffer *vb)
{
struct pipe_screen *pscreen = nv30->pipe.screen;
struct nouveau_grobj *rankine = nv30->screen->rankine;
struct pipe_screen *pscreen = nvfx->pipe.screen;
struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned type, ncomp;
void *map;

if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp))
if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp))
return FALSE;

map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
@@ -128,25 +135,25 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,

switch (ncomp) {
case 4:
so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4);
so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4);
so_data (so, fui(v[0]));
so_data (so, fui(v[1]));
so_data (so, fui(v[2]));
so_data (so, fui(v[3]));
break;
case 3:
so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3);
so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3);
so_data (so, fui(v[0]));
so_data (so, fui(v[1]));
so_data (so, fui(v[2]));
break;
case 2:
so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2);
so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2);
so_data (so, fui(v[0]));
so_data (so, fui(v[1]));
break;
case 1:
so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1);
so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1);
so_data (so, fui(v[0]));
break;
default:
@@ -165,26 +172,26 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
}

void
nv30_draw_arrays(struct pipe_context *pipe,
nvfx_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_screen *screen = nv30->screen;
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;
struct nouveau_grobj *eng3d = screen->eng3d;
unsigned restart = 0;

nv30_vbo_set_idxbuf(nv30, NULL, 0);
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
return;
nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) {
nvfx_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);
return;
}

while (count) {
unsigned vc, nr;

nv30_state_emit(nv30);
nvfx_state_emit(nvfx);

vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256,
mode, start, count, &restart);
@@ -193,12 +200,12 @@ nv30_draw_arrays(struct pipe_context *pipe,
continue;
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

nr = (vc & 0xff);
if (nr) {
BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1);
OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -209,14 +216,14 @@ nv30_draw_arrays(struct pipe_context *pipe,

nr -= push;

BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push);
BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push);
while (push--) {
OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, 0);

count -= vc;
@@ -227,18 +234,18 @@ nv30_draw_arrays(struct pipe_context *pipe,
}

static INLINE void
nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_screen *screen = nv30->screen;
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;
struct nouveau_grobj *eng3d = screen->eng3d;

while (count) {
uint8_t *elts = (uint8_t *)ib + start;
unsigned vc, push, restart = 0;

nv30_state_emit(nv30);
nvfx_state_emit(nvfx);

vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2,
mode, start, count, &restart);
@@ -248,11 +255,11 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
}
count -= vc;

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

if (vc & 1) {
BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -262,7 +269,7 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,

push = MIN2(vc, 2047 * 2);

BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
OUT_RING(chan, (elts[i+1] << 16) | elts[i]);

@@ -270,7 +277,7 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
elts += push;
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, 0);

start = restart;
@@ -278,18 +285,18 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
}

static INLINE void
nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_screen *screen = nv30->screen;
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;
struct nouveau_grobj *eng3d = screen->eng3d;

while (count) {
uint16_t *elts = (uint16_t *)ib + start;
unsigned vc, push, restart = 0;

nv30_state_emit(nv30);
nvfx_state_emit(nvfx);

vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2,
mode, start, count, &restart);
@@ -299,11 +306,11 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
}
count -= vc;

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

if (vc & 1) {
BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -313,7 +320,7 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,

push = MIN2(vc, 2047 * 2);

BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
OUT_RING(chan, (elts[i+1] << 16) | elts[i]);

@@ -321,7 +328,7 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
elts += push;
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, 0);

start = restart;
@@ -329,18 +336,18 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
}

static INLINE void
nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_screen *screen = nv30->screen;
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;
struct nouveau_grobj *eng3d = screen->eng3d;

while (count) {
uint32_t *elts = (uint32_t *)ib + start;
unsigned vc, push, restart = 0;

nv30_state_emit(nv30);
nvfx_state_emit(nvfx);

vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1,
mode, start, count, &restart);
@@ -350,20 +357,20 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
}
count -= vc;

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

while (vc) {
push = MIN2(vc, 2047);

BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push);
BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push);
OUT_RINGp (chan, elts, push);

vc -= push;
elts += push;
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, 0);

start = restart;
@@ -371,11 +378,11 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
}

static void
nv30_draw_elements_inline(struct pipe_context *pipe,
nvfx_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nvfx_context *nvfx = nvfx_context(pipe);
struct pipe_screen *pscreen = pipe->screen;
void *map;

@@ -387,13 +394,13 @@ nv30_draw_elements_inline(struct pipe_context *pipe,

switch (ib_size) {
case 1:
nv30_draw_elements_u08(nv30, map, mode, start, count);
nvfx_draw_elements_u08(nvfx, map, mode, start, count);
break;
case 2:
nv30_draw_elements_u16(nv30, map, mode, start, count);
nvfx_draw_elements_u16(nvfx, map, mode, start, count);
break;
case 4:
nv30_draw_elements_u32(nv30, map, mode, start, count);
nvfx_draw_elements_u32(nvfx, map, mode, start, count);
break;
default:
NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
@@ -404,19 +411,19 @@ nv30_draw_elements_inline(struct pipe_context *pipe,
}

static void
nv30_draw_elements_vbo(struct pipe_context *pipe,
nvfx_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_screen *screen = nv30->screen;
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *rankine = screen->rankine;
struct nouveau_grobj *eng3d = screen->eng3d;
unsigned restart = 0;

while (count) {
unsigned nr, vc;

nv30_state_emit(nv30);
nvfx_state_emit(nvfx);

vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256,
mode, start, count, &restart);
@@ -425,12 +432,12 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
continue;
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, nvgl_primitive(mode));

nr = (vc & 0xff);
if (nr) {
BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1);
OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -441,14 +448,14 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,

nr -= push;

BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push);
BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push);
while (push--) {
OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}

BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
OUT_RING (chan, 0);

count -= vc;
@@ -457,24 +464,24 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
}

void
nv30_draw_elements(struct pipe_context *pipe,
nvfx_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nvfx_context *nvfx = nvfx_context(pipe);
boolean idxbuf;

idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize);
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) {
nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize,
mode, start, count);
return;
}

if (idxbuf) {
nv30_draw_elements_vbo(pipe, mode, start, count);
nvfx_draw_elements_vbo(pipe, mode, start, count);
} else {
nv30_draw_elements_inline(pipe, indexBuffer, indexSize,
nvfx_draw_elements_inline(pipe, indexBuffer, indexSize,
mode, start, count);
}

@@ -482,49 +489,50 @@ nv30_draw_elements(struct pipe_context *pipe,
}

static boolean
nv30_vbo_validate(struct nv30_context *nv30)
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
struct nouveau_grobj *rankine = nv30->screen->rankine;
struct pipe_buffer *ib = nv30->idxbuf;
unsigned ib_format = nv30->idxbuf_format;
struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_buffer *ib = nvfx->idxbuf;
unsigned ib_format = nvfx->idxbuf_format;
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;

vtxbuf = so_new(3, 17, 18);
so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt->num_elements);
so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements);
vtxfmt = so_new(1, 16, 0);
so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt->num_elements);
so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements);

for (hw = 0; hw < nv30->vtxelt->num_elements; hw++) {
for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) {
struct pipe_vertex_element *ve;
struct pipe_vertex_buffer *vb;
unsigned type, ncomp;

ve = &nv30->vtxelt->pipe[hw];
vb = &nv30->vtxbuf[ve->vertex_buffer_index];
ve = &nvfx->vtxelt->pipe[hw];
vb = &nvfx->vtxbuf[ve->vertex_buffer_index];

if (!vb->stride) {
if (!sattr)
sattr = so_new(16, 16 * 4, 0);

if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
if (nvfx_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) {
so_data(vtxbuf, 0);
so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT);
continue;
}
}

if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
/*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
so_ref(NULL, &vtxbuf);
so_ref(NULL, &vtxfmt);
return FALSE;
}

so_reloc(vtxbuf, nouveau_bo(vb->buffer), vb->buffer_offset +
ve->src_offset, vb_flags | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
so_reloc(vtxbuf, nouveau_bo(vb->buffer),
vb->buffer_offset + ve->src_offset,
vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
0, NV34TCL_VTXBUF_ADDRESS_DMA1);
so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
(ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type));
}
@@ -532,31 +540,31 @@ nv30_vbo_validate(struct nv30_context *nv30)
if (ib) {
struct nouveau_bo *bo = nouveau_bo(ib);

so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2);
so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2);
so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR,
0, NV34TCL_IDXBUF_FORMAT_DMA1);
}

so_method(vtxbuf, rankine, 0x1710, 1);
so_method(vtxbuf, eng3d, 0x1710, 1);
so_data (vtxbuf, 0);

so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]);
so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]);
so_ref(NULL, &vtxbuf);
nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF);
so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]);
nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF);
so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]);
so_ref(NULL, &vtxfmt);
nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT);
so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]);
nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT);
so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]);
so_ref(NULL, &sattr);
nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR);
nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR);
return FALSE;
}

struct nv30_state_entry nv30_state_vbo = {
.validate = nv30_vbo_validate,
struct nvfx_state_entry nvfx_state_vbo = {
.validate = nvfx_vbo_validate,
.dirty = {
.pipe = NV30_NEW_ARRAYS,
.pipe = NVFX_NEW_ARRAYS,
.hw = 0,
}
};

src/gallium/drivers/nvfx/nvfx_vertprog.c
파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
파일 보기


+ 1
- 0
src/gallium/drivers/r300/Makefile 파일 보기

@@ -14,6 +14,7 @@ C_SOURCES = \
r300_query.c \
r300_render.c \
r300_screen.c \
r300_screen_buffer.c \
r300_state.c \
r300_state_derived.c \
r300_state_invariant.c \

+ 28
- 10
src/gallium/drivers/r300/r300_context.c 파일 보기

@@ -24,6 +24,7 @@

#include "util/u_memory.h"
#include "util/u_simple_list.h"
#include "util/u_upload_mgr.h"

#include "r300_blit.h"
#include "r300_context.h"
@@ -55,6 +56,9 @@ static void r300_destroy_context(struct pipe_context* context)
FREE(query);
}

u_upload_destroy(r300->upload_vb);
u_upload_destroy(r300->upload_ib);

FREE(r300->blend_color_state.state);
FREE(r300->clip_state.state);
FREE(r300->fb_state.state);
@@ -72,8 +76,7 @@ r300_is_texture_referenced(struct pipe_context *pipe,
struct pipe_texture *texture,
unsigned face, unsigned level)
{
return pipe->is_buffer_referenced(pipe,
((struct r300_texture *)texture)->buffer);
return 0;
}

static unsigned int
@@ -157,16 +160,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
{
struct r300_context* r300 = CALLOC_STRUCT(r300_context);
struct r300_screen* r300screen = r300_screen(screen);
struct radeon_winsys* radeon_winsys = r300screen->radeon_winsys;
struct r300_winsys_screen *rws = r300screen->rws;

if (!r300)
return NULL;

r300screen->ctx = (struct pipe_context*)r300;

r300->winsys = radeon_winsys;
r300->rws = rws;

r300->context.winsys = (struct pipe_winsys*)radeon_winsys;
r300->context.winsys = (struct pipe_winsys*)rws;
r300->context.screen = screen;
r300->context.priv = priv;

@@ -212,16 +213,33 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,

r300_init_transfer_functions(r300);

/* r300_init_surface_functions(r300); */

r300_init_state_functions(r300);

r300->invariant_state.dirty = TRUE;

r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
rws->set_flush_cb(r300->rws, r300_flush_cb, r300);
r300->dirty_hw++;

r300->blitter = util_blitter_create(&r300->context);

r300->upload_ib = u_upload_create(screen,
32 * 1024, 16,
PIPE_BUFFER_USAGE_INDEX);

if (r300->upload_ib == NULL)
goto no_upload_ib;

r300->upload_vb = u_upload_create(screen,
128 * 1024, 16,
PIPE_BUFFER_USAGE_VERTEX);
if (r300->upload_vb == NULL)
goto no_upload_vb;

return &r300->context;

no_upload_ib:
u_upload_destroy(r300->upload_ib);
no_upload_vb:
FREE(r300);
return NULL;
}

+ 7
- 2
src/gallium/drivers/r300/r300_context.h 파일 보기

@@ -32,6 +32,7 @@

#include "r300_screen.h"

struct u_upload_mgr;
struct r300_context;

struct r300_fragment_shader;
@@ -268,7 +269,7 @@ struct r300_texture {
boolean is_npot;

/* Pipe buffer backing this texture. */
struct pipe_buffer* buffer;
struct r300_winsys_buffer *buffer;

/* Registers carrying texture format data. */
struct r300_texture_format_state state;
@@ -302,7 +303,7 @@ struct r300_context {
struct pipe_context context;

/* The interface to the windowing system, etc. */
struct radeon_winsys* winsys;
struct r300_winsys_screen *rws;
/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
/* Accelerated blit support. */
@@ -369,6 +370,7 @@ struct r300_context {
int vertex_buffer_max_index;
/* Vertex elements for Gallium. */
struct r300_vertex_element_state *velems;
bool any_user_vbs;

/* Vertex info for Draw. */
struct vertex_info vertex_info;
@@ -389,6 +391,9 @@ struct r300_context {
uint32_t zbuffer_bpp;
/* Whether scissor is enabled. */
boolean scissor_enabled;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
};

/* Convenience cast wrapper. */

+ 17
- 5
src/gallium/drivers/r300/r300_cs.h 파일 보기

@@ -51,7 +51,7 @@

#define CS_LOCALS(context) \
struct r300_context* const cs_context_copy = (context); \
struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \
struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \
int cs_count = 0; (void) cs_count;

#define CHECK_CS(size) \
@@ -105,22 +105,34 @@
cs_count--; \
} while (0)

#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \
DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \
"domains (%d, %d, %d)\n", \
bo, offset, rd, wd, flags); \
assert(bo); \
cs_winsys->write_cs_dword(cs_winsys, offset); \
cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \
cs_count -= 3; \
} while (0)

#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \

#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \
DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \
"domains (%d, %d, %d)\n", \
tex, offset, rd, wd, flags); \
assert(tex); \
cs_winsys->write_cs_dword(cs_winsys, offset); \
r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \
cs_count -= 3; \
} while (0)


#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \
DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \
"domains (%d, %d, %d)\n", \
bo, rd, wd, flags); \
assert(bo); \
cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \
cs_count -= 2; \
} while (0)


+ 1
- 0
src/gallium/drivers/r300/r300_debug.c 파일 보기

@@ -37,6 +37,7 @@ static struct debug_option debug_options[] = {
{ "draw", DBG_DRAW, "Draw and emit" },
{ "tex", DBG_TEX, "Textures" },
{ "fall", DBG_FALL, "Fallbacks" },
{ "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking purposes only!)" },

{ "all", ~0, "Convenience option that enables all debug flags" },


+ 41
- 34
src/gallium/drivers/r300/r300_emit.c 파일 보기

@@ -32,6 +32,8 @@
#include "r300_emit.h"
#include "r300_fs.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
#include "r300_state_inlines.h"
#include "r300_vs.h"

void r300_emit_blend_state(struct r300_context* r300,
@@ -415,10 +417,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
assert(tex && tex->buffer && "cbuf is marked, but NULL!");

OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);

OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, tex->fb_state.colorpitch[surf->level],
OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level],
0, RADEON_GEM_DOMAIN_VRAM, 0);

OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt);
@@ -434,12 +436,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");

OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);

OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format);

OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
OUT_CS_RELOC(tex->buffer, tex->fb_state.depthpitch[surf->level],
OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level],
0, RADEON_GEM_DOMAIN_VRAM, 0);
}

@@ -448,7 +450,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
END_CS;
}

static void r300_emit_query_start(struct r300_context *r300)
void r300_emit_query_start(struct r300_context *r300)
{
struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
struct r300_query *query = r300->query_current;
@@ -491,13 +493,13 @@ static void r300_emit_query_finish(struct r300_context *r300,
/* pipe 3 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3),
OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3),
0, RADEON_GEM_DOMAIN_GTT, 0);
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2),
OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2),
0, RADEON_GEM_DOMAIN_GTT, 0);
case 2:
/* pipe 1 only */
@@ -505,13 +507,13 @@ static void r300_emit_query_finish(struct r300_context *r300,
OUT_CS_REG(R300_SU_REG_DEST,
1 << (caps->high_second_pipe ? 3 : 1));
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1),
OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1),
0, RADEON_GEM_DOMAIN_GTT, 0);
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0),
OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0),
0, RADEON_GEM_DOMAIN_GTT, 0);
break;
default:
@@ -533,7 +535,7 @@ static void rv530_emit_query_single(struct r300_context *r300,
BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -546,10 +548,10 @@ static void rv530_emit_query_double(struct r300_context *r300,
BEGIN_CS(14);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -747,7 +749,7 @@ void r300_emit_textures_state(struct r300_context *r300,
OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format[2]);

OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
OUT_CS_RELOC(allstate->textures[i]->buffer, texstate->tile_config,
OUT_CS_TEX_RELOC(allstate->textures[i], texstate->tile_config,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
}
@@ -788,8 +790,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
}

for (i = 0; i < aos_count; i++) {
OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer,
RADEON_GEM_DOMAIN_GTT, 0, 0);
OUT_CS_BUF_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer,
RADEON_GEM_DOMAIN_GTT, 0, 0);
}
END_CS;
}
@@ -814,7 +816,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
OUT_CS(r300->vbo_offset);
OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
OUT_CS_BUF_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
END_CS;
}

@@ -1009,16 +1011,22 @@ void r300_emit_buffer_validate(struct r300_context *r300,
unsigned i;
boolean invalid = FALSE;

/* upload buffers first */
if (r300->any_user_vbs) {
r300_upload_user_buffers(r300);
r300->any_user_vbs = false;
}

/* Clean out BOs. */
r300->winsys->reset_bos(r300->winsys);
r300->rws->reset_bos(r300->rws);

validate:
/* Color buffers... */
for (i = 0; i < fb->nr_cbufs; i++) {
tex = (struct r300_texture*)fb->cbufs[i]->texture;
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
0, RADEON_GEM_DOMAIN_VRAM)) {
if (!r300_add_texture(r300->rws, tex,
0, RADEON_GEM_DOMAIN_VRAM)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
@@ -1027,8 +1035,8 @@ validate:
if (fb->zsbuf) {
tex = (struct r300_texture*)fb->zsbuf->texture;
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
0, RADEON_GEM_DOMAIN_VRAM)) {
if (!r300_add_texture(r300->rws, tex,
0, RADEON_GEM_DOMAIN_VRAM)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
@@ -1038,24 +1046,24 @@ validate:
if (!r300->fragment_sampler_views[i])
continue;
tex = (struct r300_texture *)r300->fragment_sampler_views[i]->texture;
if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
if (!r300_add_texture(r300->rws, tex,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
}
/* ...occlusion query buffer... */
if (r300->dirty_state & R300_NEW_QUERY) {
if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
0, RADEON_GEM_DOMAIN_GTT)) {
if (!r300_add_buffer(r300->rws, r300->oqbo,
0, RADEON_GEM_DOMAIN_GTT)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
}
/* ...vertex buffer for SWTCL path... */
if (r300->vbo) {
if (!r300->winsys->add_buffer(r300->winsys, r300->vbo,
RADEON_GEM_DOMAIN_GTT, 0)) {
if (!r300_add_buffer(r300->rws, r300->vbo,
RADEON_GEM_DOMAIN_GTT, 0)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
@@ -1065,23 +1073,22 @@ validate:
for (i = 0; i < r300->velems->count; i++) {
pbuf = vbuf[velem[i].vertex_buffer_index].buffer;

if (!r300->winsys->add_buffer(r300->winsys, pbuf,
RADEON_GEM_DOMAIN_GTT, 0)) {
r300->context.flush(&r300->context, 0, NULL);
if (!r300_add_buffer(r300->rws, pbuf,
RADEON_GEM_DOMAIN_GTT, 0)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer) {
if (!r300->winsys->add_buffer(r300->winsys, index_buffer,
RADEON_GEM_DOMAIN_GTT, 0)) {
if (!r300_add_buffer(r300->rws, index_buffer,
RADEON_GEM_DOMAIN_GTT, 0)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
}

if (!r300->winsys->validate(r300->winsys)) {
if (!r300->rws->validate(r300->rws)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
/* Well, hell. */

+ 1
- 2
src/gallium/drivers/r300/r300_emit.h 파일 보기

@@ -57,8 +57,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300,

void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state);

void r300_emit_query_begin(struct r300_context* r300,
struct r300_query* query);
void r300_emit_query_start(struct r300_context* r300);

void r300_emit_query_end(struct r300_context* r300);


+ 4
- 0
src/gallium/drivers/r300/r300_reg.h 파일 보기

@@ -1500,6 +1500,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ANISO_THRESHOLD_MASK (7<<17)

# define R500_MACRO_SWITCH (1<<22)
# define R500_TX_MAX_ANISO(x) ((x) << 23)
# define R500_TX_MAX_ANISO_MASK (63 << 23)
# define R500_TX_ANISO_HIGH_QUALITY (1 << 30)

# define R500_BORDER_FIX (1<<31)

#define R300_TX_FORMAT0_0 0x4480

+ 41
- 6
src/gallium/drivers/r300/r300_render.c 파일 보기

@@ -30,10 +30,12 @@

#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/u_prim.h"

#include "r300_cs.h"
#include "r300_context.h"
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
#include "r300_render.h"
@@ -123,7 +125,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
static boolean r300_reserve_cs_space(struct r300_context *r300,
unsigned dwords)
{
if (!r300->winsys->check_cs(r300->winsys, dwords)) {
if (!r300->rws->check_cs(r300->rws, dwords)) {
r300->context.flush(&r300->context, 0, NULL);
return TRUE;
}
@@ -131,9 +133,37 @@ static boolean r300_reserve_cs_space(struct r300_context *r300,
}

static boolean immd_is_good_idea(struct r300_context *r300,
unsigned count)
unsigned count)
{
return count <= 4;
struct pipe_vertex_element* velem;
struct pipe_vertex_buffer* vbuf;
boolean checked[PIPE_MAX_ATTRIBS] = {0};
unsigned vertex_element_count = r300->velems->count;
unsigned i, vbi;

if (count > 4) {
return FALSE;
}

/* We shouldn't map buffers referenced by CS, busy buffers,
* and ones placed in VRAM. */
/* XXX Check for VRAM buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
vbi = velem->vertex_buffer_index;

if (!checked[vbi]) {
vbuf = &r300->vertex_buffer[vbi];

if (r300_buffer_is_referenced(r300,
vbuf->buffer)) {
/* It's a very bad idea to map it... */
return FALSE;
}
checked[vbi] = TRUE;
}
}
return TRUE;
}

static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
@@ -316,8 +346,8 @@ static void r300_emit_draw_elements(struct r300_context *r300,
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
OUT_CS_RELOC(indexBuffer, count_dwords,
RADEON_GEM_DOMAIN_GTT, 0, 0);
OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
RADEON_GEM_DOMAIN_GTT, 0, 0);

END_CS;
}
@@ -384,12 +414,16 @@ void r300_draw_range_elements(struct pipe_context* pipe,

r300_update_derived_state(r300);

r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count);

/* 128 dwords for emit_aos and emit_draw_elements */
r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128);
r300_emit_buffer_validate(r300, TRUE, indexBuffer);
r300_emit_dirty_state(r300);
r300_emit_aos(r300, 0);

u_upload_flush(r300->upload_vb);
u_upload_flush(r300->upload_ib);
if (alt_num_verts || count <= 65535) {
r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
maxIndex, mode, start, count);
@@ -412,7 +446,7 @@ void r300_draw_range_elements(struct pipe_context* pipe,
}

if (indexBuffer != orgIndexBuffer) {
pipe->screen->buffer_destroy(indexBuffer);
pipe_buffer_reference( &indexBuffer, NULL );
}
}

@@ -476,6 +510,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
}
} while (count);
}
u_upload_flush(r300->upload_vb);
}
}


+ 0
- 0
src/gallium/drivers/r300/r300_screen.c 파일 보기


이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.

Loading…
취소
저장