Sfoglia il codice sorgente

Merge remote branch 'origin/master' into lp-setup-llvm

Conflicts:
	src/gallium/drivers/llvmpipe/lp_setup_coef.c
	src/gallium/drivers/llvmpipe/lp_setup_coef.h
	src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
	src/gallium/drivers/llvmpipe/lp_setup_point.c
	src/gallium/drivers/llvmpipe/lp_setup_tri.c
	src/gallium/drivers/llvmpipe/lp_state_derived.c
	src/gallium/drivers/llvmpipe/lp_state_fs.h
tags/snb-magic
Keith Whitwell 15 anni fa
parent
commit
0072acd447
100 ha cambiato i file con 4620 aggiunte e 2207 eliminazioni
  1. 3
    0
      Makefile
  2. 1
    1
      SConstruct
  3. 4
    2
      common.py
  4. 1
    1
      configs/linux-dri
  5. 35
    14
      configure.ac
  6. 3
    3
      docs/GL3.txt
  7. 5
    4
      docs/devinfo.html
  8. 2
    2
      docs/egl.html
  9. 6
    4
      docs/fbdev-dri.html
  10. 16
    1
      docs/news.html
  11. 53
    0
      docs/relnotes-7.10.html
  12. 96
    3
      docs/relnotes-7.8.2.html
  13. 89
    0
      docs/relnotes-7.8.3.html
  14. 79
    5
      docs/relnotes-7.9.html
  15. 1
    0
      docs/relnotes.html
  16. 2
    2
      include/GL/internal/dri_interface.h
  17. 2
    0
      scons/crossmingw.py
  18. 43
    21
      scons/gallium.py
  19. 1
    1
      scons/llvm.py
  20. 1
    1
      scons/wcesdk.py
  21. 55
    12
      src/egl/drivers/dri2/egl_dri2.c
  22. 31
    2
      src/egl/drivers/glx/egl_glx.c
  23. 1
    1
      src/egl/main/Makefile
  24. 41
    11
      src/egl/main/eglapi.c
  25. 72
    153
      src/egl/main/eglconfig.c
  26. 87
    46
      src/egl/main/eglconfig.h
  27. 6
    5
      src/egl/main/eglcontext.c
  28. 1
    1
      src/egl/main/egldisplay.c
  29. 0
    1
      src/egl/main/egldriver.c
  30. 7
    7
      src/egl/main/eglsurface.c
  31. 1
    1
      src/gallium/Makefile.template
  32. 3
    0
      src/gallium/auxiliary/Makefile
  33. 3
    0
      src/gallium/auxiliary/SConscript
  34. 55
    12
      src/gallium/auxiliary/draw/draw_context.c
  35. 15
    4
      src/gallium/auxiliary/draw/draw_context.h
  36. 73
    0
      src/gallium/auxiliary/draw/draw_fs.c
  37. 42
    0
      src/gallium/auxiliary/draw/draw_fs.h
  38. 43
    6
      src/gallium/auxiliary/draw/draw_llvm.c
  39. 17
    7
      src/gallium/auxiliary/draw/draw_llvm.h
  40. 8
    0
      src/gallium/auxiliary/draw/draw_llvm_sample.c
  41. 4
    5
      src/gallium/auxiliary/draw/draw_pipe_aaline.c
  42. 4
    4
      src/gallium/auxiliary/draw/draw_pipe_aapoint.c
  43. 31
    18
      src/gallium/auxiliary/draw/draw_pipe_clip.c
  44. 2
    2
      src/gallium/auxiliary/draw/draw_pipe_validate.c
  45. 58
    60
      src/gallium/auxiliary/draw/draw_pipe_wide_point.c
  46. 14
    3
      src/gallium/auxiliary/draw/draw_private.h
  47. 427
    84
      src/gallium/auxiliary/gallivm/lp_bld_arit.c
  48. 23
    0
      src/gallium/auxiliary/gallivm/lp_bld_arit.h
  49. 178
    40
      src/gallium/auxiliary/gallivm/lp_bld_conv.c
  50. 14
    5
      src/gallium/auxiliary/gallivm/lp_bld_debug.c
  51. 6
    5
      src/gallium/auxiliary/gallivm/lp_bld_debug.h
  52. 87
    517
      src/gallium/auxiliary/gallivm/lp_bld_flow.c
  53. 33
    26
      src/gallium/auxiliary/gallivm/lp_bld_flow.h
  54. 55
    9
      src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
  55. 1
    0
      src/gallium/auxiliary/gallivm/lp_bld_init.c
  56. 6
    0
      src/gallium/auxiliary/gallivm/lp_bld_init.h
  57. 22
    4
      src/gallium/auxiliary/gallivm/lp_bld_logic.c
  58. 10
    0
      src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
  59. 21
    0
      src/gallium/auxiliary/gallivm/lp_bld_printf.c
  60. 4
    0
      src/gallium/auxiliary/gallivm/lp_bld_printf.h
  61. 18
    10
      src/gallium/auxiliary/gallivm/lp_bld_quad.c
  62. 564
    191
      src/gallium/auxiliary/gallivm/lp_bld_sample.c
  63. 93
    42
      src/gallium/auxiliary/gallivm/lp_bld_sample.h
  64. 289
    332
      src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
  65. 0
    9
      src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
  66. 366
    328
      src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
  67. 77
    0
      src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
  68. 8
    0
      src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
  69. 77
    0
      src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
  70. 479
    0
      src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
  71. 61
    11
      src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
  72. 20
    13
      src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
  73. 10
    10
      src/gallium/auxiliary/rbug/rbug_context.c
  74. 5
    5
      src/gallium/auxiliary/rbug/rbug_core.c
  75. 64
    0
      src/gallium/auxiliary/rbug/rbug_demarshal.c
  76. 5
    0
      src/gallium/auxiliary/rbug/rbug_proto.h
  77. 6
    6
      src/gallium/auxiliary/rbug/rbug_shader.c
  78. 7
    7
      src/gallium/auxiliary/rbug/rbug_texture.c
  79. 0
    1
      src/gallium/auxiliary/rtasm/rtasm_execmem.c
  80. 24
    15
      src/gallium/auxiliary/target-helpers/inline_sw_helper.h
  81. 12
    6
      src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
  82. 28
    24
      src/gallium/auxiliary/tgsi/tgsi_dump.c
  83. 9
    0
      src/gallium/auxiliary/tgsi/tgsi_dump.h
  84. 4
    2
      src/gallium/auxiliary/tgsi/tgsi_exec.c
  85. 4
    0
      src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
  86. 6
    3
      src/gallium/auxiliary/tgsi/tgsi_scan.c
  87. 2
    0
      src/gallium/auxiliary/tgsi/tgsi_scan.h
  88. 39
    17
      src/gallium/auxiliary/tgsi/tgsi_sse2.c
  89. 11
    5
      src/gallium/auxiliary/tgsi/tgsi_ureg.c
  90. 20
    4
      src/gallium/auxiliary/tgsi/tgsi_ureg.h
  91. 47
    0
      src/gallium/auxiliary/util/u_atomic.h
  92. 32
    15
      src/gallium/auxiliary/util/u_blitter.c
  93. 6
    0
      src/gallium/auxiliary/util/u_blitter.h
  94. 3
    0
      src/gallium/auxiliary/util/u_format.csv
  95. 53
    0
      src/gallium/auxiliary/util/u_format_zs.c
  96. 16
    0
      src/gallium/auxiliary/util/u_format_zs.h
  97. 127
    0
      src/gallium/auxiliary/util/u_index_modify.c
  98. 19
    35
      src/gallium/auxiliary/util/u_index_modify.h
  99. 5
    0
      src/gallium/auxiliary/util/u_math.h
  100. 0
    0
      src/gallium/auxiliary/util/u_pack_color.h

+ 3
- 0
Makefile Vedi File

@@ -329,6 +329,8 @@ GALLIUM_FILES = \
$(DIRECTORY)/src/gallium/Makefile.template \
$(DIRECTORY)/src/gallium/SConscript \
$(DIRECTORY)/src/gallium/targets/Makefile.dri \
$(DIRECTORY)/src/gallium/targets/Makefile.xorg \
$(DIRECTORY)/src/gallium/targets/SConscript.dri \
$(DIRECTORY)/src/gallium/*/Makefile \
$(DIRECTORY)/src/gallium/*/SConscript \
$(DIRECTORY)/src/gallium/*/*/Makefile \
@@ -356,6 +358,7 @@ DRI_FILES = \
$(DIRECTORY)/src/mesa/drivers/dri/common/xmlpool/*.[ch] \
$(DIRECTORY)/src/mesa/drivers/dri/common/xmlpool/*.po \
$(DIRECTORY)/src/mesa/drivers/dri/*/*.[chS] \
$(DIRECTORY)/src/mesa/drivers/dri/*/*.cpp \
$(DIRECTORY)/src/mesa/drivers/dri/*/*/*.[chS] \
$(DIRECTORY)/src/mesa/drivers/dri/*/Makefile \
$(DIRECTORY)/src/mesa/drivers/dri/*/*/Makefile \

+ 1
- 1
SConstruct Vedi File

@@ -208,7 +208,7 @@ Export('env')

SConscript(
'src/SConscript',
variant_dir = env['build'],
variant_dir = env['build_dir'],
duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html
)


+ 4
- 2
common.py Vedi File

@@ -81,8 +81,8 @@ def AddOptions(opts):
from SCons.Variables.EnumVariable import EnumVariable as EnumOption
except ImportError:
from SCons.Options.EnumOption import EnumOption
opts.Add(BoolOption('debug', 'debug build', 'yes'))
opts.Add(BoolOption('profile', 'profile build', 'no'))
opts.Add(EnumOption('build', 'build type', 'debug',
allowed_values=('debug', 'checked', 'profile', 'release')))
opts.Add(BoolOption('quiet', 'quiet command lines', 'yes'))
opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
@@ -91,3 +91,5 @@ def AddOptions(opts):
opts.Add('toolchain', 'compiler toolchain', 'default')
opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
opts.Add(BoolOption('dri', 'build DRI drivers', default_dri))
opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))

+ 1
- 1
configs/linux-dri Vedi File

@@ -58,7 +58,7 @@ EGL_DRIVERS_DIRS = glx

DRIVER_DIRS = dri
GALLIUM_WINSYS_DIRS = sw sw/xlib drm/vmware drm/intel drm/i965
GALLIUM_TARGET_DIRS = egl-swrast
GALLIUM_TARGET_DIRS =
GALLIUM_STATE_TRACKERS_DIRS = egl

DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon \

+ 35
- 14
configure.ac Vedi File

@@ -33,6 +33,10 @@ AC_CHECK_PROGS([MAKE], [gmake make])
AC_PATH_PROG([MKDEP], [makedepend])
AC_PATH_PROG([SED], [sed])

if test "x$MKDEP" = "x"; then
AC_MSG_ERROR([makedepend is required to build Mesa])
fi

dnl Our fallback install-sh is a symlink to minstall. Use the existing
dnl configuration in that case.
AC_PROG_INSTALL
@@ -692,6 +696,11 @@ AC_SUBST([GLESv2_PC_LIB_PRIV])

AC_SUBST([HAVE_XF86VIDMODE])

PKG_CHECK_MODULES([LIBDRM_RADEON],
[libdrm_radeon libdrm >= $LIBDRM_RADEON_REQUIRED],
HAVE_LIBDRM_RADEON=yes,
HAVE_LIBDRM_RADEON=no)

dnl
dnl More X11 setup
dnl
@@ -910,12 +919,7 @@ esac

case $DRI_DIRS in
*radeon*|*r200*|*r300*|*r600*)
PKG_CHECK_MODULES([LIBDRM_RADEON],
[libdrm_radeon libdrm >= $LIBDRM_RADEON_REQUIRED],
HAVE_LIBDRM_RADEON=yes,
HAVE_LIBDRM_RADEON=no)

if test "$HAVE_LIBDRM_RADEON" = yes; then
if test "x$HAVE_LIBDRM_RADEON" = xyes; then
RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
fi
@@ -1363,7 +1367,7 @@ fi
AC_ARG_WITH([egl-platforms],
[AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
[comma delimited native platforms libEGL supports, e.g.
"x11,kms" @<:@default=auto@:>@])],
"x11,drm" @<:@default=auto@:>@])],
[with_egl_platforms="$withval"],
[with_egl_platforms=yes])
AC_ARG_WITH([egl-displays],
@@ -1376,6 +1380,9 @@ case "$with_egl_platforms" in
yes)
if test "x$enable_egl" = xyes && test "x$mesa_driver" != xosmesa; then
EGL_PLATFORMS="x11"
if test "$mesa_driver" = dri; then
EGL_PLATFORMS="$EGL_PLATFORMS drm"
fi
fi
;;
*)
@@ -1518,18 +1525,28 @@ elif test "x$enable_gallium_i965" = xauto; then
fi

dnl
dnl Gallium Radeon configuration
dnl Gallium Radeon r300g configuration
dnl
AC_ARG_ENABLE([gallium-radeon],
[AS_HELP_STRING([--enable-gallium-radeon],
[build gallium radeon @<:@default=disabled@:>@])],
[enable_gallium_radeon="$enableval"],
[enable_gallium_radeon=auto])
if test "x$enable_gallium_radeon" = xauto; then
if test "x$HAVE_LIBDRM_RADEON" = xyes; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
gallium_check_st "radeon/drm" "dri-r300"
else
AC_MSG_WARN([libdrm_radeon is missing, not building gallium-radeon (r300)])
fi
fi
if test "x$enable_gallium_radeon" = xyes; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon"
elif test "x$enable_gallium_radeon" = xauto; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
if test "x$HAVE_LIBDRM_RADEON" = xyes; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon"
else
AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-radeon (r300)])
fi
fi

dnl
@@ -1541,8 +1558,12 @@ AC_ARG_ENABLE([gallium-r600],
[enable_gallium_r600="$enableval"],
[enable_gallium_r600=auto])
if test "x$enable_gallium_r600" = xyes; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
gallium_check_st "r600/drm" "dri-r600"
if test "x$HAVE_LIBDRM_RADEON" = xyes; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
gallium_check_st "r600/drm" "dri-r600"
else
AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-r600])
fi
fi

dnl

+ 3
- 3
docs/GL3.txt Vedi File

@@ -25,7 +25,7 @@ Non-normalized Integer texture/framebuffer formats not started
Packed depth/stencil formats DONE
Per-buffer blend and masks (GL_EXT_draw_buffers2) DONE
GL_EXT_texture_compression_rgtc not started
Red and red/green texture formats Ian?
Red and red/green texture formats DONE (swrast, i965)
Transform feedback (GL_EXT_transform_feedback) ~50% done
glBindFragDataLocation, glGetFragDataLocation,
glBindBufferRange, glBindBufferBase commands
@@ -44,7 +44,7 @@ Instanced drawing (GL_ARB_draw_instanced) ~50% done
Buffer copying (GL_ARB_copy_buffer) DONE
Primitive restart (GL_NV_primitive_restart) not started
16 vertex texture image units not started
Texture buffer objs (GL_ARB_textur_buffer_object) not started
Texture buffer objs (GL_ARB_texture_buffer_object) not started
Rectangular textures (GL_ARB_texture_rectangle) DONE
Uniform buffer objs (GL_ARB_uniform_buffer_object) not started
Signed normalized texture formats ~50% done
@@ -69,7 +69,7 @@ GL 3.3:

GLSL 3.30 not started
GL_ARB_blend_func_extended not started
GL_ARB_explicit_attrib_location not started
GL_ARB_explicit_attrib_location DONE (swrast, i915, i965)
GL_ARB_occlusion_query2 not started
GL_ARB_sampler_objects not started
GL_ARB_texture_rgb10_a2ui not started

+ 5
- 4
docs/devinfo.html Vedi File

@@ -145,7 +145,7 @@ Make sure the values in src/mesa/main/version.h are correct.
</p>

<p>
Update the docs/news.html file and docs/download.html files.
Update docs/news.html.
</p>

<p>
@@ -208,10 +208,11 @@ sftp USERNAME,mesa3d@web.sourceforge.net

<p>
Make an announcement on the mailing lists:
<em>m</em><em>e</em><em>s</em><em>a</em><em>3</em><em>d</em><em>-</em><em>d</em><em>e</em><em>v</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>s</em><em>f</em><em>.</em><em>n</em><em>e</em><em>t</em>,
<em>m</em><em>e</em><em>s</em><em>a</em><em>3</em><em>d</em><em>-</em><em>u</em><em>s</em><em>e</em><em>r</em><em>s</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>s</em><em>f</em><em>.</em><em>n</em><em>e</em><em>t</em>

<em>m</em><em>e</em><em>s</em><em>a</em><em>-</em><em>d</em><em>e</em><em>v</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>f</em><em>r</em><em>e</em><em>e</em><em>d</em><em>e</em><em>s</em><em>k</em><em>t</em><em>o</em><em>p</em><em>.</em><em>o</em><em>r</em><em>g</em>,
<em>m</em><em>e</em><em>s</em><em>a</em><em>-</em><em>u</em><em>s</em><em>e</em><em>r</em><em>s</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>f</em><em>r</em><em>e</em><em>e</em><em>d</em><em>e</em><em>s</em><em>k</em><em>t</em><em>o</em><em>p</em><em>.</em><em>o</em><em>r</em><em>g</em>
and
<em>m</em><em>e</em><em>s</em><em>a</em><em>3</em><em>d</em><em>-</em><em>a</em><em>n</em><em>n</em><em>o</em><em>u</em><em>n</em><em>c</em><em>e</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>s</em><em>f</em><em>.</em><em>n</em><em>e</em><em>t</em>
<em>m</em><em>e</em><em>s</em><em>a</em><em>-</em><em>a</em><em>n</em><em>n</em><em>o</em><em>u</em><em>n</em><em>c</em><em>e</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>f</em><em>r</em><em>e</em><em>e</em><em>d</em><em>e</em><em>s</em><em>k</em><em>t</em><em>o</em><em>p</em><em>.</em><em>o</em><em>r</em><em>g</em>
</p>



+ 2
- 2
docs/egl.html Vedi File

@@ -72,13 +72,13 @@ drivers will be installed to <code>${libdir}/egl</code>.</p>
<li><code>--with-egl-platforms</code>

<p>List the platforms (window systems) to support. Its argument is a comma
separated string such as <code>--with-egl-platforms=x11,kms</code>. It decides
separated string such as <code>--with-egl-platforms=x11,drm</code>. It decides
the platforms a driver may support. The first listed platform is also used by
the main library to decide the native platform: the platform the EGL native
types such as <code>EGLNativeDisplayType</code> or
<code>EGLNativeWindowType</code> defined for.</p>

<p>The available platforms are <code>x11</code>, <code>kms</code>,
<p>The available platforms are <code>x11</code>, <code>drm</code>,
<code>fbdev</code>, and <code>gdi</code>. The <code>gdi</code> platform can
only be built with SCons.</p>


+ 6
- 4
docs/fbdev-dri.html Vedi File

@@ -9,6 +9,9 @@
<center><h1>Mesa fbdev/DRI Drivers</h1></center>
<br>

<h1><center>NOTE: this information is obsolete and will be removed at
a future date</center></h1>

<h1>1. Introduction</h1>

<p>
@@ -22,7 +25,7 @@ Contributors to this project include Jon Smirl, Keith Whitwell and Dave Airlie.

<p>
Applications in the fbdev/DRI environment use
the <a href="http://www.nabble.com/file/p15480666/MiniGXL.html"> MiniGLX</a> interface to choose pixel
the MiniGLX interface to choose pixel
formats, create rendering contexts, etc. It's a subset of the GLX and
Xlib interfaces allowing some degree of application portability between
the X and X-less environments.
@@ -315,8 +318,7 @@ It means that the sample_server process is not running.
<h1>5.0 Programming Information</h1>

<p>
OpenGL/Mesa is interfaced to fbdev via the <a href="http://www.nabble.com/file/p15480666/MiniGLX.html">MiniGLX</a>
interface.
OpenGL/Mesa is interfaced to fbdev via the MiniGLX interface.
MiniGLX is a subset of Xlib and GLX API functions which provides just
enough functionality to setup OpenGL rendering and respond to simple
input events.
@@ -332,7 +334,7 @@ This allows some degree of flexibility for software development and testing.
However, the MiniGLX API is not binary-compatible with full Xlib/GLX.
Some of the structures are different and some macros/functions work
differently.
See the <code>GL/miniglx.h</code> header file for details.
See the GL/miniglx.h header file for details.
</p>



+ 16
- 1
docs/news.html Vedi File

@@ -11,6 +11,22 @@
<H1>News</H1>


<h2>October 4, 2010</h2>

<p>
<a href="relnotes-7.9.html">Mesa 7.9</a> (final) is released. This is a new
development release.
</p>


<h2>September 27, 2010</h2>

<p>
<a href="relnotes-7.9.html">Mesa 7.9.0-rc1</a> is released. This is a
release candidate for the 7.9 development release.
</p>


<h2>June 16, 2010</h2>

<p>
@@ -1277,7 +1293,6 @@ grateful.
<p>
</p><h2>March 18, 1999</h2>
<p>The new webpages are now online. Enjoy, and let me know if you find any errors.
For an eye-candy free version you can use <a href="http://www.mesa3d.org/txt/">http://www.mesa3d.org/txt/</a>.</p>
<p>
</p><h2>February 16, 1999</h2>
<p><a href="http://www.sgi.com/">SGI</a> releases its <a href="http://www.sgi.com/software/opensource/glx/">GLX

+ 53
- 0
docs/relnotes-7.10.html Vedi File

@@ -0,0 +1,53 @@
<HTML>

<TITLE>Mesa Release Notes</TITLE>

<head><link rel="stylesheet" type="text/css" href="mesa.css"></head>

<BODY>

<body bgcolor="#eeeeee">

<H1>Mesa 7.10 Release Notes / tbd</H1>

<p>
Mesa 7.10 is a new development release.
People who are concerned with stability and reliability should stick
with a previous release or wait for Mesa 7.10.1.
</p>
<p>
Mesa 7.10 implements the OpenGL 2.1 API, but the version reported by
glGetString(GL_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 2.1.
</p>
<p>
See the <a href="install.html">Compiling/Installing page</a> for prerequisites
for DRI hardware acceleration.
</p>


<h2>MD5 checksums</h2>
<pre>
tbd
</pre>


<h2>New features</h2>
<ul>
<li>GL_ARB_explicit_attrib_location extension (Intel and software drivers).
</ul>


<h2>Bug fixes</h2>
<ul>
<li>tbd</li>
</ul>


<h2>Changes</h2>
<ul>
<li>tbd</li>
</ul>

</body>
</html>

+ 96
- 3
docs/relnotes-7.8.2.html Vedi File

@@ -26,7 +26,15 @@ for DRI hardware acceleration.

<h2>MD5 checksums</h2>
<pre>
tbd
c89b63d253605ed40e8ac370d25a833c MesaLib-7.8.2.tar.gz
6be2d343a0089bfd395ce02aaf8adb57 MesaLib-7.8.2.tar.bz2
a04ad3b06ac5ff3969a003fa7bbf7d5b MesaLib-7.8.2.zip
7c213f92efeb471f0331670d5079d4c0 MesaDemos-7.8.2.tar.gz
757d9e2e06f48b1a52848be9b0307ced MesaDemos-7.8.2.tar.bz2
8d0e5cfe68b8ebf90265d350ae2c48b1 MesaDemos-7.8.2.zip
b74482e3f44f35ed395c4aada4fd8240 MesaGLUT-7.8.2.tar.gz
a471807b65e49c325808ba4551be93ed MesaGLUT-7.8.2.tar.bz2
9f190268c42be582ef66e47365ee61e3 MesaGLUT-7.8.2.zip
</pre>


@@ -44,10 +52,95 @@ tbd
<ul>
<li>Fixed Gallium glDrawPixels(GL_DEPTH_COMPONENT).
<li>Fixed Gallium Cell driver to buildable, runnable state
<li>Fixed bad error checking for glFramebufferRenderbuffer(attachment=GL_DEPTH_STENCIL_ATTACHMENT).
<li>Fixed incorrect Z coordinate handling in "meta" glDraw/CopyPixels.
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=23670">Bug
#23670</a>.</li>

<li>Assorted i965 driver fixes.
Including but not limited to:
<ul>
<li>Fix scissoring when width or height is
0. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27643">Bug
#27643</a>.
<li>Fix bit allocation for number of color regions for
ARB_draw_buffers.</li>
<li>Set the correct provoking vertex for clipped first-mode
trifans. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24470">Bug
#24470</a>.</li>
<li>Use <code>R16G16B16A16_FLOAT</code> for 3-component half-float.</li>
<li>Fix assertion for surface tile offset usage on Ironlake.</li>
<li>Fix cube map layouts on Ironlake.</li>
<li>When an RB gets a new region, clear the old from the state
cache. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24119">Bug
#24119</a>.</li>
<li>Reject shaders with uninlined function calls instead of hanging.</li>
</ul>
</li>

<li>Assorted i915 driver fixes. Including but not limited to:
<ul>
<li>Fixed texture LOD clamping in i915 driver.
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=24846">Bug
#24846</a>.</li>
<li>Fix off-by-one for drawing rectangle.
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=27408">Bug
#27408</a>.</li>
</ul>
</li>

<li>Fixed hangs in etracer on 830 and 845
chipsets. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=26557">Bug
#26557</a>.</li>
<li>Fixed tiling of small textures on all Intel drivers.</li>
<li>Fixed crash in Savage driver when using <code>_mesa_CopyTexImage2D</code>.
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=27652">Bug
#27652</a>.</li>

<li>Assorted GLX fixes. Including but not limited to:
<ul>
<li>Fixed <code>__glXInitializeVisualConfigFromTags</code>'s handling of
unrecognized fbconfig tags.</li>
<li>Fixed regression with <code>GLX_USE_GL</code>.
<li>Fixed config chooser logic for 'mask' matching.</li>
<li>Report swap events correctly in direct rendered case (DRI2)</li>
<li>Fixed build with dri2proto which doesn't define
<code>X_DRI2SwapInterval</code>.</li>
<li>Get <code>GLX_SCREEN</code> first in <code>__glXQueryContextInfo</code>.
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=14245">Bug
#14245</a>.</li>
</ul>
</li>

<li>Assorted GLSL fixes. Including but not limited to:
<ul>
<li>Change variable declared assertion into conditional in GLSL
compiler. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27921">Bug
#27921</a>.</li>
<li>Fix instruction indexing
bugs. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27566">Bug
#27566</a>.</li>
<li>Updated uniform location / offset encoding to be more like
other implementations.</li>
<li>Don't overwrite a driver's shader infolog with generic failure
message.</li>
</ul>
</li>

<li>Fixed OSMesa build for 16 and 32-bit color channel depth.
<li>Fixed OSMesa build with hidden symbol visibility. libOSMesa no longer links to libGL.
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=28305">Bug
#28305</a>.
<li>Fixed handling of multiple render targets in fixed-function
texture environment programs.</li>
<li>Fixed conversion errors in <code>signed_rgba8888[rev]</code> texel
fetch.</li>
<li>Don't set srcLevel on <code>GL_TEXTURE_RECTANGLE_ARB</code> targets.</li>
<li>Various build fixes for OpenBSD.</li>
<li>Various build fixes for OS X.</li>
<li>Various build fixes for GCC 3.3.</li>
</ul>


<h2>Changes</h2>
<p>None.</p>
</body>
</html>

+ 89
- 0
docs/relnotes-7.8.3.html Vedi File

@@ -0,0 +1,89 @@
<HTML>

<TITLE>Mesa Release Notes</TITLE>

<head><link rel="stylesheet" type="text/css" href="mesa.css"></head>

<BODY>

<body bgcolor="#eeeeee">

<H1>Mesa 7.8.3 Release Notes / (date tbd)</H1>

<p>
Mesa 7.8.3 is a bug fix release which fixes bugs found since the 7.8.2 release.
</p>
<p>
Mesa 7.8.3 implements the OpenGL 2.1 API, but the version reported by
glGetString(GL_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 2.1.
</p>
<p>
See the <a href="install.html">Compiling/Installing page</a> for prerequisites
for DRI hardware acceleration.
</p>


<h2>MD5 checksums</h2>
<pre>
x MesaLib-7.8.3.tar.gz
x MesaLib-7.8.3.tar.bz2
x MesaLib-7.8.3.zip
x MesaDemos-7.8.3.tar.gz
x MesaDemos-7.8.3.tar.bz2
x MesaDemos-7.8.3.zip
x MesaGLUT-7.8.3.tar.gz
x MesaGLUT-7.8.3.tar.bz2
x MesaGLUT-7.8.3.zip
</pre>


<h2>New features</h2>
<p>None.</p>


<h2>Changes</h2>
<ul>
<li>The radeon driver should use less memory when searching for a valid mip
image.</li>
</ul>


<h2>Bug fixes</h2>
<ul>
<li>Fix unsupported FB with D24S8 (bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=29116">29116</a>)</li>
<li>Fix ReadPixels crash when reading depth/stencil from an FBO</li>
<li>Fixed a bug rendering to 16-bit buffers using swrast.</li>
<li>Fixed a state tracker/TGSI bug that caused crashes when using Windows'
memory debugging features.</li>
<li>Fixed an issue rendering to 32-bit channels with swrast (bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=29487">29487</a>)</li>
<li>GLSL: fix indirect <TT>gl_TextureMatrix</TT> addressing (bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=28967">28967</a>)</li>
<li>GLSL: fix for bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=27216">27216</a></li>
<li>GLSL: fix zw fragcoord entries in some cases (bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=29183">29183</a>)</li>
<li>Fix texture env generation in some cases (bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=28169">28169</a>)</li>
<li>osmesa: a fix for calling <TT>OSMesaMakeCurrent</TT> twice was applied (bug
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=10966">10966</a>)</li>
<li>A bug was fixed which could cause Mesa to ignore the
<TT>MESA_EXTENSION_OVERRIDE</TT> environment variable.</li>
<li>A bug related to specular highlights on backfaces was fixed.</li>
<li>A radeon-specific issue with <TT>glCopyTex(Sub)Image</TT> was
corrected.</li>
<li>radeon/wine: flush command stream in more cases, fixing wine d3d9
tests.</li>
<li>r600: fix sin+cos normalization.</li>
<li>r600: (properly) ignore <TT>GL_COORD_REPLACE</TT> when point sprites are
disabled.</li>
<li>radeon: avoid flushing when the context is not current.</li>
<li>r300c: a bug affecting unaligned BOs was fixed.</li>
<li>r300c: a hardlock caused by ARB_half_float_vertex incorrectly advertised on some chipsets was fixed.</li>
</ul>


</body>
</html>

+ 79
- 5
docs/relnotes-7.9.html Vedi File

@@ -8,7 +8,7 @@

<body bgcolor="#eeeeee">

<H1>Mesa 7.9 Release Notes / date TBD</H1>
<H1>Mesa 7.9 Release Notes / October 4, 2010</H1>

<p>
Mesa 7.9 is a new development release.
@@ -28,7 +28,12 @@ for DRI hardware acceleration.

<h2>MD5 checksums</h2>
<pre>
tbd
ed65ab425b25895c7f473d0a5e6e64f8 MesaLib-7.9.tar.gz
82c740c49d572baa6da2b1a1eee90bca MesaLib-7.9.tar.bz2
cd2b6ecec759b0457475e94bbb38fedb MesaLib-7.9.zip
7b54af9fb9b1f6a1a65db2520f50848f MesaGLUT-7.9.tar.gz
20d07419d1929f833fdb36bced290ad5 MesaGLUT-7.9.tar.bz2
62a7edecd7c92675cd6029b05217eb0a MesaGLUT-7.9.zip
</pre>


@@ -37,16 +42,85 @@ tbd
<li>New, improved GLSL compiler written by Intel.
See the <a href="shading.html"> Shading Language</a> page for
more information.
<li>GL_EXT_timer_query extension (i965 driver only)
<li>New, very experimental Gallium driver for R600-R700 Radeons.
<li>Support for AMD Evergreen-based Radeons (HD 5xxx)
<li>GL_EXT_timer_query extension (i965 driver and softpipe only)
<li>GL_EXT_framebuffer_multisample extension (intel drivers, MAX_SAMPLES = 1)
<li>GL_ARB_texture_swizzle extension (alias of GL_EXT_texture_swizzle)
<li>GL_ARB_draw_elements_base_vertex, GL_ARB_fragment_program_shadow
and GL_EXT_draw_buffers2 in Gallium drivers
<li>GL_ARB_draw_elements_base_vertex, GL_ARB_fragment_program_shadow,
GL_ARB_window_pos, GL_EXT_gpu_program_parameters,
GL_ATI_texture_env_combine3, GL_MESA_pack_invert, and GL_OES_EGL_image
extensions in Gallium drivers
<li>GL_ARB_depth_clamp and GL_NV_depth_clamp extensions (in nv50 and r600
Gallium drivers)
<li>GL_ARB_half_float_vertex extension (in nvfx, r300, r600, softpipe,
and llvmpipe Gallium drivers)
<li>GL_EXT_draw_buffers2 (in nv50, r600, softpipe, and llvmpipe Gallium
drivers)
<li>GL_EXT_texture_swizzle (in nvfx, r300, r600, softpipe, and llvmpipe
Gallium drivers)
<li>GL_ATI_texture_mirror_once (in nvfx, nv50, r300, r600, softpipe, and
llvmpipe Gallium drivers)
<li>GL_NV_conditional_render (in r300 Gallium driver)
<li>Initial "signs of life" support for Sandybridge hardware in i965 DRI
driver.
</ul>


<h2>Bug fixes</h2>
<p>This list is likely incomplete.</p>
<ul>
<li>Massive improvements to the Gallium driver for R300-R500 Radeons; this
driver is now considered stable for use as a DRI (OpenGL) driver.
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=10908">Bug 10908</a> - GLSL: gl_FogParamaters gl_Fog built-in uniform not functioning</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=13753">Bug 13753</a> - Numerous bugs in GLSL uniform handling</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=16854">Bug 16854</a> - GLSL function call at global scope causes SEGV</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=16856">Bug 16856</a> - GLSL indexing of unsized array results in assertion failure</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=18659">Bug 18659</a> - Crash in shader/slang/slang_codegen.c _slang_gen_function_call_name()</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=19089">Bug 19089</a> - [GLSL] glsl1/shadow2D() cases fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=22622">Bug 22622</a> - [GM965 GLSL] noise*() cause GPU lockup</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=23743">Bug 23743</a> - For loop from 0 to 0 not optimized out</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=24553">Bug 24553</a> - shader compilation times explode when using more () pairs</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25664">Bug 25664</a> - [GLSL] re-declaring an empty array fails to compile</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25769">Bug 25769</a> - [GLSL] "float" can be implicitly converted to "int"</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25808">Bug 25808</a> - [GLSL] const variable is modified successfully</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25826">Bug 25826</a> - [GLSL] declaring an unsized array then re-declaring with a size fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25827">Bug 25827</a> - [GLSL] vector constructor accepts too many arguments successfully</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25829">Bug 25829</a> - [GLSL] allowing non-void function without returning value</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25830">Bug 25830</a> - [GLSL] allowing non-constant-expression as const declaration initializer</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25877">Bug 25877</a> - [GLSL 1.10] implicit conversion from "int" to "float" should not be allowed</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25878">Bug 25878</a> - [GLSL] sampler is converted to int successfully</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25994">Bug 25994</a> - [GM45][GLSL] 'return' statement in vertex shader unsupported</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25999">Bug 25999</a> - [GLSL] embedded structure constructor fails to compile</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26000">Bug 26000</a> - [GLSL] allowing different parameter qualifier between the function definition and declaration</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26001">Bug 26001</a> - [GLSL 1.10] constructing matrix from matrix succeeds</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26224">Bug 26224</a> - [GLSL] Cannot get location of a uniform struct member</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26990">Bug 26990</a> - [GLSL] variable declaration in "while" fails to compile</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27004">Bug 27004</a> - [GLSL] allowing macro redefinition</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27060">Bug 27060</a> - [965] piglit glsl-fs-raytrace failure due to lack of function calls.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27216">Bug 27216</a> - Assignment with a function call in an if statement causes an assertion failure</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27261">Bug 27261</a> - GLSL Compiler fails on the following vertex shader</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27265">Bug 27265</a> - GLSL Compiler doesnt link the attached vertex shader</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27388">Bug 27388</a> - [i965] piglit glsl-vs-arrays failure</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27403">Bug 27403</a> - GLSL struct causing "Invalid src register file ..." error</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27914">Bug 27914</a> - GLSL compiler uses MUL+ADD where it could use MAD</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28055">Bug 28055</a> - glsl-texcoord-array fails GLSL compilation</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28374">Bug 28374</a> - SIGSEGV shader/slang/slang_typeinfo.c:534</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28748">Bug 28748</a> - [i965] uninlined function calls support</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28833">Bug 28833</a> - piglit/shaders/glsl-texcoord-array fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28834">Bug 28834</a> - Add support for system fpclassify to GL_OES_query_matrix function for OpenBSD / NetBSD</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28837">Bug 28837</a> - varying vec4 index support</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28845">Bug 28845</a> - The GLU tesselator code has some warnings</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28889">Bug 28889</a> - [regression] wine game crash</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28894">Bug 28894</a> - slang build fails if absolute path contains spaces</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28913">Bug 28913</a> - [GLSL] allowing two version statements</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28931">Bug 28931</a> - Floating Point Exception in Warzone2100 Trunk version</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28966">Bug 28966</a> - [r300g] Dynamic branching 3 demo does not run</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28967">Bug 28967</a> - slang/slang_emit.c:350: storage_to_src_reg: Assertion `index &gt;= 0' failed.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29013">Bug 29013</a> - [r300g] translate_rgb_op: unknown opcode ILLEGAL OPCODE</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29020">Bug 29020</a> - [r300g] Wine d3d9 tests hardlock</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29910">Bug 29910</a> - Mesa advertises bogus GL_ARB_shading_language_120</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=30196">Bug 30196</a> - [GLSL] gl_TextureMatrix{Inverse,Transpose,InverseTranspose} unsupported</li>
</ul>



+ 1
- 0
docs/relnotes.html Vedi File

@@ -14,6 +14,7 @@ The release notes summarize what's new or changed in each Mesa release.

<UL>
<LI><A HREF="relnotes-7.9.html">7.9 release notes</A>
<LI><A HREF="relnotes-7.8.3.html">7.8.3 release notes</A>
<LI><A HREF="relnotes-7.8.2.html">7.8.2 release notes</A>
<LI><A HREF="relnotes-7.8.1.html">7.8.1 release notes</A>
<LI><A HREF="relnotes-7.8.html">7.8 release notes</A>

+ 2
- 2
include/GL/internal/dri_interface.h Vedi File

@@ -805,7 +805,7 @@ typedef struct __DRIimageExtensionRec __DRIimageExtension;
struct __DRIimageExtensionRec {
__DRIextension base;

__DRIimage *(*createImageFromName)(__DRIcontext *context,
__DRIimage *(*createImageFromName)(__DRIscreen *screen,
int width, int height, int format,
int name, int pitch,
void *loaderPrivate);
@@ -841,7 +841,7 @@ typedef struct __DRIimageLookupExtensionRec __DRIimageLookupExtension;
struct __DRIimageLookupExtensionRec {
__DRIextension base;

__DRIimage *(*lookupEGLImage)(__DRIcontext *context, void *image,
__DRIimage *(*lookupEGLImage)(__DRIscreen *screen, void *image,
void *loaderPrivate);
};


+ 2
- 0
scons/crossmingw.py Vedi File

@@ -54,11 +54,13 @@ prefixes32 = SCons.Util.Split("""
i586-mingw32msvc-
i686-mingw32msvc-
i686-pc-mingw32-
i686-w64-mingw32-
""")
prefixes64 = SCons.Util.Split("""
amd64-mingw32-
amd64-mingw32msvc-
amd64-pc-mingw32-
x86_64-w64-mingw32-
""")

def find(env):

+ 43
- 21
scons/gallium.py Vedi File

@@ -49,14 +49,14 @@ def symlink(target, source, env):
os.symlink(os.path.basename(source), target)

def install(env, source, subdir):
target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build'], subdir)
target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'], subdir)
env.Install(target_dir, source)

def install_program(env, source):
install(env, source, 'bin')

def install_shared_library(env, sources, version = ()):
install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build'])
install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'])
version = tuple(map(str, version))
if env['SHLIBSUFFIX'] == '.dll':
dlls = env.FindIxes(sources, 'SHLIBPREFIX', 'SHLIBSUFFIX')
@@ -130,7 +130,6 @@ def generate(env):
env['msvc'] = env['CC'] == 'cl'

# shortcuts
debug = env['debug']
machine = env['machine']
platform = env['platform']
x86 = env['machine'] == 'x86'
@@ -138,20 +137,42 @@ def generate(env):
gcc = env['gcc']
msvc = env['msvc']

# Backwards compatibility with the debug= profile= options
if env['build'] == 'debug':
if not env['debug']:
print 'scons: debug option is deprecated: use instead build=release'
env['build'] = 'release'
if env['profile']:
print 'scons: profile option is deprecated: use instead build=profile'
env['build'] = 'profile'
if False:
# Enforce SConscripts to use the new build variable
env.popitem('debug')
env.popitem('profile')
else:
# Backwards portability with older sconscripts
if env['build'] in ('debug', 'checked'):
env['debug'] = True
env['profile'] = False
if env['build'] == 'profile':
env['debug'] = False
env['profile'] = True
if env['build'] == 'release':
env['debug'] = False
env['profile'] = False

# Put build output in a separate dir, which depends on the current
# configuration. See also http://www.scons.org/wiki/AdvancedBuildExample
build_topdir = 'build'
build_subdir = env['platform']
if env['machine'] != 'generic':
build_subdir += '-' + env['machine']
if env['debug']:
build_subdir += "-debug"
if env['profile']:
build_subdir += "-profile"
if env['build'] != 'release':
build_subdir += '-' + env['build']
build_dir = os.path.join(build_topdir, build_subdir)
# Place the .sconsign file in the build dir too, to avoid issues with
# different scons versions building the same source file
env['build'] = build_dir
env['build_dir'] = build_dir
env.SConsignFile(os.path.join(build_dir, '.sconsign'))
if 'SCONS_CACHE_DIR' in os.environ:
print 'scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],)
@@ -165,11 +186,11 @@ def generate(env):

# C preprocessor options
cppdefines = []
if debug:
if env['build'] in ('debug', 'checked'):
cppdefines += ['DEBUG']
else:
cppdefines += ['NDEBUG']
if env['profile']:
if env['build'] == 'profile':
cppdefines += ['PROFILE']
if platform == 'windows':
cppdefines += [
@@ -190,7 +211,7 @@ def generate(env):
'_SCL_SECURE_NO_WARNINGS',
'_SCL_SECURE_NO_DEPRECATE',
]
if debug:
if env['build'] in ('debug', 'checked'):
cppdefines += ['_DEBUG']
if env['toolchain'] == 'winddk':
# Mimic WINDDK's builtin flags. See also:
@@ -217,7 +238,7 @@ def generate(env):
('__BUILDMACHINE__', 'WinDDK'),
('FPO', '0'),
]
if debug:
if env['build'] in ('debug', 'checked'):
cppdefines += [('DBG', 1)]
if platform == 'wince':
cppdefines += [
@@ -253,15 +274,16 @@ def generate(env):
ccflags = [] # C & C++
if gcc:
ccversion = env['CCVERSION']
if debug:
ccflags += ['-O0', '-g3']
if env['build'] == 'debug':
ccflags += ['-O0']
elif ccversion.startswith('4.2.'):
# gcc 4.2.x optimizer is broken
print "warning: gcc 4.2.x optimizer is broken -- disabling optimizations"
ccflags += ['-O0', '-g3']
ccflags += ['-O0']
else:
ccflags += ['-O3', '-g3']
if env['profile']:
ccflags += ['-O3']
ccflags += ['-g3']
if env['build'] in ('checked', 'profile'):
# See http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#Which_options_should_I_pass_to_gcc_when_compiling_for_profiling?
ccflags += [
'-fno-omit-frame-pointer',
@@ -320,7 +342,7 @@ def generate(env):
# See also:
# - http://msdn.microsoft.com/en-us/library/19z1t1wy.aspx
# - cl /?
if debug:
if env['build'] == 'debug':
ccflags += [
'/Od', # disable optimizations
'/Oi', # enable intrinsic functions
@@ -389,7 +411,7 @@ def generate(env):
if env['platform'] == 'windows' and msvc:
# Choose the appropriate MSVC CRT
# http://msdn.microsoft.com/en-us/library/2kzt1wy3.aspx
if env['debug']:
if env['build'] in ('debug', 'checked'):
env.Append(CCFLAGS = ['/MTd'])
env.Append(SHCCFLAGS = ['/LDd'])
else:
@@ -421,7 +443,7 @@ def generate(env):
else:
env['_LIBFLAGS'] = '-Wl,--start-group ' + env['_LIBFLAGS'] + ' -Wl,--end-group'
if msvc:
if not env['debug']:
if env['build'] != 'debug':
# enable Link-time Code Generation
linkflags += ['/LTCG']
env.Append(ARFLAGS = ['/LTCG'])
@@ -460,7 +482,7 @@ def generate(env):

'/entry:DrvEnableDriver',
]
if env['debug'] or env['profile']:
if env['build'] != 'release':
linkflags += [
'/MAP', # http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx
]

+ 1
- 1
scons/llvm.py Vedi File

@@ -124,7 +124,7 @@ def generate(env):
# Some of the LLVM C headers use the inline keyword without
# defining it.
env.Append(CPPDEFINES = [('inline', '__inline')])
if env['debug']:
if env['build'] in ('debug', 'checked'):
# LLVM libraries are static, build with /MT, and they
# automatically link against LIBCMT. When we're doing a
# debug build we'll be linking against LIBCMTD, so disable

+ 1
- 1
scons/wcesdk.py Vedi File

@@ -122,7 +122,7 @@ def get_wce600_paths(env):
host_cpu = os.environ.get('_HOSTCPUTYPE', 'i386')
target_cpu = os.environ.get('_TGTCPU', 'x86')

if env['debug']:
if env['build'] == 'debug':
build = 'debug'
else:
build = 'retail'

+ 55
- 12
src/egl/drivers/dri2/egl_dri2.c Vedi File

@@ -292,7 +292,7 @@ dri2_process_buffers(struct dri2_egl_surface *dri2_surf,
struct dri2_egl_display *dri2_dpy =
dri2_egl_display(dri2_surf->base.Resource.Display);
xcb_rectangle_t rectangle;
int i;
unsigned i;

dri2_surf->buffer_count = count;
dri2_surf->have_fake_front = 0;
@@ -339,6 +339,8 @@ dri2_get_buffers(__DRIdrawable * driDrawable,
xcb_dri2_get_buffers_reply_t *reply;
xcb_dri2_get_buffers_cookie_t cookie;

(void) driDrawable;

cookie = xcb_dri2_get_buffers_unchecked (dri2_dpy->conn,
dri2_surf->drawable,
count, count, attachments);
@@ -360,23 +362,28 @@ dri2_get_buffers(__DRIdrawable * driDrawable,
static void
dri2_flush_front_buffer(__DRIdrawable * driDrawable, void *loaderPrivate)
{
(void) driDrawable;

/* FIXME: Does EGL support front buffer rendering at all? */

#if 0
struct dri2_egl_surface *dri2_surf = loaderPrivate;

dri2WaitGL(dri2_surf);
#else
(void) loaderPrivate;
#endif
}

static __DRIimage *
dri2_lookup_egl_image(__DRIcontext *context, void *image, void *data)
dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data)
{
struct dri2_egl_context *dri2_ctx = data;
_EGLDisplay *disp = dri2_ctx->base.Resource.Display;
_EGLDisplay *disp = data;
struct dri2_egl_image *dri2_img;
_EGLImage *img;

(void) screen;

img = _eglLookupImage(image, disp);
if (img == NULL) {
_eglError(EGL_BAD_PARAMETER, "dri2_lookup_egl_image");
@@ -407,6 +414,8 @@ dri2_get_buffers_with_format(__DRIdrawable * driDrawable,
xcb_dri2_get_buffers_with_format_cookie_t cookie;
xcb_dri2_attach_format_t *format_attachments;

(void) driDrawable;

format_attachments = (xcb_dri2_attach_format_t *) attachments;
cookie = xcb_dri2_get_buffers_with_format_unchecked (dri2_dpy->conn,
dri2_surf->drawable,
@@ -440,14 +449,14 @@ struct dri2_extension_match {
static struct dri2_extension_match dri2_driver_extensions[] = {
{ __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
{ __DRI_DRI2, 1, offsetof(struct dri2_egl_display, dri2) },
{ NULL }
{ NULL, 0, 0 }
};

static struct dri2_extension_match dri2_core_extensions[] = {
{ __DRI2_FLUSH, 1, offsetof(struct dri2_egl_display, flush) },
{ __DRI_TEX_BUFFER, 2, offsetof(struct dri2_egl_display, tex_buffer) },
{ __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
{ NULL }
{ NULL, 0, 0 }
};

static EGLBoolean
@@ -728,7 +737,7 @@ dri2_create_screen(_EGLDisplay *disp)
dri2_dpy = disp->DriverData;
dri2_dpy->dri_screen =
dri2_dpy->dri2->createNewScreen(0, dri2_dpy->fd, dri2_dpy->extensions,
&dri2_dpy->driver_configs, dri2_dpy);
&dri2_dpy->driver_configs, disp);

if (dri2_dpy->dri_screen == NULL) {
_eglLog(_EGL_WARNING, "DRI2: failed to create dri screen");
@@ -772,6 +781,8 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp,
{
struct dri2_egl_display *dri2_dpy;

(void) drv;

dri2_dpy = malloc(sizeof *dri2_dpy);
if (!dri2_dpy)
return _eglError(EGL_BAD_ALLOC, "eglInitialize");
@@ -1075,6 +1086,8 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
const __DRIconfig *dri_config;
int api;

(void) drv;

dri2_ctx = malloc(sizeof *dri2_ctx);
if (!dri2_ctx) {
_eglError(EGL_BAD_ALLOC, "eglCreateContext");
@@ -1146,6 +1159,8 @@ dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);

(void) drv;

if (_eglIsSurfaceBound(surf))
return EGL_TRUE;

@@ -1221,6 +1236,8 @@ dri2_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
xcb_screen_iterator_t s;
xcb_generic_error_t *error;

(void) drv;

dri2_surf = malloc(sizeof *dri2_surf);
if (!dri2_surf) {
_eglError(EGL_BAD_ALLOC, "dri2_create_surface");
@@ -1369,7 +1386,7 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw,
xcb_rectangle_t rectangles[16];
int i;

if (numRects > ARRAY_SIZE(rectangles))
if (numRects > (int)ARRAY_SIZE(rectangles))
return dri2_copy_region(drv, disp, draw, dri2_surf->region);

/* FIXME: Invert y here? */
@@ -1394,6 +1411,8 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw,
static _EGLProc
dri2_get_proc_address(_EGLDriver *drv, const char *procname)
{
(void) drv;

/* FIXME: Do we need to support lookup of EGL symbols too? */

return (_EGLProc) _glapi_get_proc_address(procname);
@@ -1405,6 +1424,8 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface);

(void) drv;

/* FIXME: If EGL allows frontbuffer rendering for window surfaces,
* we need to copy fake to real here.*/

@@ -1416,6 +1437,9 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
static EGLBoolean
dri2_wait_native(_EGLDriver *drv, _EGLDisplay *disp, EGLint engine)
{
(void) drv;
(void) disp;

if (engine != EGL_CORE_NATIVE_ENGINE)
return _eglError(EGL_BAD_PARAMETER, "eglWaitNative");
/* glXWaitX(); */
@@ -1438,6 +1462,8 @@ dri2_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
xcb_gcontext_t gc;

(void) drv;

(*dri2_dpy->flush->flush)(dri2_surf->dri_drawable);

gc = xcb_generate_id(dri2_dpy->conn);
@@ -1501,6 +1527,11 @@ static EGLBoolean
dri2_release_tex_image(_EGLDriver *drv,
_EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
{
(void) drv;
(void) disp;
(void) surf;
(void) buffer;

return EGL_TRUE;
}

@@ -1509,7 +1540,6 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
EGLClientBuffer buffer, const EGLint *attr_list)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
struct dri2_egl_image *dri2_img;
unsigned int attachments[1];
xcb_drawable_t drawable;
@@ -1521,6 +1551,8 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
xcb_generic_error_t *error;
int stride, format;

(void) ctx;

drawable = (xcb_drawable_t) buffer;
xcb_dri2_create_drawable (dri2_dpy->conn, drawable);
attachments[0] = XCB_DRI2_ATTACHMENT_BUFFER_FRONT_LEFT;
@@ -1577,7 +1609,7 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,

stride = buffers[0].pitch / buffers[0].cpp;
dri2_img->dri_image =
dri2_dpy->image->createImageFromName(dri2_ctx->dri_context,
dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
buffers_reply->width,
buffers_reply->height,
format,
@@ -1628,10 +1660,11 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
EGLClientBuffer buffer, const EGLint *attr_list)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
struct dri2_egl_image *dri2_img;
EGLint width, height, format, name, stride, pitch, i, err;

(void) ctx;

name = (EGLint) buffer;

err = EGL_SUCCESS;
@@ -1697,7 +1730,7 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
}

dri2_img->dri_image =
dri2_dpy->image->createImageFromName(dri2_ctx->dri_context,
dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
width,
height,
format,
@@ -1718,6 +1751,8 @@ dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
_EGLContext *ctx, EGLenum target,
EGLClientBuffer buffer, const EGLint *attr_list)
{
(void) drv;

switch (target) {
case EGL_NATIVE_PIXMAP_KHR:
return dri2_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
@@ -1737,6 +1772,8 @@ dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_image *dri2_img = dri2_egl_image(image);

(void) drv;

dri2_dpy->image->destroyImage(dri2_img->dri_image);
free(dri2_img);

@@ -1753,6 +1790,8 @@ dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
unsigned int use, dri_use, valid_mask;
EGLint err = EGL_SUCCESS;

(void) drv;

dri2_img = malloc(sizeof *dri2_img);
if (!dri2_img) {
_eglError(EGL_BAD_ALLOC, "dri2_create_image_khr");
@@ -1853,6 +1892,8 @@ dri2_export_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img,
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_image *dri2_img = dri2_egl_image(img);

(void) drv;

if (name && !dri2_dpy->image->queryImage(dri2_img->dri_image,
__DRI_IMAGE_ATTRIB_NAME, name)) {
_eglError(EGL_BAD_ALLOC, "dri2_export_drm_image_mesa");
@@ -1879,6 +1920,8 @@ _eglMain(const char *args)
{
struct dri2_egl_driver *dri2_drv;

(void) args;

dri2_drv = malloc(sizeof *dri2_drv);
if (!dri2_drv)
return NULL;

+ 31
- 2
src/egl/drivers/glx/egl_glx.c Vedi File

@@ -162,7 +162,8 @@ static EGLBoolean
convert_fbconfig(Display *dpy, GLXFBConfig fbconfig,
struct GLX_egl_config *GLX_conf)
{
int err = 0, attr, egl_attr, val, i;
int err = 0, attr, egl_attr, val;
unsigned i;
EGLint conformant, config_caveat, surface_type;

for (i = 0; i < ARRAY_SIZE(fbconfig_attributes); i++) {
@@ -243,7 +244,8 @@ static EGLBoolean
convert_visual(Display *dpy, XVisualInfo *vinfo,
struct GLX_egl_config *GLX_conf)
{
int err, attr, egl_attr, val, i;
int err, attr, egl_attr, val;
unsigned i;
EGLint conformant, config_caveat, surface_type;

/* the visual must support OpenGL */
@@ -457,6 +459,8 @@ GLX_eglInitialize(_EGLDriver *drv, _EGLDisplay *disp,
{
struct GLX_egl_display *GLX_dpy;

(void) drv;

if (disp->Platform != _EGL_PLATFORM_X11)
return EGL_FALSE;

@@ -541,6 +545,8 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp);
struct GLX_egl_context *GLX_ctx_shared = GLX_egl_context(share_list);

(void) drv;

if (!GLX_ctx) {
_eglError(EGL_BAD_ALLOC, "eglCreateContext");
return NULL;
@@ -604,6 +610,8 @@ GLX_eglMakeCurrent(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
GLXContext cctx;
EGLBoolean ret = EGL_FALSE;

(void) drv;

/* bind the new context and return the "orphaned" one */
if (!_eglBindContext(&ctx, &dsurf, &rsurf))
return EGL_FALSE;
@@ -656,6 +664,8 @@ GLX_eglCreateWindowSurface(_EGLDriver *drv, _EGLDisplay *disp,
struct GLX_egl_surface *GLX_surf;
uint width, height;

(void) drv;

GLX_surf = CALLOC_STRUCT(GLX_egl_surface);
if (!GLX_surf) {
_eglError(EGL_BAD_ALLOC, "eglCreateWindowSurface");
@@ -702,6 +712,8 @@ GLX_eglCreatePixmapSurface(_EGLDriver *drv, _EGLDisplay *disp,
struct GLX_egl_surface *GLX_surf;
uint width, height;

(void) drv;

GLX_surf = CALLOC_STRUCT(GLX_egl_surface);
if (!GLX_surf) {
_eglError(EGL_BAD_ALLOC, "eglCreatePixmapSurface");
@@ -762,6 +774,8 @@ GLX_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp,
int attribs[5];
int i;

(void) drv;

GLX_surf = CALLOC_STRUCT(GLX_egl_surface);
if (!GLX_surf) {
_eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface");
@@ -820,6 +834,8 @@ GLX_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp,
static EGLBoolean
GLX_eglDestroySurface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
{
(void) drv;

if (!_eglIsSurfaceBound(surf))
destroy_surface(disp, surf);

@@ -833,6 +849,8 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp);
struct GLX_egl_surface *GLX_surf = GLX_egl_surface(draw);

(void) drv;

glXSwapBuffers(GLX_dpy->dpy, GLX_surf->glx_drawable);

return EGL_TRUE;
@@ -844,12 +862,18 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
static _EGLProc
GLX_eglGetProcAddress(_EGLDriver *drv, const char *procname)
{
(void) drv;

return (_EGLProc) glXGetProcAddress((const GLubyte *) procname);
}

static EGLBoolean
GLX_eglWaitClient(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
{
(void) drv;
(void) dpy;
(void) ctx;

glXWaitGL();
return EGL_TRUE;
}
@@ -857,6 +881,9 @@ GLX_eglWaitClient(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
static EGLBoolean
GLX_eglWaitNative(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine)
{
(void) drv;
(void) dpy;

if (engine != EGL_CORE_NATIVE_ENGINE)
return _eglError(EGL_BAD_PARAMETER, "eglWaitNative");
glXWaitX();
@@ -880,6 +907,8 @@ _eglMain(const char *args)
{
struct GLX_egl_driver *GLX_drv = CALLOC_STRUCT(GLX_egl_driver);

(void) args;

if (!GLX_drv)
return NULL;


+ 1
- 1
src/egl/main/Makefile Vedi File

@@ -57,7 +57,7 @@ EGL_NATIVE_PLATFORM=_EGL_INVALID_PLATFORM
ifeq ($(firstword $(EGL_PLATFORMS)),x11)
EGL_NATIVE_PLATFORM=_EGL_PLATFORM_X11
endif
ifeq ($(firstword $(EGL_PLATFORMS)),kms)
ifeq ($(firstword $(EGL_PLATFORMS)),drm)
EGL_NATIVE_PLATFORM=_EGL_PLATFORM_DRM
endif
ifeq ($(firstword $(EGL_PLATFORMS)),fbdev)

+ 41
- 11
src/egl/main/eglapi.c Vedi File

@@ -402,10 +402,15 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list,
_EGLContext *context;
EGLContext ret;

if (config)
_EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv);
else
_EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);
_EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);

if (!config) {
/* config may be NULL if surfaceless */
if (!disp->Extensions.KHR_surfaceless_gles1 &&
!disp->Extensions.KHR_surfaceless_gles2 &&
!disp->Extensions.KHR_surfaceless_opengl)
RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT);
}

if (!share && share_list != EGL_NO_CONTEXT)
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_CONTEXT);
@@ -459,9 +464,19 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,

if (!context && ctx != EGL_NO_CONTEXT)
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_FALSE);
if ((!draw_surf && draw != EGL_NO_SURFACE) ||
(!read_surf && read != EGL_NO_SURFACE))
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
if (!draw_surf || !read_surf) {
/* surfaces may be NULL if surfaceless */
if (!disp->Extensions.KHR_surfaceless_gles1 &&
!disp->Extensions.KHR_surfaceless_gles2 &&
!disp->Extensions.KHR_surfaceless_opengl)
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);

if ((!draw_surf && draw != EGL_NO_SURFACE) ||
(!read_surf && read != EGL_NO_SURFACE))
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
if (draw_surf || read_surf)
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
}

ret = drv->API.MakeCurrent(drv, disp, draw_surf, read_surf, context);

@@ -1276,6 +1291,8 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target,
EGLImageKHR ret;

_EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
if (!disp->Extensions.KHR_image_base)
RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR);
if (!context && ctx != EGL_NO_CONTEXT)
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_IMAGE_KHR);

@@ -1296,6 +1313,8 @@ eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image)
EGLBoolean ret;

_EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv);
if (!disp->Extensions.KHR_image_base)
RETURN_EGL_EVAL(disp, EGL_FALSE);
if (!img)
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE);

@@ -1321,6 +1340,8 @@ eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list)
EGLSyncKHR ret;

_EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv);
if (!disp->Extensions.KHR_reusable_sync)
RETURN_EGL_EVAL(disp, EGL_NO_SYNC_KHR);

sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list);
ret = (sync) ? _eglLinkSync(sync, disp) : EGL_NO_SYNC_KHR;
@@ -1338,6 +1359,8 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync)
EGLBoolean ret;

_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
assert(disp->Extensions.KHR_reusable_sync);

_eglUnlinkSync(s);
ret = drv->API.DestroySyncKHR(drv, disp, s);

@@ -1354,6 +1377,7 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t
EGLint ret;

_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
assert(disp->Extensions.KHR_reusable_sync);
ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout);

RETURN_EGL_EVAL(disp, ret);
@@ -1369,6 +1393,7 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode)
EGLBoolean ret;

_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
assert(disp->Extensions.KHR_reusable_sync);
ret = drv->API.SignalSyncKHR(drv, disp, s, mode);

RETURN_EGL_EVAL(disp, ret);
@@ -1384,6 +1409,7 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *v
EGLBoolean ret;

_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
assert(disp->Extensions.KHR_reusable_sync);
ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value);

RETURN_EGL_EVAL(disp, ret);
@@ -1407,14 +1433,14 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,

_EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv);

if (!disp->Extensions.NOK_swap_region)
RETURN_EGL_EVAL(disp, EGL_FALSE);

/* surface must be bound to current context in EGL 1.4 */
if (!ctx || !_eglIsContextLinked(ctx) || surf != ctx->DrawSurface)
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);

if (drv->API.SwapBuffersRegionNOK)
ret = drv->API.SwapBuffersRegionNOK(drv, disp, surf, numRects, rects);
else
ret = drv->API.SwapBuffers(drv, disp, surf);
ret = drv->API.SwapBuffersRegionNOK(drv, disp, surf, numRects, rects);

RETURN_EGL_EVAL(disp, ret);
}
@@ -1433,6 +1459,8 @@ eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list)
EGLImageKHR ret;

_EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
if (!disp->Extensions.MESA_drm_image)
RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR);

img = drv->API.CreateDRMImageMESA(drv, disp, attr_list);
ret = (img) ? _eglLinkImage(img, disp) : EGL_NO_IMAGE_KHR;
@@ -1450,6 +1478,8 @@ eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image,
EGLBoolean ret;

_EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv);
assert(disp->Extensions.MESA_drm_image);

if (!img)
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE);


+ 72
- 153
src/egl/main/eglconfig.c Vedi File

@@ -24,20 +24,18 @@
* IDs are from 1 to N respectively.
*/
void
_eglInitConfig(_EGLConfig *config, _EGLDisplay *dpy, EGLint id)
_eglInitConfig(_EGLConfig *conf, _EGLDisplay *dpy, EGLint id)
{
memset(config, 0, sizeof(*config));
memset(conf, 0, sizeof(*conf));

config->Display = dpy;
conf->Display = dpy;

/* some attributes take non-zero default values */
SET_CONFIG_ATTRIB(config, EGL_CONFIG_ID, id);
SET_CONFIG_ATTRIB(config, EGL_CONFIG_CAVEAT, EGL_NONE);
SET_CONFIG_ATTRIB(config, EGL_TRANSPARENT_TYPE, EGL_NONE);
SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_TYPE, EGL_NONE);
#ifdef EGL_VERSION_1_2
SET_CONFIG_ATTRIB(config, EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER);
#endif /* EGL_VERSION_1_2 */
conf->ConfigID = id;
conf->ConfigCaveat = EGL_NONE;
conf->TransparentType = EGL_NONE;
conf->NativeVisualType = EGL_NONE;
conf->ColorBufferType = EGL_RGB_BUFFER;
}


@@ -51,7 +49,7 @@ EGLConfig
_eglAddConfig(_EGLDisplay *dpy, _EGLConfig *conf)
{
/* sanity check */
assert(GET_CONFIG_ATTRIB(conf, EGL_CONFIG_ID) > 0);
assert(conf->ConfigID > 0);

if (!dpy->Configs) {
dpy->Configs = _eglCreateArray("Config", 16);
@@ -104,6 +102,7 @@ static const struct {
EGLint default_value;
} _eglValidationTable[] =
{
/* core */
{ EGL_BUFFER_SIZE, ATTRIB_TYPE_INTEGER,
ATTRIB_CRITERION_ATLEAST,
0 },
@@ -200,22 +199,13 @@ static const struct {
{ EGL_TRANSPARENT_BLUE_VALUE, ATTRIB_TYPE_INTEGER,
ATTRIB_CRITERION_EXACT,
EGL_DONT_CARE },
/* these are not real attributes */
{ EGL_MATCH_NATIVE_PIXMAP, ATTRIB_TYPE_PSEUDO,
ATTRIB_CRITERION_SPECIAL,
EGL_NONE },
/* there is a gap before EGL_SAMPLES */
{ 0x3030, ATTRIB_TYPE_PSEUDO,
ATTRIB_CRITERION_IGNORE,
0 },
{ EGL_NONE, ATTRIB_TYPE_PSEUDO,
ATTRIB_CRITERION_IGNORE,
0 },

/* extensions */
{ EGL_Y_INVERTED_NOK, ATTRIB_TYPE_BOOLEAN,
ATTRIB_CRITERION_EXACT,
EGL_DONT_CARE },

EGL_DONT_CARE }
};


@@ -232,18 +222,13 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
{
EGLint i, attr, val;
EGLBoolean valid = EGL_TRUE;
EGLint red_size = 0, green_size = 0, blue_size = 0, luminance_size = 0;
EGLint alpha_size = 0, buffer_size = 0;

/* all attributes should have been listed */
assert(ARRAY_SIZE(_eglValidationTable) == _EGL_CONFIG_NUM_ATTRIBS);

/* check attributes by their types */
for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) {
EGLint mask;

attr = _eglValidationTable[i].attr;
val = GET_CONFIG_ATTRIB(conf, attr);
val = _eglGetConfigKey(conf, attr);

switch (_eglValidationTable[i].type) {
case ATTRIB_TYPE_INTEGER:
@@ -255,30 +240,14 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
break;
case EGL_SAMPLE_BUFFERS:
/* there can be at most 1 sample buffer */
if (val > 1)
if (val > 1 || val < 0)
valid = EGL_FALSE;
break;
case EGL_RED_SIZE:
red_size = val;
break;
case EGL_GREEN_SIZE:
green_size = val;
break;
case EGL_BLUE_SIZE:
blue_size = val;
break;
case EGL_LUMINANCE_SIZE:
luminance_size = val;
break;
case EGL_ALPHA_SIZE:
alpha_size = val;
break;
case EGL_BUFFER_SIZE:
buffer_size = val;
default:
if (val < 0)
valid = EGL_FALSE;
break;
}
if (val < 0)
valid = EGL_FALSE;
break;
case ATTRIB_TYPE_BOOLEAN:
if (val != EGL_TRUE && val != EGL_FALSE)
@@ -366,17 +335,18 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)

/* now check for conflicting attribute values */

switch (GET_CONFIG_ATTRIB(conf, EGL_COLOR_BUFFER_TYPE)) {
switch (conf->ColorBufferType) {
case EGL_RGB_BUFFER:
if (luminance_size)
if (conf->LuminanceSize)
valid = EGL_FALSE;
if (red_size + green_size + blue_size + alpha_size != buffer_size)
if (conf->RedSize + conf->GreenSize +
conf->BlueSize + conf->AlphaSize != conf->BufferSize)
valid = EGL_FALSE;
break;
case EGL_LUMINANCE_BUFFER:
if (red_size || green_size || blue_size)
if (conf->RedSize || conf->GreenSize || conf->BlueSize)
valid = EGL_FALSE;
if (luminance_size + alpha_size != buffer_size)
if (conf->LuminanceSize + conf->AlphaSize != conf->BufferSize)
valid = EGL_FALSE;
break;
}
@@ -385,23 +355,19 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
return EGL_FALSE;
}

val = GET_CONFIG_ATTRIB(conf, EGL_SAMPLE_BUFFERS);
if (!val && GET_CONFIG_ATTRIB(conf, EGL_SAMPLES))
if (!conf->SampleBuffers && conf->Samples)
valid = EGL_FALSE;
if (!valid) {
_eglLog(_EGL_DEBUG, "conflicting samples and sample buffers");
return EGL_FALSE;
}

val = GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE);
if (!(val & EGL_WINDOW_BIT)) {
if (GET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_ID) != 0 ||
GET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE) != EGL_NONE)
if (!(conf->SurfaceType & EGL_WINDOW_BIT)) {
if (conf->NativeVisualID != 0 || conf->NativeVisualType != EGL_NONE)
valid = EGL_FALSE;
}
if (!(val & EGL_PBUFFER_BIT)) {
if (GET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGB) ||
GET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGBA))
if (!(conf->SurfaceType & EGL_PBUFFER_BIT)) {
if (conf->BindToTextureRGB || conf->BindToTextureRGBA)
valid = EGL_FALSE;
}
if (!valid) {
@@ -433,11 +399,11 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria)
continue;

attr = _eglValidationTable[i].attr;
cmp = GET_CONFIG_ATTRIB(criteria, attr);
cmp = _eglGetConfigKey(criteria, attr);
if (cmp == EGL_DONT_CARE)
continue;

val = GET_CONFIG_ATTRIB(conf, attr);
val = _eglGetConfigKey(conf, attr);
switch (_eglValidationTable[i].criterion) {
case ATTRIB_CRITERION_EXACT:
if (val != cmp)
@@ -478,16 +444,11 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria)
static INLINE EGLBoolean
_eglIsConfigAttribValid(_EGLConfig *conf, EGLint attr)
{
if (_eglIndexConfig(conf, attr) < 0)
if (_eglOffsetOfConfig(attr) < 0)
return EGL_FALSE;

/* there are some holes in the range */
switch (attr) {
case 0x3030 /* a gap before EGL_SAMPLES */:
case EGL_NONE:
#ifdef EGL_VERSION_1_4
case EGL_MATCH_NATIVE_PIXMAP:
#endif
return EGL_FALSE;
case EGL_Y_INVERTED_NOK:
return conf->Display->Extensions.NOK_texture_from_pixmap;
@@ -506,15 +467,12 @@ EGLBoolean
_eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list)
{
EGLint attr, val, i;
EGLint config_id = 0, level = 0;
EGLBoolean has_native_visual_type = EGL_FALSE;
EGLBoolean has_transparent_color = EGL_FALSE;

/* reset to default values */
for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) {
attr = _eglValidationTable[i].attr;
val = _eglValidationTable[i].default_value;
SET_CONFIG_ATTRIB(conf, attr, val);
_eglSetConfigKey(conf, attr, val);
}

/* parse the list */
@@ -524,59 +482,33 @@ _eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list)

if (!_eglIsConfigAttribValid(conf, attr))
return EGL_FALSE;
SET_CONFIG_ATTRIB(conf, attr, val);

/* remember some attributes for post-processing */
switch (attr) {
case EGL_CONFIG_ID:
config_id = val;
break;
case EGL_LEVEL:
level = val;
break;
case EGL_NATIVE_VISUAL_TYPE:
has_native_visual_type = EGL_TRUE;
break;
case EGL_TRANSPARENT_RED_VALUE:
case EGL_TRANSPARENT_GREEN_VALUE:
case EGL_TRANSPARENT_BLUE_VALUE:
has_transparent_color = EGL_TRUE;
break;
default:
break;
}
_eglSetConfigKey(conf, attr, val);
}

if (!_eglValidateConfig(conf, EGL_TRUE))
return EGL_FALSE;

/* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */
if (level == EGL_DONT_CARE)
if (conf->Level == EGL_DONT_CARE)
return EGL_FALSE;

/* ignore other attributes when EGL_CONFIG_ID is given */
if (config_id > 0) {
_eglResetConfigKeys(conf, EGL_DONT_CARE);
SET_CONFIG_ATTRIB(conf, EGL_CONFIG_ID, config_id);
if (conf->ConfigID > 0) {
for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) {
attr = _eglValidationTable[i].attr;
if (attr != EGL_CONFIG_ID)
_eglSetConfigKey(conf, attr, EGL_DONT_CARE);
}
}
else {
if (has_native_visual_type) {
val = GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE);
if (!(val & EGL_WINDOW_BIT))
SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE, EGL_DONT_CARE);
}
if (!(conf->SurfaceType & EGL_WINDOW_BIT))
conf->NativeVisualType = EGL_DONT_CARE;

if (has_transparent_color) {
val = GET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_TYPE);
if (val == EGL_NONE) {
SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_RED_VALUE,
EGL_DONT_CARE);
SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_GREEN_VALUE,
EGL_DONT_CARE);
SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_BLUE_VALUE,
EGL_DONT_CARE);
}
if (conf->TransparentType == EGL_NONE) {
conf->TransparentRedValue = EGL_DONT_CARE;
conf->TransparentGreenValue = EGL_DONT_CARE;
conf->TransparentBlueValue = EGL_DONT_CARE;
}
}

@@ -610,7 +542,6 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
EGL_ALPHA_MASK_SIZE,
};
EGLint val1, val2;
EGLBoolean rgb_buffer;
EGLint i;

if (conf1 == conf2)
@@ -619,44 +550,41 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
/* the enum values have the desired ordering */
assert(EGL_NONE < EGL_SLOW_CONFIG);
assert(EGL_SLOW_CONFIG < EGL_NON_CONFORMANT_CONFIG);
val1 = GET_CONFIG_ATTRIB(conf1, EGL_CONFIG_CAVEAT);
val2 = GET_CONFIG_ATTRIB(conf2, EGL_CONFIG_CAVEAT);
if (val1 != val2)
return (val1 - val2);
val1 = conf1->ConfigCaveat - conf2->ConfigCaveat;
if (val1)
return val1;

/* the enum values have the desired ordering */
assert(EGL_RGB_BUFFER < EGL_LUMINANCE_BUFFER);
val1 = GET_CONFIG_ATTRIB(conf1, EGL_COLOR_BUFFER_TYPE);
val2 = GET_CONFIG_ATTRIB(conf2, EGL_COLOR_BUFFER_TYPE);
if (val1 != val2)
return (val1 - val2);
rgb_buffer = (val1 == EGL_RGB_BUFFER);
val1 = conf1->ColorBufferType - conf2->ColorBufferType;
if (val1)
return val1;

if (criteria) {
val1 = val2 = 0;
if (rgb_buffer) {
if (GET_CONFIG_ATTRIB(criteria, EGL_RED_SIZE) > 0) {
val1 += GET_CONFIG_ATTRIB(conf1, EGL_RED_SIZE);
val2 += GET_CONFIG_ATTRIB(conf2, EGL_RED_SIZE);
if (conf1->ColorBufferType == EGL_RGB_BUFFER) {
if (criteria->RedSize > 0) {
val1 += conf1->RedSize;
val2 += conf2->RedSize;
}
if (GET_CONFIG_ATTRIB(criteria, EGL_GREEN_SIZE) > 0) {
val1 += GET_CONFIG_ATTRIB(conf1, EGL_GREEN_SIZE);
val2 += GET_CONFIG_ATTRIB(conf2, EGL_GREEN_SIZE);
if (criteria->GreenSize > 0) {
val1 += conf1->GreenSize;
val2 += conf2->GreenSize;
}
if (GET_CONFIG_ATTRIB(criteria, EGL_BLUE_SIZE) > 0) {
val1 += GET_CONFIG_ATTRIB(conf1, EGL_BLUE_SIZE);
val2 += GET_CONFIG_ATTRIB(conf2, EGL_BLUE_SIZE);
if (criteria->BlueSize > 0) {
val1 += conf1->BlueSize;
val2 += conf2->BlueSize;
}
}
else {
if (GET_CONFIG_ATTRIB(criteria, EGL_LUMINANCE_SIZE) > 0) {
val1 += GET_CONFIG_ATTRIB(conf1, EGL_LUMINANCE_SIZE);
val2 += GET_CONFIG_ATTRIB(conf2, EGL_LUMINANCE_SIZE);
if (criteria->LuminanceSize > 0) {
val1 += conf1->LuminanceSize;
val2 += conf2->LuminanceSize;
}
}
if (GET_CONFIG_ATTRIB(criteria, EGL_ALPHA_SIZE) > 0) {
val1 += GET_CONFIG_ATTRIB(conf1, EGL_ALPHA_SIZE);
val2 += GET_CONFIG_ATTRIB(conf2, EGL_ALPHA_SIZE);
if (criteria->AlphaSize > 0) {
val1 += conf1->AlphaSize;
val2 += conf2->AlphaSize;
}
}
else {
@@ -669,24 +597,15 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
return (val2 - val1);

for (i = 0; i < ARRAY_SIZE(compare_attribs); i++) {
val1 = GET_CONFIG_ATTRIB(conf1, compare_attribs[i]);
val2 = GET_CONFIG_ATTRIB(conf2, compare_attribs[i]);
val1 = _eglGetConfigKey(conf1, compare_attribs[i]);
val2 = _eglGetConfigKey(conf2, compare_attribs[i]);
if (val1 != val2)
return (val1 - val2);
}

/* EGL_NATIVE_VISUAL_TYPE cannot be compared here */

if (compare_id) {
val1 = GET_CONFIG_ATTRIB(conf1, EGL_CONFIG_ID);
val2 = GET_CONFIG_ATTRIB(conf2, EGL_CONFIG_ID);
assert(val1 != val2);
}
else {
val1 = val2 = 0;
}

return (val1 - val2);
return (compare_id) ? (conf1->ConfigID - conf2->ConfigID) : 0;
}


@@ -802,7 +721,7 @@ _eglGetConfigAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf,
if (!value)
return _eglError(EGL_BAD_PARAMETER, "eglGetConfigAttrib");

*value = GET_CONFIG_ATTRIB(conf, attribute);
*value = _eglGetConfigKey(conf, attribute);
return EGL_TRUE;
}


+ 87
- 46
src/egl/main/eglconfig.h Vedi File

@@ -6,26 +6,49 @@
#include "egltypedefs.h"


#define _EGL_CONFIG_FIRST_ATTRIB EGL_BUFFER_SIZE
#define _EGL_CONFIG_LAST_ATTRIB EGL_CONFORMANT
#define _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS \
(_EGL_CONFIG_LAST_ATTRIB - _EGL_CONFIG_FIRST_ATTRIB + 1)

/* Attributes outside the contiguous block:
*
* EGL_Y_INVERTED_NOK
*/
#define _EGL_CONFIG_FIRST_EXTRA_ATTRIB _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS
#define _EGL_CONFIG_NUM_EXTRA_ATTRIBS 1

#define _EGL_CONFIG_NUM_ATTRIBS \
_EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS + _EGL_CONFIG_NUM_EXTRA_ATTRIBS


/* update _eglValidationTable and _eglOffsetOfConfig before updating this
* struct */
struct _egl_config
{
_EGLDisplay *Display;
EGLint Storage[_EGL_CONFIG_NUM_ATTRIBS];

/* core */
EGLint BufferSize;
EGLint AlphaSize;
EGLint BlueSize;
EGLint GreenSize;
EGLint RedSize;
EGLint DepthSize;
EGLint StencilSize;
EGLint ConfigCaveat;
EGLint ConfigID;
EGLint Level;
EGLint MaxPbufferHeight;
EGLint MaxPbufferPixels;
EGLint MaxPbufferWidth;
EGLint NativeRenderable;
EGLint NativeVisualID;
EGLint NativeVisualType;
EGLint Samples;
EGLint SampleBuffers;
EGLint SurfaceType;
EGLint TransparentType;
EGLint TransparentBlueValue;
EGLint TransparentGreenValue;
EGLint TransparentRedValue;
EGLint BindToTextureRGB;
EGLint BindToTextureRGBA;
EGLint MinSwapInterval;
EGLint MaxSwapInterval;
EGLint LuminanceSize;
EGLint AlphaMaskSize;
EGLint ColorBufferType;
EGLint RenderableType;
EGLint MatchNativePixmap;
EGLint Conformant;

/* extensions */
EGLint YInvertedNOK;
};


@@ -37,38 +60,56 @@ struct _egl_config


/**
* Given a key, return an index into the storage of the config.
* Return -1 if the key is invalid.
* Map an EGL attribute enum to the offset of the member in _EGLConfig.
*/
static INLINE EGLint
_eglIndexConfig(const _EGLConfig *conf, EGLint key)
_eglOffsetOfConfig(EGLint attr)
{
(void) conf;
if (key >= _EGL_CONFIG_FIRST_ATTRIB &&
key < _EGL_CONFIG_FIRST_ATTRIB + _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS)
return key - _EGL_CONFIG_FIRST_ATTRIB;
switch (key) {
case EGL_Y_INVERTED_NOK:
return _EGL_CONFIG_FIRST_EXTRA_ATTRIB;
switch (attr) {
#define ATTRIB_MAP(attr, memb) case attr: return offsetof(_EGLConfig, memb)
/* core */
ATTRIB_MAP(EGL_BUFFER_SIZE, BufferSize);
ATTRIB_MAP(EGL_ALPHA_SIZE, AlphaSize);
ATTRIB_MAP(EGL_BLUE_SIZE, BlueSize);
ATTRIB_MAP(EGL_GREEN_SIZE, GreenSize);
ATTRIB_MAP(EGL_RED_SIZE, RedSize);
ATTRIB_MAP(EGL_DEPTH_SIZE, DepthSize);
ATTRIB_MAP(EGL_STENCIL_SIZE, StencilSize);
ATTRIB_MAP(EGL_CONFIG_CAVEAT, ConfigCaveat);
ATTRIB_MAP(EGL_CONFIG_ID, ConfigID);
ATTRIB_MAP(EGL_LEVEL, Level);
ATTRIB_MAP(EGL_MAX_PBUFFER_HEIGHT, MaxPbufferHeight);
ATTRIB_MAP(EGL_MAX_PBUFFER_PIXELS, MaxPbufferPixels);
ATTRIB_MAP(EGL_MAX_PBUFFER_WIDTH, MaxPbufferWidth);
ATTRIB_MAP(EGL_NATIVE_RENDERABLE, NativeRenderable);
ATTRIB_MAP(EGL_NATIVE_VISUAL_ID, NativeVisualID);
ATTRIB_MAP(EGL_NATIVE_VISUAL_TYPE, NativeVisualType);
ATTRIB_MAP(EGL_SAMPLES, Samples);
ATTRIB_MAP(EGL_SAMPLE_BUFFERS, SampleBuffers);
ATTRIB_MAP(EGL_SURFACE_TYPE, SurfaceType);
ATTRIB_MAP(EGL_TRANSPARENT_TYPE, TransparentType);
ATTRIB_MAP(EGL_TRANSPARENT_BLUE_VALUE, TransparentBlueValue);
ATTRIB_MAP(EGL_TRANSPARENT_GREEN_VALUE, TransparentGreenValue);
ATTRIB_MAP(EGL_TRANSPARENT_RED_VALUE, TransparentRedValue);
ATTRIB_MAP(EGL_BIND_TO_TEXTURE_RGB, BindToTextureRGB);
ATTRIB_MAP(EGL_BIND_TO_TEXTURE_RGBA, BindToTextureRGBA);
ATTRIB_MAP(EGL_MIN_SWAP_INTERVAL, MinSwapInterval);
ATTRIB_MAP(EGL_MAX_SWAP_INTERVAL, MaxSwapInterval);
ATTRIB_MAP(EGL_LUMINANCE_SIZE, LuminanceSize);
ATTRIB_MAP(EGL_ALPHA_MASK_SIZE, AlphaMaskSize);
ATTRIB_MAP(EGL_COLOR_BUFFER_TYPE, ColorBufferType);
ATTRIB_MAP(EGL_RENDERABLE_TYPE, RenderableType);
ATTRIB_MAP(EGL_MATCH_NATIVE_PIXMAP, MatchNativePixmap);
ATTRIB_MAP(EGL_CONFORMANT, Conformant);
/* extensions */
ATTRIB_MAP(EGL_Y_INVERTED_NOK, YInvertedNOK);
#undef ATTRIB_MAP
default:
return -1;
}
}


/**
* Reset all keys in the config to a given value.
*/
static INLINE void
_eglResetConfigKeys(_EGLConfig *conf, EGLint val)
{
EGLint i;
for (i = 0; i < _EGL_CONFIG_NUM_ATTRIBS; i++)
conf->Storage[i] = val;
}


/**
* Update a config for a given key.
*
@@ -79,9 +120,9 @@ _eglResetConfigKeys(_EGLConfig *conf, EGLint val)
static INLINE void
_eglSetConfigKey(_EGLConfig *conf, EGLint key, EGLint val)
{
EGLint idx = _eglIndexConfig(conf, key);
assert(idx >= 0);
conf->Storage[idx] = val;
EGLint offset = _eglOffsetOfConfig(key);
assert(offset >= 0);
*((EGLint *) ((char *) conf + offset)) = val;
}


@@ -91,9 +132,9 @@ _eglSetConfigKey(_EGLConfig *conf, EGLint key, EGLint val)
static INLINE EGLint
_eglGetConfigKey(const _EGLConfig *conf, EGLint key)
{
EGLint idx = _eglIndexConfig(conf, key);
assert(idx >= 0);
return conf->Storage[idx];
EGLint offset = _eglOffsetOfConfig(key);
assert(offset >= 0);
return *((EGLint *) ((char *) conf + offset));
}



+ 6
- 5
src/egl/main/eglcontext.c Vedi File

@@ -113,13 +113,12 @@ _eglInitContext(_EGLContext *ctx, _EGLDisplay *dpy, _EGLConfig *conf,

err = _eglParseContextAttribList(ctx, attrib_list);
if (err == EGL_SUCCESS && ctx->Config) {
EGLint renderable_type, api_bit;
EGLint api_bit;

renderable_type = GET_CONFIG_ATTRIB(ctx->Config, EGL_RENDERABLE_TYPE);
api_bit = _eglGetContextAPIBit(ctx);
if (!(renderable_type & api_bit)) {
if (!(ctx->Config->RenderableType & api_bit)) {
_eglLog(_EGL_DEBUG, "context api is 0x%x while config supports 0x%x",
api_bit, renderable_type);
api_bit, ctx->Config->RenderableType);
err = EGL_BAD_CONFIG;
}
}
@@ -183,7 +182,9 @@ _eglQueryContext(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *c,

switch (attribute) {
case EGL_CONFIG_ID:
*value = GET_CONFIG_ATTRIB(c->Config, EGL_CONFIG_ID);
if (!c->Config)
return _eglError(EGL_BAD_ATTRIBUTE, "eglQueryContext");
*value = c->Config->ConfigID;
break;
case EGL_CONTEXT_CLIENT_VERSION:
*value = c->ClientVersion;

+ 1
- 1
src/egl/main/egldisplay.c Vedi File

@@ -27,7 +27,7 @@ _eglGetNativePlatformFromEnv(void)
} egl_platforms[_EGL_NUM_PLATFORMS] = {
{ _EGL_PLATFORM_WINDOWS, "gdi" },
{ _EGL_PLATFORM_X11, "x11" },
{ _EGL_PLATFORM_DRM, "kms" },
{ _EGL_PLATFORM_DRM, "drm" },
{ _EGL_PLATFORM_FBDEV, "fbdev" }
};
_EGLPlatformType plat = _EGL_INVALID_PLATFORM;

+ 0
- 1
src/egl/main/egldriver.c Vedi File

@@ -18,7 +18,6 @@
#include "eglmisc.h"
#include "eglmode.h"
#include "eglscreen.h"
#include "eglstring.h"
#include "eglsurface.h"
#include "eglimage.h"
#include "eglsync.h"

+ 7
- 7
src/egl/main/eglsurface.c Vedi File

@@ -17,12 +17,12 @@
static void
_eglClampSwapInterval(_EGLSurface *surf, EGLint interval)
{
EGLint bound = GET_CONFIG_ATTRIB(surf->Config, EGL_MAX_SWAP_INTERVAL);
EGLint bound = surf->Config->MaxSwapInterval;
if (interval >= bound) {
interval = bound;
}
else {
bound = GET_CONFIG_ATTRIB(surf->Config, EGL_MIN_SWAP_INTERVAL);
bound = surf->Config->MinSwapInterval;
if (interval < bound)
interval = bound;
}
@@ -263,7 +263,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
return EGL_FALSE;
}

if ((GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE) & type) == 0) {
if ((conf->SurfaceType & type) == 0) {
/* The config can't be used to create a surface of this type */
_eglError(EGL_BAD_CONFIG, func);
return EGL_FALSE;
@@ -333,7 +333,7 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
*value = surface->Height;
break;
case EGL_CONFIG_ID:
*value = GET_CONFIG_ATTRIB(surface->Config, EGL_CONFIG_ID);
*value = surface->Config->ConfigID;
break;
case EGL_LARGEST_PBUFFER:
*value = surface->LargestPbuffer;
@@ -445,7 +445,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,

switch (attribute) {
case EGL_MIPMAP_LEVEL:
confval = GET_CONFIG_ATTRIB(surface->Config, EGL_RENDERABLE_TYPE);
confval = surface->Config->RenderableType;
if (!(confval & (EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT))) {
err = EGL_BAD_PARAMETER;
break;
@@ -457,7 +457,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
case EGL_MULTISAMPLE_RESOLVE_DEFAULT:
break;
case EGL_MULTISAMPLE_RESOLVE_BOX:
confval = GET_CONFIG_ATTRIB(surface->Config, EGL_SURFACE_TYPE);
confval = surface->Config->SurfaceType;
if (!(confval & EGL_MULTISAMPLE_RESOLVE_BOX_BIT))
err = EGL_BAD_MATCH;
break;
@@ -474,7 +474,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
case EGL_BUFFER_DESTROYED:
break;
case EGL_BUFFER_PRESERVED:
confval = GET_CONFIG_ATTRIB(surface->Config, EGL_SURFACE_TYPE);
confval = surface->Config->SurfaceType;
if (!(confval & EGL_SWAP_BEHAVIOR_PRESERVED_BIT))
err = EGL_BAD_MATCH;
break;

+ 1
- 1
src/gallium/Makefile.template Vedi File

@@ -40,7 +40,7 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURC
touch depend
$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(GENERATED_SOURCES) 2> /dev/null

$(PROGS): % : %.o
$(PROGS): % : %.o $(PROGS_DEPS)
$(LD) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group

# Emacs tags

+ 3
- 0
src/gallium/auxiliary/Makefile Vedi File

@@ -8,6 +8,7 @@ C_SOURCES = \
cso_cache/cso_context.c \
cso_cache/cso_hash.c \
draw/draw_context.c \
draw/draw_fs.c \
draw/draw_gs.c \
draw/draw_pipe.c \
draw/draw_pipe_aaline.c \
@@ -121,6 +122,7 @@ C_SOURCES = \
util/u_handle_table.c \
util/u_hash.c \
util/u_hash_table.c \
util/u_index_modify.c \
util/u_keymap.c \
util/u_linear.c \
util/u_linkage.c \
@@ -174,6 +176,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
gallivm/lp_bld_tgsi_aos.c \
gallivm/lp_bld_tgsi_info.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \

+ 3
- 0
src/gallium/auxiliary/SConscript Vedi File

@@ -54,6 +54,7 @@ source = [
'cso_cache/cso_context.c',
'cso_cache/cso_hash.c',
'draw/draw_context.c',
'draw/draw_fs.c',
'draw/draw_gs.c',
'draw/draw_pipe.c',
'draw/draw_pipe_aaline.c',
@@ -170,6 +171,7 @@ source = [
'util/u_handle_table.c',
'util/u_hash.c',
'util/u_hash_table.c',
'util/u_index_modify.c',
'util/u_keymap.c',
'util/u_linear.c',
'util/u_linkage.c',
@@ -225,6 +227,7 @@ if env['llvm']:
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
'gallivm/lp_bld_tgsi_aos.c',
'gallivm/lp_bld_tgsi_info.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',

+ 55
- 12
src/gallium/auxiliary/draw/draw_context.c Vedi File

@@ -413,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
}



/**
 * Reserve one additional ("extra") post-transform vertex attribute.
 *
 * Optional draw stages (e.g. wide_point) use this when they need a
 * generic/texcoord output that the bound vertex/geometry shader does not
 * itself produce.  The new attribute is recorded in
 * draw->extra_shader_outputs and placed after the outputs of the current
 * shader and after any extra attributes that were allocated earlier.
 *
 * \param draw            the draw context
 * \param semantic_name   semantic name to record for the new attribute
 * \param semantic_index  semantic index to record for the new attribute
 * \return the slot stored for the newly allocated attribute
 *
 * NOTE(review): the capacity of the extra_shader_outputs arrays is only
 * enforced by the assert below; confirm that callers cannot exceed it in
 * builds where asserts are compiled out.
 */
int
draw_alloc_extra_vertex_attrib(struct draw_context *draw,
                               uint semantic_name, uint semantic_index)
{
   const int first_free_slot = draw_current_shader_outputs(draw);
   const int idx = draw->extra_shader_outputs.num;

   assert(idx < Elements(draw->extra_shader_outputs.semantic_name));

   /* extra attributes are appended after the shader's own outputs */
   draw->extra_shader_outputs.slot[idx] = first_free_slot + idx;
   draw->extra_shader_outputs.semantic_index[idx] = semantic_index;
   draw->extra_shader_outputs.semantic_name[idx] = semantic_name;
   draw->extra_shader_outputs.num++;

   return draw->extra_shader_outputs.slot[idx];
}


/**
 * Remove all extra vertex attributes that were allocated with
 * draw_alloc_extra_vertex_attrib().
 *
 * Only the count is reset to zero; the stored semantic_name /
 * semantic_index / slot entries are left untouched and get overwritten
 * by later allocations, which always write at index
 * extra_shader_outputs.num.
 *
 * \param draw  the draw context whose extra attributes are discarded
 */
void
draw_remove_extra_vertex_attribs(struct draw_context *draw)
{
draw->extra_shader_outputs.num = 0;
}


/**
* Ask the draw module for the location/slot of the given vertex attribute in
* a post-transformed vertex.
@@ -446,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw,
return i;
}

/* XXX there may be more than one extra vertex attrib.
* For example, simulated gl_FragCoord and gl_PointCoord.
*/
if (draw->extra_shader_outputs.semantic_name == semantic_name &&
draw->extra_shader_outputs.semantic_index == semantic_index) {
return draw->extra_shader_outputs.slot;
/* Search the extra vertex attributes */
for (i = 0; i < draw->extra_shader_outputs.num; i++) {
if (draw->extra_shader_outputs.semantic_name[i] == semantic_name &&
draw->extra_shader_outputs.semantic_index[i] == semantic_index) {
return draw->extra_shader_outputs.slot[i];
}
}

return 0;
@@ -470,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw,
uint
draw_num_shader_outputs(const struct draw_context *draw)
{
uint count = draw->vs.vertex_shader->info.num_outputs;
uint count;

/* If a geometry shader is present, its outputs go to the
* driver, else the vertex shader's outputs.
*/
if (draw->gs.geometry_shader)
count = draw->gs.geometry_shader->info.num_outputs;
else
count = draw->vs.vertex_shader->info.num_outputs;

count += draw->extra_shader_outputs.num;

if (draw->extra_shader_outputs.slot > 0)
count++;
return count;
}

@@ -671,6 +709,11 @@ draw_set_samplers(struct draw_context *draw,
draw->samplers[i] = NULL;

draw->num_samplers = num;

#ifdef HAVE_LLVM
if (draw->llvm)
draw_llvm_set_sampler_state(draw);
#endif
}

void
@@ -678,9 +721,9 @@ draw_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
const void *data[DRAW_MAX_TEXTURE_LEVELS])
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
const void *data[PIPE_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
if(draw->llvm)

+ 15
- 4
src/gallium/auxiliary/draw/draw_context.h Vedi File

@@ -46,9 +46,9 @@ struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
struct draw_geometry_shader;
struct draw_fragment_shader;
struct tgsi_sampler;

#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */

struct draw_context *draw_create( struct pipe_context *pipe );

@@ -119,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
const void *data[DRAW_MAX_TEXTURE_LEVELS]);
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
const void *data[PIPE_MAX_TEXTURE_LEVELS]);


/*
@@ -137,6 +137,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs);


/*
 * Fragment shader functions
 *
 * draw_create_fragment_shader() returns a new draw_fragment_shader built
 * from the given pipe_shader_state (NULL if the allocation failed).
 * draw_bind_fragment_shader() makes 'dfs' the draw context's current
 * fragment shader, and draw_delete_fragment_shader() frees an object
 * obtained from draw_create_fragment_shader().
 */
struct draw_fragment_shader *
draw_create_fragment_shader(struct draw_context *draw,
                            const struct pipe_shader_state *shader);
void draw_bind_fragment_shader(struct draw_context *draw,
                               struct draw_fragment_shader *dfs);
void draw_delete_fragment_shader(struct draw_context *draw,
                                 struct draw_fragment_shader *dfs);

/*
* Geometry shader functions
*/

+ 73
- 0
src/gallium/auxiliary/draw/draw_fs.c Vedi File

@@ -0,0 +1,73 @@
/**************************************************************************
*
* Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/

#include "pipe/p_shader_tokens.h"

#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"

#include "tgsi/tgsi_parse.h"

#include "draw_fs.h"
#include "draw_private.h"
#include "draw_context.h"


struct draw_fragment_shader *
draw_create_fragment_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
{
struct draw_fragment_shader *dfs;

dfs = CALLOC_STRUCT(draw_fragment_shader);
if (dfs) {
dfs->base = *shader;
tgsi_scan_shader(shader->tokens, &dfs->info);
}

return dfs;
}


/**
 * Make 'dfs' the draw context's current fragment shader.
 *
 * \param draw  the draw context
 * \param dfs   the fragment shader object to bind
 */
void
draw_bind_fragment_shader(struct draw_context *draw,
struct draw_fragment_shader *dfs)
{
/* Flush before the state changes -- presumably so that anything already
 * queued is processed with the previously bound shader (TODO confirm).
 */
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);

draw->fs.fragment_shader = dfs;
}


/**
 * Free a fragment shader object.
 *
 * Only the draw_fragment_shader object itself is released with FREE();
 * nothing reachable through dfs->base is freed here.
 *
 * \param draw  the draw context (not referenced by this function)
 * \param dfs   the object to free
 */
void
draw_delete_fragment_shader(struct draw_context *draw,
struct draw_fragment_shader *dfs)
{
FREE(dfs);
}


+ 42
- 0
src/gallium/auxiliary/draw/draw_fs.h Vedi File

@@ -0,0 +1,42 @@
/**************************************************************************
*
* Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/

#ifndef DRAW_FS_H
#define DRAW_FS_H


#include "tgsi/tgsi_scan.h"


/**
 * The draw module's per-fragment-shader state.
 */
struct draw_fragment_shader
{
struct pipe_shader_state base;  /**< the shader state this object describes */
struct tgsi_shader_info info;   /**< TGSI shader information about the shader */
};


#endif /* DRAW_FS_H */

+ 43
- 6
src/gallium/auxiliary/draw/draw_llvm.c Vedi File

@@ -44,6 +44,7 @@
#include "tgsi/tgsi_dump.h"

#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include "util/u_pointer.h"
#include "util/u_string.h"

@@ -71,12 +72,17 @@ init_globals(struct draw_llvm *llvm)
elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_DATA] =
LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
DRAW_MAX_TEXTURE_LEVELS);
PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
LLVMArrayType(LLVMFloatType(), 4);

texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);

@@ -101,6 +107,18 @@ init_globals(struct draw_llvm *llvm)
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_DATA);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_MIN_LOD);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_MAX_LOD);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_LOD_BIAS);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_BORDER_COLOR);
LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
llvm->target, texture_type);

@@ -1048,9 +1066,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
const void *data[DRAW_MAX_TEXTURE_LEVELS])
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
const void *data[PIPE_MAX_TEXTURE_LEVELS])
{
unsigned j;
struct draw_jit_texture *jit_tex;
@@ -1072,6 +1090,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
}
}


/**
 * Copy the dynamic sampler parameters (min/max LOD, LOD bias and border
 * color) of every bound sampler into the matching texture entry of the
 * LLVM JIT context.  Entries whose sampler pointer is NULL are left
 * unchanged.
 *
 * \param draw  the draw context; draw->llvm is dereferenced, so the
 *              caller is expected to have checked it
 */
void
draw_llvm_set_sampler_state(struct draw_context *draw)
{
   unsigned unit;

   for (unit = 0; unit < draw->num_samplers; unit++) {
      struct draw_jit_texture *tex = &draw->llvm->jit_context.textures[unit];

      /* nothing bound to this unit */
      if (!draw->samplers[unit])
         continue;

      tex->min_lod = draw->samplers[unit]->min_lod;
      tex->max_lod = draw->samplers[unit]->max_lod;
      tex->lod_bias = draw->samplers[unit]->lod_bias;
      COPY_4V(tex->border_color, draw->samplers[unit]->border_color);
   }
}


void
draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
{

+ 17
- 7
src/gallium/auxiliary/draw/draw_llvm.h Vedi File

@@ -41,7 +41,6 @@
#include <llvm-c/Target.h>
#include <llvm-c/ExecutionEngine.h>

#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */

struct draw_llvm;
struct llvm_vertex_shader;
@@ -52,9 +51,13 @@ struct draw_jit_texture
uint32_t height;
uint32_t depth;
uint32_t last_level;
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
const void *data[DRAW_MAX_TEXTURE_LEVELS];
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
const void *data[PIPE_MAX_TEXTURE_LEVELS];
float min_lod;
float max_lod;
float lod_bias;
float border_color[4];
};

enum {
@@ -65,6 +68,10 @@ enum {
DRAW_JIT_TEXTURE_ROW_STRIDE,
DRAW_JIT_TEXTURE_IMG_STRIDE,
DRAW_JIT_TEXTURE_DATA,
DRAW_JIT_TEXTURE_MIN_LOD,
DRAW_JIT_TEXTURE_MAX_LOD,
DRAW_JIT_TEXTURE_LOD_BIAS,
DRAW_JIT_TEXTURE_BORDER_COLOR,
DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
};

@@ -274,13 +281,16 @@ struct lp_build_sampler_soa *
draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
LLVMValueRef context_ptr);

void
draw_llvm_set_sampler_state(struct draw_context *draw);

void
draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
const void *data[DRAW_MAX_TEXTURE_LEVELS]);
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
const void *data[PIPE_MAX_TEXTURE_LEVELS]);

#endif

+ 8
- 0
src/gallium/auxiliary/draw/draw_llvm_sample.c Vedi File

@@ -146,6 +146,10 @@ DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE)
DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE)
DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE)
DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE)
DRAW_LLVM_TEXTURE_MEMBER(min_lod, DRAW_JIT_TEXTURE_MIN_LOD, TRUE)
DRAW_LLVM_TEXTURE_MEMBER(max_lod, DRAW_JIT_TEXTURE_MAX_LOD, TRUE)
DRAW_LLVM_TEXTURE_MEMBER(lod_bias, DRAW_JIT_TEXTURE_LOD_BIAS, TRUE)
DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE)


static void
@@ -207,6 +211,10 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride;
sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride;
sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr;
sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod;
sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod;
sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias;
sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color;
sampler->dynamic_state.static_state = static_state;
sampler->dynamic_state.context_ptr = context_ptr;


+ 4
- 5
src/gallium/auxiliary/draw/draw_pipe_aaline.c Vedi File

@@ -688,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
aaline->tex_slot = draw_current_shader_outputs(draw);
aaline->pos_slot = draw_current_shader_position_output(draw);;

/* advertise the extra post-transformed vertex attribute */
draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
draw->extra_shader_outputs.slot = aaline->tex_slot;
/* allocate the extra post-transformed vertex attribute */
(void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
aaline->fs->generic_attrib);

/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -744,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)

draw->suspend_flushing = FALSE;

draw->extra_shader_outputs.slot = 0;
draw_remove_extra_vertex_attribs(draw);
}



+ 4
- 4
src/gallium/auxiliary/draw/draw_pipe_aapoint.c Vedi File

@@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)

aapoint->pos_slot = draw_current_shader_position_output(draw);

draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
draw->extra_shader_outputs.slot = aapoint->tex_slot;
/* allocate the extra post-transformed vertex attribute */
(void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
aapoint->fs->generic_attrib);

/* find psize slot in post-transform vertex */
aapoint->psize_slot = -1;
@@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)

draw->suspend_flushing = FALSE;

draw->extra_shader_outputs.slot = 0;
draw_remove_extra_vertex_attribs(draw);
}



+ 31
- 18
src/gallium/auxiliary/draw/draw_pipe_clip.c Vedi File

@@ -263,6 +263,8 @@ do_clip_tri( struct draw_stage *stage,
clipmask &= ~(1<<plane_idx);

assert(n < MAX_CLIPPED_VERTICES);
if (n >= MAX_CLIPPED_VERTICES)
return;
inlist[n] = inlist[0]; /* prevent rotation of vertices */

for (i = 1; i <= n; i++) {
@@ -272,16 +274,22 @@ do_clip_tri( struct draw_stage *stage,

if (!IS_NEGATIVE(dp_prev)) {
assert(outcount < MAX_CLIPPED_VERTICES);
if (outcount >= MAX_CLIPPED_VERTICES)
return;
outlist[outcount++] = vert_prev;
}

if (DIFFERENT_SIGNS(dp, dp_prev)) {
struct vertex_header *new_vert;

assert(tmpnr < MAX_CLIPPED_VERTICES+1);
assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
return;
new_vert = clipper->stage.tmp[tmpnr++];

assert(outcount < MAX_CLIPPED_VERTICES);
if (outcount >= MAX_CLIPPED_VERTICES)
return;
outlist[outcount++] = new_vert;

if (IS_NEGATIVE(dp)) {
@@ -321,27 +329,32 @@ do_clip_tri( struct draw_stage *stage,

/* If flat-shading, copy provoking vertex color to polygon vertex[0]
*/
if (clipper->flat) {
if (stage->draw->rasterizer->flatshade_first) {
if (inlist[0] != header->v[0]) {
assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[0]);
if (n >= 3) {
if (clipper->flat) {
if (stage->draw->rasterizer->flatshade_first) {
if (inlist[0] != header->v[0]) {
assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
return;
inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[0]);
}
}
}
else {
if (inlist[0] != header->v[2]) {
assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[2]);
else {
if (inlist[0] != header->v[2]) {
assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
return;
inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[2]);
}
}
}
}

/* Emit the polygon as triangles to the setup stage:
*/
if (n >= 3)
/* Emit the polygon as triangles to the setup stage:
*/
emit_poly( stage, inlist, n, header );
}
}



+ 2
- 2
src/gallium/auxiliary/draw/draw_pipe_validate.c Vedi File

@@ -172,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
&& !rast->line_smooth);

/* drawing large points? */
/* drawing large/sprite points (but not AA points)? */
if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (rast->point_smooth && draw->pipeline.aapoint)
@@ -207,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
precalc_flat = TRUE;
}

if (wide_points || rast->sprite_coord_enable) {
if (wide_points) {
draw->pipeline.wide_point->next = next;
next = draw->pipeline.wide_point;
}

+ 58
- 60
src/gallium/auxiliary/draw/draw_pipe_wide_point.c Vedi File

@@ -57,26 +57,24 @@
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "draw_fs.h"
#include "draw_vs.h"
#include "draw_pipe.h"


struct widepoint_stage {
struct draw_stage stage;
struct draw_stage stage; /**< base class */

float half_point_size;

float xbias;
float ybias;

uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS];
uint num_texcoords;
uint texcoord_mode;
/** for automatic texcoord generation/replacement */
uint num_texcoord_gen;
uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS];

int psize_slot;

int point_coord_fs_input; /**< input for pointcoord */
};


@@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage )
static void set_texcoords(const struct widepoint_stage *wide,
struct vertex_header *v, const float tc[4])
{
const struct draw_context *draw = wide->stage.draw;
const struct pipe_rasterizer_state *rast = draw->rasterizer;
const uint texcoord_mode = rast->sprite_coord_mode;
uint i;
for (i = 0; i < wide->num_texcoords; i++) {
if (wide->texcoord_enable[i]) {
uint j = wide->texcoord_slot[i];
v->data[j][0] = tc[0];
if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
v->data[j][1] = 1.0f - tc[1];
else
v->data[j][1] = tc[1];
v->data[j][2] = tc[2];
v->data[j][3] = tc[3];
}
}

if (wide->point_coord_fs_input >= 0) {
/* put gl_PointCoord into the extra vertex slot */
uint slot = wide->stage.draw->extra_shader_outputs.slot;
for (i = 0; i < wide->num_texcoord_gen; i++) {
const uint slot = wide->texcoord_gen_slot[i];
v->data[slot][0] = tc[0];
if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
v->data[slot][1] = 1.0f - tc[1];
else
v->data[slot][1] = tc[1];
v->data[slot][2] = 0.0F;
v->data[slot][3] = 1.0F;
v->data[slot][2] = tc[2];
v->data[slot][3] = tc[3];
}
}

@@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage,
}


static int
find_pntc_input_attrib(struct draw_context *draw)
{
/* Scan the fragment program's input decls to find the pointcoord
* attribute. The xy components will store the point coord.
*/
return 0; /* XXX fix this */
}


static void widepoint_first_point( struct draw_stage *stage,
struct prim_header *header )
static void
widepoint_first_point(struct draw_stage *stage,
struct prim_header *header)
{
struct widepoint_stage *wide = widepoint_stage(stage);
struct draw_context *draw = stage->draw;
@@ -244,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage,
stage->point = draw_pipe_passthrough_point;
}

draw_remove_extra_vertex_attribs(draw);

if (rast->point_quad_rasterization) {
/* find vertex shader texcoord outputs */
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i, j = 0;
wide->texcoord_mode = rast->sprite_coord_mode;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
wide->texcoord_slot[j] = i;
wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1;
j++;
const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
uint i;

wide->num_texcoord_gen = 0;

/* Loop over fragment shader inputs looking for generic inputs
* for which bit 'k' in sprite_coord_enable is set.
*/
for (i = 0; i < fs->info.num_inputs; i++) {
if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
const int generic_index = fs->info.input_semantic_index[i];
/* Note that sprite_coord enable is a bitfield of
* PIPE_MAX_SHADER_OUTPUTS bits.
*/
if (generic_index < PIPE_MAX_SHADER_OUTPUTS &&
(rast->sprite_coord_enable & (1 << generic_index))) {
/* OK, this generic attribute needs to be replaced with a
* texcoord (see above).
*/
int slot = draw_find_shader_output(draw,
TGSI_SEMANTIC_GENERIC,
generic_index);

if (slot > 0) {
/* there's already a post-vertex shader attribute
* for this fragment shader input attribute.
*/
}
else {
/* need to allocate a new post-vertex shader attribute */
slot = draw_alloc_extra_vertex_attrib(draw,
TGSI_SEMANTIC_GENERIC,
generic_index);
}

/* add this slot to the texcoord-gen list */
wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
}
}
}
wide->num_texcoords = j;

/* find fragment shader PointCoord input */
wide->point_coord_fs_input = find_pntc_input_attrib(draw);

/* setup extra vp output (point coord implemented as a texcoord) */
draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
draw->extra_shader_outputs.semantic_index = 0;
draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
}
else {
wide->point_coord_fs_input = -1;
draw->extra_shader_outputs.slot = 0;
}

wide->psize_slot = -1;
@@ -295,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )

stage->point = widepoint_first_point;
stage->next->flush( stage->next, flags );
stage->draw->extra_shader_outputs.slot = 0;

draw_remove_extra_vertex_attribs(draw);

/* restore original rasterizer state */
if (draw->rast_handle) {

+ 14
- 3
src/gallium/auxiliary/draw/draw_private.h Vedi File

@@ -250,6 +250,11 @@ struct draw_context
struct tgsi_sampler **samplers;
} gs;

/** Fragment shader state */
struct {
struct draw_fragment_shader *fragment_shader;
} fs;

/** Stream output (vertex feedback) state */
struct {
struct pipe_stream_output_state state;
@@ -266,9 +271,10 @@ struct draw_context
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
struct {
uint semantic_name;
uint semantic_index;
int slot;
uint num;
uint semantic_name[10];
uint semantic_index[10];
uint slot[10];
} extra_shader_outputs;

unsigned reduced_prim;
@@ -362,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw );
uint draw_current_shader_outputs(const struct draw_context *draw);
uint draw_current_shader_position_output(const struct draw_context *draw);

int draw_alloc_extra_vertex_attrib(struct draw_context *draw,
uint semantic_name, uint semantic_index);
void draw_remove_extra_vertex_attribs(struct draw_context *draw);


/*******************************************************************************
* Vertex processing (was passthrough) code:
*/

+ 427
- 84
src/gallium/auxiliary/gallivm/lp_bld_arit.c Vedi File

@@ -614,17 +614,15 @@ lp_build_div(struct lp_build_context *bld,


/**
* Linear interpolation.
*
* This also works for integer values with a few caveats.
* Linear interpolation -- without any checks.
*
* @sa http://www.stereopsis.com/doubleblend.html
*/
LLVMValueRef
lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef v0,
LLVMValueRef v1)
static INLINE LLVMValueRef
lp_build_lerp_simple(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef v0,
LLVMValueRef v1)
{
LLVMValueRef delta;
LLVMValueRef res;
@@ -639,12 +637,80 @@ lp_build_lerp(struct lp_build_context *bld,

res = lp_build_add(bld, v0, res);

if(bld->type.fixed)
if (bld->type.fixed) {
/* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
* but it will be wrong for other uses. Basically we need a more
* powerful lp_type, capable of further distinguishing the values
* interpretation from the value storage. */
res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
}

return res;
}


/**
 * Linear interpolation.
 *
 * Non-normalized types are handed straight to lp_build_lerp_simple().
 *
 * Unsigned normalized types are unpacked into two vectors whose elements
 * are twice as wide (so the intermediate result of the multiplication
 * fits), each half is interpolated separately, and the two halves are
 * packed back into a single vector of the original type.
 *
 * @param x   interpolation weight
 * @param v0  first endpoint, forwarded to lp_build_lerp_simple()
 * @param v1  second endpoint, forwarded to lp_build_lerp_simple()
 * @return the interpolated value
 */
LLVMValueRef
lp_build_lerp(struct lp_build_context *bld,
              LLVMValueRef x,
              LLVMValueRef v0,
              LLVMValueRef v1)
{
   const struct lp_type type = bld->type;
   struct lp_type wide_type;
   struct lp_build_context wide_bld;
   LLVMValueRef x_lo, x_hi;
   LLVMValueRef v0_lo, v0_hi;
   LLVMValueRef v1_lo, v1_hi;
   LLVMValueRef res_lo, res_hi;
   LLVMValueRef shift;

   assert(lp_check_value(type, x));
   assert(lp_check_value(type, v0));
   assert(lp_check_value(type, v1));

   /* Everything that is not normalized needs no special treatment. */
   if (!type.norm) {
      return lp_build_lerp_simple(bld, x, v0, v1);
   }

   assert(type.length >= 2);
   assert(!type.sign);

   /*
    * Describe a fixed-point type with elements twice as wide and half as
    * many of them; wide enough for the intermediate multiplication result.
    */
   memset(&wide_type, 0, sizeof wide_type);
   wide_type.fixed = TRUE;
   wide_type.width = type.width*2;
   wide_type.length = type.length/2;

   lp_build_context_init(&wide_bld, bld->builder, wide_type);

   /* Split every operand into its low and high halves. */
   lp_build_unpack2(bld->builder, type, wide_type, x, &x_lo, &x_hi);
   lp_build_unpack2(bld->builder, type, wide_type, v0, &v0_lo, &v0_hi);
   lp_build_unpack2(bld->builder, type, wide_type, v1, &v1_lo, &v1_hi);

   /*
    * Rescale the weight from [0, 255] to [0, 256] by adding its most
    * significant bit to itself.
    */
   shift = lp_build_const_int_vec(wide_type, type.width - 1);

   x_lo = lp_build_add(&wide_bld, x_lo,
                       LLVMBuildAShr(bld->builder, x_lo, shift, ""));
   x_hi = lp_build_add(&wide_bld, x_hi,
                       LLVMBuildAShr(bld->builder, x_hi, shift, ""));

   /* Interpolate the two halves independently, then merge them again. */
   res_lo = lp_build_lerp_simple(&wide_bld, x_lo, v0_lo, v1_lo);
   res_hi = lp_build_lerp_simple(&wide_bld, x_hi, v0_hi, v1_hi);

   return lp_build_pack2(bld->builder, wide_type, type, res_lo, res_hi);
}
@@ -923,35 +989,122 @@ lp_build_round_sse41(struct lp_build_context *bld,
enum lp_build_round_sse41_mode mode)
{
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef i32t = LLVMInt32Type();
const char *intrinsic;
LLVMValueRef res;

assert(type.floating);
assert(type.width*type.length == 128);
assert(lp_check_value(type, a));
assert(util_cpu_caps.has_sse4_1);

switch(type.width) {
case 32:
intrinsic = "llvm.x86.sse41.round.ps";
break;
case 64:
intrinsic = "llvm.x86.sse41.round.pd";
break;
default:
assert(0);
return bld->undef;
if (type.length == 1) {
LLVMTypeRef vec_type;
LLVMValueRef undef;
LLVMValueRef args[3];
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);

switch(type.width) {
case 32:
intrinsic = "llvm.x86.sse41.round.ss";
break;
case 64:
intrinsic = "llvm.x86.sse41.round.sd";
break;
default:
assert(0);
return bld->undef;
}

vec_type = LLVMVectorType(bld->elem_type, 4);

undef = LLVMGetUndef(vec_type);

args[0] = undef;
args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
args[2] = LLVMConstInt(i32t, mode, 0);

res = lp_build_intrinsic(bld->builder, intrinsic,
vec_type, args, Elements(args));

res = LLVMBuildExtractElement(bld->builder, res, index0, "");
}
else {
assert(type.width*type.length == 128);

switch(type.width) {
case 32:
intrinsic = "llvm.x86.sse41.round.ps";
break;
case 64:
intrinsic = "llvm.x86.sse41.round.pd";
break;
default:
assert(0);
return bld->undef;
}

res = lp_build_intrinsic_binary(bld->builder, intrinsic,
bld->vec_type, a,
LLVMConstInt(i32t, mode, 0));
}

return res;
}


static INLINE LLVMValueRef
lp_build_iround_nearest_sse2(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMTypeRef i32t = LLVMInt32Type();
LLVMTypeRef ret_type = lp_build_int_vec_type(type);
const char *intrinsic;
LLVMValueRef res;

assert(type.floating);
/* using the double precision conversions is a bit more complicated */
assert(type.width == 32);

assert(lp_check_value(type, a));
assert(util_cpu_caps.has_sse2);

/* This is relying on MXCSR rounding mode, which should always be nearest. */
if (type.length == 1) {
LLVMTypeRef vec_type;
LLVMValueRef undef;
LLVMValueRef arg;
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);

vec_type = LLVMVectorType(bld->elem_type, 4);

intrinsic = "llvm.x86.sse.cvtss2si";

undef = LLVMGetUndef(vec_type);

arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");

res = lp_build_intrinsic_unary(bld->builder, intrinsic,
ret_type, arg);
}
else {
assert(type.width*type.length == 128);

intrinsic = "llvm.x86.sse2.cvtps2dq";

res = lp_build_intrinsic_unary(bld->builder, intrinsic,
ret_type, a);
}

return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
LLVMConstInt(LLVMInt32Type(), mode, 0));
return res;
}


/**
* Return the integer part of a float (vector) value. The returned value is
* a float (vector).
* Ex: trunc(-1.5) = 1.0
* Return the integer part of a float (vector) value (== round toward zero).
* The returned value is a float (vector).
* Ex: trunc(-1.5) = -1.0
*/
LLVMValueRef
lp_build_trunc(struct lp_build_context *bld,
@@ -962,8 +1115,10 @@ lp_build_trunc(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
@@ -990,8 +1145,10 @@ lp_build_round(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1016,8 +1173,10 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1042,8 +1201,10 @@ lp_build_ceil(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1068,9 +1229,9 @@ lp_build_fract(struct lp_build_context *bld,


/**
* Return the integer part of a float (vector) value. The returned value is
* an integer (vector).
* Ex: itrunc(-1.5) = 1
* Return the integer part of a float (vector) value (== round toward zero).
* The returned value is an integer (vector).
* Ex: itrunc(-1.5) = -1
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
@@ -1097,31 +1258,40 @@ lp_build_iround(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;

assert(type.floating);

assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
if (util_cpu_caps.has_sse2 &&
((type.width == 32) && (type.length == 1 || type.length == 4))) {
return lp_build_iround_nearest_sse2(bld, a);
}
else if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;
LLVMValueRef half;

/* get sign bit */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");

/* sign * 0.5 */
half = lp_build_const_vec(type, 0.5);
half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
half = LLVMBuildOr(bld->builder, sign, half, "");
half = LLVMBuildBitCast(bld->builder, half, vec_type, "");

if (type.sign) {
LLVMTypeRef vec_type = bld->vec_type;
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;

/* get sign bit */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");

/* sign * 0.5 */
half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
half = LLVMBuildOr(bld->builder, sign, half, "");
half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
}

res = LLVMBuildFAdd(bld->builder, a, half, "");
}
@@ -1142,37 +1312,42 @@ lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;

assert(type.floating);
assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
}
else {
/* Take the sign bit and add it to 1 constant */
LLVMTypeRef vec_type = lp_build_vec_type(type);
unsigned mantissa = lp_mantissa(type);
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;
LLVMValueRef offset;

/* sign = a < 0 ? ~0 : 0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");

/* offset = -0.99999(9)f */
offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
offset = LLVMConstBitCast(offset, int_vec_type);

/* offset = a < 0 ? offset : 0.0f */
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");

res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
res = a;

if (type.sign) {
/* Take the sign bit and add it to 1 constant */
LLVMTypeRef vec_type = bld->vec_type;
unsigned mantissa = lp_mantissa(type);
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;
LLVMValueRef offset;

/* sign = a < 0 ? ~0 : 0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");

/* offset = -0.99999(9)f */
offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
offset = LLVMConstBitCast(offset, int_vec_type);

/* offset = a < 0 ? offset : 0.0f */
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");

res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res");
}
}

/* round to nearest (toward zero) */
@@ -1192,35 +1367,39 @@ lp_build_iceil(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;

assert(type.floating);
assert(lp_check_value(type, a));

if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
if (util_cpu_caps.has_sse4_1 &&
(type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef vec_type = bld->vec_type;
unsigned mantissa = lp_mantissa(type);
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;
LLVMValueRef offset;

/* sign = a < 0 ? 0 : ~0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
sign = LLVMBuildNot(bld->builder, sign, "iceil.not");

/* offset = 0.99999(9)f */
offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
offset = LLVMConstBitCast(offset, int_vec_type);

/* offset = a < 0 ? 0.0 : offset */
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
if (type.sign) {
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;

/* sign = a < 0 ? 0 : ~0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
sign = LLVMBuildNot(bld->builder, sign, "iceil.not");

/* offset = a < 0 ? 0.0 : offset */
offset = LLVMConstBitCast(offset, int_vec_type);
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
}

res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
@@ -1232,6 +1411,46 @@ lp_build_iceil(struct lp_build_context *bld,
}


/**
 * Combined ifloor() & fract().
 *
 * Prefer this over calling the two functions separately: it picks whichever
 * strategy (floor() first or ifloor() first) results in less redundant work.
 *
 * @param a          floating point input value
 * @param out_ipart  receives the integer part, as an integer (vector)
 * @param out_fpart  receives a minus its floor, as a float (vector)
 */
void
lp_build_ifloor_fract(struct lp_build_context *bld,
                      LLVMValueRef a,
                      LLVMValueRef *out_ipart,
                      LLVMValueRef *out_fpart)
{
   const struct lp_type type = bld->type;
   LLVMValueRef int_part;
   LLVMValueRef float_floor;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (!util_cpu_caps.has_sse4_1 ||
       (type.length != 1 && type.width*type.length != 128)) {
      /*
       * The SSE4.1 path is not taken: ifloor() is easier, so compute the
       * integer part first and convert it back to float for the subtraction.
       */
      int_part = lp_build_ifloor(bld, a);
      *out_ipart = int_part;
      float_floor = LLVMBuildSIToFP(bld->builder, int_part, bld->vec_type, "ipart");
      *out_fpart = LLVMBuildFSub(bld->builder, a, float_floor, "fpart");
   }
   else {
      /*
       * floor() is easier: compute it in floating point and convert the
       * result to integer afterwards.
       */
      float_floor = lp_build_floor(bld, a);
      *out_fpart = LLVMBuildFSub(bld->builder, a, float_floor, "fpart");
      *out_ipart = LLVMBuildFPToSI(bld->builder, float_floor, bld->int_vec_type, "ipart");
   }
}


LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
@@ -2040,6 +2259,71 @@ lp_build_exp2(struct lp_build_context *bld,
}


/**
 * Extract the exponent of a IEEE-754 floating point value.
 *
 * Optionally apply an integer bias.
 *
 * The exponent field is isolated by shifting the mantissa out and masking
 * off the sign bit, then the format's own exponent bias is removed.  The
 * field width and the format bias are derived from the type width and the
 * mantissa width, so the result is not tied to 32-bit floats.
 *
 * Result is an integer value with
 *
 *   ifloor(log2(x)) + bias
 *
 * NOTE(review): that identity presumably only holds for normal, non-zero
 * values; zeros and denormals have an all-zero exponent field.
 *
 * @param x     floating point value (vector) of the context's type
 * @param bias  integer added to the unbiased exponent
 * @return integer (vector) holding the exponent plus bias
 */
LLVMValueRef
lp_build_extract_exponent(struct lp_build_context *bld,
                          LLVMValueRef x,
                          int bias)
{
   const struct lp_type type = bld->type;
   const unsigned mantissa = lp_mantissa(type);
   /* sign bit + exponent field + mantissa field make up the whole value */
   const unsigned exponent_bits = type.width - 1 - mantissa;
   /* 255 for 32-bit floats */
   const long long exponent_mask = (1LL << exponent_bits) - 1;
   /* 127 for 32-bit floats */
   const long long exponent_bias = (1LL << (exponent_bits - 1)) - 1;
   LLVMValueRef res;

   assert(type.floating);

   assert(lp_check_value(bld->type, x));

   x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");

   /* move the exponent field down to bit 0 and drop the sign bit */
   res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
   res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, exponent_mask), "");

   /* remove the format bias and apply the caller's bias in one step */
   res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, exponent_bias - bias), "");

   return res;
}


/**
 * Extract the mantissa of a floating point value.
 *
 * The mantissa bits of x are kept and combined with the bit pattern of the
 * constant 1.0, so the result is a floating point value with
 *
 *   x / 2**floor(log2(x))
 *
 * @param x  floating point value (vector) of the context's type
 * @return floating point value (vector) built from the mantissa bits of x
 */
LLVMValueRef
lp_build_extract_mantissa(struct lp_build_context *bld,
                          LLVMValueRef x)
{
   const struct lp_type type = bld->type;
   const unsigned mantissa_bits = lp_mantissa(type);
   LLVMValueRef mantissa_mask;
   LLVMValueRef one_bits;
   LLVMValueRef bits;

   assert(lp_check_value(bld->type, x));

   assert(type.floating);

   /* mask selecting only the mantissa field */
   mantissa_mask = lp_build_const_int_vec(type, (1ULL << mantissa_bits) - 1);

   /* integer bit pattern of the floating point constant one */
   one_bits = LLVMConstBitCast(bld->one, bld->int_vec_type);

   bits = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");

   /* res = x / 2**ipart: keep the mantissa, substitute the bits of 1.0 */
   bits = LLVMBuildAnd(bld->builder, bits, mantissa_mask, "");
   bits = LLVMBuildOr(bld->builder, bits, one_bits, "");

   return LLVMBuildBitCast(bld->builder, bits, bld->vec_type, "");
}



/**
* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
* These coefficients can be generated with
@@ -2159,3 +2443,62 @@ lp_build_log2(struct lp_build_context *bld,
lp_build_log2_approx(bld, x, NULL, NULL, &res);
return res;
}


/**
 * Faster (and less accurate) log2.
 *
 * Approximates
 *
 *   log2(x) ~= floor(log2(x)) - 1 + x / 2**floor(log2(x))
 *
 * i.e. a piece-wise linear approximation, with exact results when x is a
 * power of two.
 *
 * See http://www.flipcode.com/archives/Fast_log_Function.shtml
 *
 * @param x  floating point value (vector) of the context's type
 * @return floating point approximation of log2(x)
 */
LLVMValueRef
lp_build_fast_log2(struct lp_build_context *bld,
                   LLVMValueRef x)
{
   LLVMValueRef exponent;
   LLVMValueRef mantissa;

   assert(lp_check_value(bld->type, x));

   assert(bld->type.floating);

   /* exponent = floor(log2(x)) - 1, converted to floating point */
   exponent = lp_build_extract_exponent(bld, x, -1);
   exponent = LLVMBuildSIToFP(bld->builder, exponent, bld->vec_type, "");

   /* mantissa = x / 2**floor(log2(x)) */
   mantissa = lp_build_extract_mantissa(bld, x);

   /* sum of both parts */
   return LLVMBuildFAdd(bld->builder, exponent, mantissa, "");
}


/**
 * Fast implementation of iround(log2(x)).
 *
 * Not an approximation -- it should give accurate results all the time.
 *
 * The input is multiplied by sqrt(2), which adds 0.5 to its log2, and the
 * exponent of the product is then extracted.
 *
 * @param x  floating point value (vector) of the context's type
 * @return integer (vector) holding the extracted exponent
 */
LLVMValueRef
lp_build_ilog2(struct lp_build_context *bld,
               LLVMValueRef x)
{
   LLVMValueRef sqrt2;
   LLVMValueRef scaled;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, x));

   sqrt2 = lp_build_const_vec(bld->type, M_SQRT2);

   /* x * 2^(0.5), i.e. log2(x) + 0.5 */
   scaled = LLVMBuildFMul(bld->builder, x, sqrt2, "");

   /* floor(log2(x) + 0.5) is the exponent of the scaled value */
   return lp_build_extract_exponent(bld, scaled, 0);
}

+ 23
- 0
src/gallium/auxiliary/gallivm/lp_bld_arit.h Vedi File

@@ -171,6 +171,12 @@ LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);

void
lp_build_ifloor_fract(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef *out_ipart,
LLVMValueRef *out_fpart);

LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
@@ -208,10 +214,27 @@ LLVMValueRef
lp_build_exp2(struct lp_build_context *bld,
LLVMValueRef a);

LLVMValueRef
lp_build_extract_exponent(struct lp_build_context *bld,
LLVMValueRef x,
int bias);

LLVMValueRef
lp_build_extract_mantissa(struct lp_build_context *bld,
LLVMValueRef x);

LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef a);

LLVMValueRef
lp_build_fast_log2(struct lp_build_context *bld,
LLVMValueRef a);

LLVMValueRef
lp_build_ilog2(struct lp_build_context *bld,
LLVMValueRef x);

void
lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef x,

+ 178
- 40
src/gallium/auxiliary/gallivm/lp_bld_conv.c Vedi File

@@ -63,6 +63,7 @@

#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
LLVMValueRef res;
unsigned mantissa;
unsigned n;
unsigned long long ubound;
unsigned long long mask;
double scale;
double bias;

assert(src_type.floating);
assert(dst_width <= src_type.width);
src_type.sign = FALSE;

mantissa = lp_mantissa(src_type);

/* We cannot carry more bits than the mantissa */
n = MIN2(mantissa, dst_width);
if (dst_width <= mantissa) {
/*
* Apply magic coefficients that will make the desired result appear
* in the least significant bits of the mantissa, with correct rounding.
*
* This only works if the destination width fits in the mantissa.
*/

/* This magic coefficients will make the desired result to appear in the
* lowest significant bits of the mantissa.
*/
ubound = ((unsigned long long)1 << n);
mask = ubound - 1;
scale = (double)mask/ubound;
bias = (double)((unsigned long long)1 << (mantissa - n));
unsigned long long ubound;
unsigned long long mask;
double scale;
double bias;

res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
ubound = (1ULL << dst_width);
mask = ubound - 1;
scale = (double)mask/ubound;
bias = (double)(1ULL << (mantissa - dst_width));

if(dst_width > n) {
int shift = dst_width - n;
res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), "");
res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
}
else if (dst_width == (mantissa + 1)) {
/*
* The destination width matches exactly what can be represented in
* floating point (i.e., mantissa + 1 bits). So do a straight
* multiplication followed by casting. No further rounding is necessary.
*/

double scale;

/* TODO: Fill in the empty lower bits for additional precision? */
/* YES: this fixes progs/trivial/tri-z-eq.c.
* Otherwise vertex Z=1.0 values get converted to something like
* 0xfffffb00 and the test for equality with 0xffffffff fails.
scale = (double)((1ULL << dst_width) - 1);

res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
}
else {
/*
* The destination exceeds what can be represented in floating point.
* So multiply by the largest power of two we can get away with, and then
* subtract the most significant bit to rescale to normalized values.
*
* The largest power of two factor we can get away with is
* (1 << (src_type.width - 1)), because we need to use a signed conversion.
* In theory it should be (1 << (src_type.width - 2)), but IEEE 754 rules
* state INT_MIN should be returned in FPToSI, which is the correct result
* for values near 1.0!
*
* This means we get (src_type.width - 1) correct bits for values near 0.0,
* and (mantissa + 1) correct bits for values near 1.0. Equally or more
* important, we also get exact results for 0.0 and 1.0.
*/
#if 0
{
LLVMValueRef msb;
msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
res = LLVMBuildOr(builder, res, msb, "");
}
#elif 0
while(shift > 0) {
res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
shift -= n;
n *= 2;

unsigned n = MIN2(src_type.width - 1, dst_width);

double scale = (double)(1ULL << n);
unsigned lshift = dst_width - n;
unsigned rshift = n;
LLVMValueRef lshifted;
LLVMValueRef rshifted;

res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

/*
* Align the most significant bit to its final place.
*
* This will cause 1.0 to overflow to 0, but the later adjustment will
* get it right.
*/
if (lshift) {
lshifted = LLVMBuildShl(builder, res,
lp_build_const_int_vec(src_type, lshift), "");
} else {
lshifted = res;
}
#endif

/*
* Align the most significant bit to the right.
*/
rshifted = LLVMBuildAShr(builder, res,
lp_build_const_int_vec(src_type, rshift), "");

/*
* Subtract the MSB from the LSB, therefore re-scaling from
* (1 << dst_width) to ((1 << dst_width) - 1).
*/

res = LLVMBuildSub(builder, lshifted, rshifted, "");
}
else
res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");

return res;
}
@@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,

assert(dst_type.floating);

/* Special-case int8->float, though most cases could be handled
* this way:
*/
if (src_width == 8) {
scale = 1.0/255.0;
res = LLVMBuildSIToFP(builder, src, vec_type, "");
res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
return res;
}

mantissa = lp_mantissa(dst_type);

n = MIN2(mantissa, src_width);
@@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder,
}
num_tmps = num_srcs;


/* Special case 4x4f --> 1x16ub
*/
if (src_type.floating == 1 &&
src_type.fixed == 0 &&
src_type.sign == 1 &&
src_type.norm == 0 &&
src_type.width == 32 &&
src_type.length == 4 &&

dst_type.floating == 0 &&
dst_type.fixed == 0 &&
dst_type.sign == 0 &&
dst_type.norm == 1 &&
dst_type.width == 8 &&
dst_type.length == 16 &&

util_cpu_caps.has_sse2)
{
int i;

for (i = 0; i < num_dsts; i++, src += 4) {
struct lp_type int16_type = dst_type;
struct lp_type int32_type = dst_type;
LLVMValueRef lo, hi;
LLVMValueRef src_int0;
LLVMValueRef src_int1;
LLVMValueRef src_int2;
LLVMValueRef src_int3;
LLVMTypeRef int16_vec_type;
LLVMTypeRef int32_vec_type;
LLVMTypeRef src_vec_type;
LLVMTypeRef dst_vec_type;
LLVMValueRef const_255f;
LLVMValueRef a, b, c, d;

int16_type.width *= 2;
int16_type.length /= 2;
int16_type.sign = 1;

int32_type.width *= 4;
int32_type.length /= 4;
int32_type.sign = 1;

src_vec_type = lp_build_vec_type(src_type);
dst_vec_type = lp_build_vec_type(dst_type);
int16_vec_type = lp_build_vec_type(int16_type);
int32_vec_type = lp_build_vec_type(int32_type);

const_255f = lp_build_const_vec(src_type, 255.0f);

a = LLVMBuildFMul(builder, src[0], const_255f, "");
b = LLVMBuildFMul(builder, src[1], const_255f, "");
c = LLVMBuildFMul(builder, src[2], const_255f, "");
d = LLVMBuildFMul(builder, src[3], const_255f, "");

{
struct lp_build_context bld;

bld.builder = builder;
bld.type = src_type;
bld.vec_type = src_vec_type;
bld.int_elem_type = lp_build_elem_type(int32_type);
bld.int_vec_type = int32_vec_type;
bld.undef = lp_build_undef(src_type);
bld.zero = lp_build_zero(src_type);
bld.one = lp_build_one(src_type);

src_int0 = lp_build_iround(&bld, a);
src_int1 = lp_build_iround(&bld, b);
src_int2 = lp_build_iround(&bld, c);
src_int3 = lp_build_iround(&bld, d);
}
/* relying on clamping behavior of sse2 intrinsics here */
lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1);
hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3);
dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi);
}
return;
}

/*
* Clamp if necessary
*/

+ 14
- 5
src/gallium/auxiliary/gallivm/lp_bld_debug.c Vedi File

@@ -57,6 +57,8 @@ lp_disassemble(const void* func)
#ifdef HAVE_UDIS86
ud_t ud_obj;
uint64_t max_jmp_pc;
uint inst_no;
boolean emit_addrs = TRUE, emit_line_nos = FALSE;

ud_init(&ud_obj);

@@ -76,13 +78,18 @@ lp_disassemble(const void* func)

while (ud_disassemble(&ud_obj)) {

if (emit_addrs) {
#ifdef PIPE_ARCH_X86
debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
#endif
#ifdef PIPE_ARCH_X86_64
debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
#endif

}
else if (emit_line_nos) {
debug_printf("%6d:\t", inst_no);
inst_no++;
}
#if 0
debug_printf("%-16s ", ud_insn_hex(&ud_obj));
#endif
@@ -115,8 +122,10 @@ lp_disassemble(const void* func)
}
}

if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
ud_obj.mnemonic == UD_Iinvalid)
if (ud_obj.mnemonic == UD_Iinvalid ||
(ud_insn_off(&ud_obj) >= max_jmp_pc &&
(ud_obj.mnemonic == UD_Iret ||
ud_obj.mnemonic == UD_Ijmp)))
break;
}


+ 6
- 5
src/gallium/auxiliary/gallivm/lp_bld_debug.h Vedi File

@@ -36,11 +36,12 @@
#include "util/u_string.h"


#define GALLIVM_DEBUG_TGSI 0x1
#define GALLIVM_DEBUG_IR 0x2
#define GALLIVM_DEBUG_ASM 0x4
#define GALLIVM_DEBUG_NO_OPT 0x8
#define GALLIVM_DEBUG_PERF 0x10
#define GALLIVM_DEBUG_TGSI (1 << 0)
#define GALLIVM_DEBUG_IR (1 << 1)
#define GALLIVM_DEBUG_ASM (1 << 2)
#define GALLIVM_DEBUG_NO_OPT (1 << 3)
#define GALLIVM_DEBUG_PERF (1 << 4)
#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5)


#ifdef DEBUG

+ 87
- 517
src/gallium/auxiliary/gallivm/lp_bld_flow.c Vedi File

@@ -38,273 +38,15 @@
#include "lp_bld_flow.h"


#define LP_BUILD_FLOW_MAX_VARIABLES 64
#define LP_BUILD_FLOW_MAX_DEPTH 32

/**
 * Kinds of construct that can be stored on the control flow stack.
 *
 * The values are spelled out explicitly; they are the same ones the
 * compiler would assign implicitly.
 */
enum lp_build_flow_construct_kind {
   LP_BUILD_FLOW_SCOPE = 0,   /* variable declaration scope */
   LP_BUILD_FLOW_SKIP  = 1,   /* early-exit ("skip") block */
   LP_BUILD_FLOW_IF    = 2    /* if/else/endif */
};


/**
 * Variable declaration scope.
 *
 * One of these is pushed by lp_build_flow_scope_begin() and popped by
 * lp_build_flow_scope_end().
 */
struct lp_build_flow_scope
{
/**
 * Number of variables declared in this scope.
 *
 * Reset to zero by lp_build_flow_scope_begin(), incremented by each
 * lp_build_flow_scope_declare(), and subtracted from the context's
 * variable count by lp_build_flow_scope_end().
 */
unsigned num_variables;
};


/**
 * Early exit. Useful to skip to the end of a function or block when
 * the execution mask becomes zero or when there is an error condition.
 */
struct lp_build_flow_skip
{
/** Block to skip to; it is the target of the conditional break branches */
LLVMBasicBlockRef block;

/** Number of variables declared at the beginning (copied from the flow context) */
unsigned num_variables;

/* One Phi node per tracked variable; NULL when there are no variables. */
LLVMValueRef *phi; /**< array [num_variables] */
};


/**
 * if/else/endif.
 *
 * Per-conditional bookkeeping kept on the control flow stack between
 * lp_build_if() and lp_build_endif().
 */
struct lp_build_flow_if
{
/* Number of flow variables that were live when the conditional was opened. */
unsigned num_variables;

/* One Phi node per tracked variable, built in the merge block. */
LLVMValueRef *phi; /**< array [num_variables] */

/* Condition value used for the conditional branch emitted by lp_build_endif(). */
LLVMValueRef condition;
/*
 * entry_block: block that was current when lp_build_if() was called.
 * true_block:  block holding the "then" code.
 * false_block: block holding the "else" code; only created by
 *              lp_build_else(), and tested by lp_build_endif() to find out
 *              whether an else clause exists.
 * merge_block: block where both paths join again.
 */
LLVMBasicBlockRef entry_block, true_block, false_block, merge_block;
};


/**
 * Union of all possible flow constructs' data.
 *
 * Which member is meaningful is recorded in the `kind` field of the
 * enclosing struct lp_build_flow_construct.
 */
union lp_build_flow_construct_data
{
struct lp_build_flow_scope scope;   /* used with LP_BUILD_FLOW_SCOPE */
struct lp_build_flow_skip skip;     /* used with LP_BUILD_FLOW_SKIP */
struct lp_build_flow_if ifthen;     /* used with LP_BUILD_FLOW_IF */
};


/**
 * Element of the flow construct stack: a kind tag plus the data that
 * belongs to that kind.
 */
struct lp_build_flow_construct
{
/* Tag set by lp_build_flow_push() and checked by peek/pop. */
enum lp_build_flow_construct_kind kind;
/* Payload; the member in use is selected by `kind`. */
union lp_build_flow_construct_data data;
};


/**
* All necessary data to generate LLVM control flow constructs.
* Insert a new block, right where builder is pointing to.
*
* Besides keeping track of the control flow construct themselves we also
* need to keep track of variables in order to generate SSA Phi values.
*/
struct lp_build_flow_context
{
/* Builder given to lp_build_flow_create(); all flow code is emitted with it. */
LLVMBuilderRef builder;

/**
 * Control flow stack.
 *
 * Entries [0, num_constructs) are in use; the top of the stack is
 * constructs[num_constructs - 1].
 */
struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
unsigned num_constructs;

/**
 * Variable stack.
 *
 * Each entry points at the caller's LLVMValueRef, so the current value
 * of a variable is always read through the pointer.
 */
LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
unsigned num_variables;
};


/**
 * Allocate a zero-initialized flow context bound to \p builder.
 *
 * \return the new context, or NULL if the allocation failed.
 */
struct lp_build_flow_context *
lp_build_flow_create(LLVMBuilderRef builder)
{
   struct lp_build_flow_context *ctx = CALLOC_STRUCT(lp_build_flow_context);

   /* Only touch the context when the allocation succeeded. */
   if (ctx != NULL)
      ctx->builder = builder;

   return ctx;
}


/**
 * Free a flow context created by lp_build_flow_create().
 *
 * All constructs and variable scopes must have been closed beforehand;
 * this is asserted.  A NULL \p flow is accepted and ignored, since
 * lp_build_flow_create() returns NULL when its allocation fails.
 */
void
lp_build_flow_destroy(struct lp_build_flow_context *flow)
{
   /* Nothing to check or free for a context that was never created. */
   if (!flow)
      return;

   assert(flow->num_constructs == 0);
   assert(flow->num_variables == 0);
   FREE(flow);
}


/**
* Begin/push a new flow control construct, such as a loop, skip block
* or variable scope.
*/
static union lp_build_flow_construct_data *
lp_build_flow_push(struct lp_build_flow_context *flow,
enum lp_build_flow_construct_kind kind)
{
assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
return NULL;

flow->constructs[flow->num_constructs].kind = kind;
return &flow->constructs[flow->num_constructs++].data;
}


/**
* Return the current/top flow control construct on the stack.
* \param kind the expected type of the top-most construct
*/
static union lp_build_flow_construct_data *
lp_build_flow_peek(struct lp_build_flow_context *flow,
enum lp_build_flow_construct_kind kind)
{
assert(flow->num_constructs);
if(!flow->num_constructs)
return NULL;

assert(flow->constructs[flow->num_constructs - 1].kind == kind);
if(flow->constructs[flow->num_constructs - 1].kind != kind)
return NULL;

return &flow->constructs[flow->num_constructs - 1].data;
}


/**
 * Remove the top-most flow control construct from the stack.
 *
 * \param kind  the kind the top-most construct is expected to have
 * \return pointer to the data of the entry that was removed, or NULL
 *         (leaving the stack untouched) if the stack is empty or the top
 *         entry has a different kind
 */
static union lp_build_flow_construct_data *
lp_build_flow_pop(struct lp_build_flow_context *flow,
                  enum lp_build_flow_construct_kind kind)
{
   unsigned top;

   assert(flow->num_constructs);
   if (flow->num_constructs == 0)
      return NULL;

   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
   top = flow->num_constructs - 1;
   if (flow->constructs[top].kind != kind)
      return NULL;

   /* Shrink the stack; the entry's storage stays valid in the array. */
   flow->num_constructs = top;
   return &flow->constructs[top].data;
}


/**
* Begin a variable scope.
* This is important not only for aesthetic reasons, but also for
* performance reasons, as frequently run blocks should be laid out next to
* each other and fall-throughs maximized.
*
* See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp.
*
*/
void
lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
{
struct lp_build_flow_scope *scope;

scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope;
if(!scope)
return;

scope->num_variables = 0;
}


/**
* Declare a variable.
*
* A variable is a named entity which can have different LLVMValueRef's at
* different points of the program. This is relevant for control flow because
* when there are multiple branches to a same location we need to replace
* the variable's value with a Phi function as explained in
* http://en.wikipedia.org/wiki/Static_single_assignment_form .
*
* We keep track of variables by keeping around a pointer to where they're
* current.
*
* There are a few cautions to observe:
*
* - Variable's value must not be NULL. If there is no initial value then
* LLVMGetUndef() should be used.
*
* - Variable's value must be kept up-to-date. If the variable is going to be
* modified by a function then a pointer should be passed so that its value
* is accurate. Failure to do this will cause some of the variables'
* transient values to be lost, leading to wrong results.
*
* - A program should be written from top to bottom, by always appending
* instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
* modifying existing statements will most likely lead to wrong results.
*
*/
void
lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
                            LLVMValueRef *variable)
{
   union lp_build_flow_construct_data *data;

   /*
    * Test the pointer returned by the peek itself rather than the address
    * of a member reached through a possibly-NULL pointer.
    */
   data = lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE);
   if (!data)
      return;

   /* A variable must already hold a value when it is declared. */
   assert(*variable);
   if (!*variable)
      return;

   assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
   if (flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
      return;

   /* Record the variable on the context and count it against the scope. */
   flow->variables[flow->num_variables++] = variable;
   ++data->scope.num_variables;
}


void
lp_build_flow_scope_end(struct lp_build_flow_context *flow)
{
struct lp_build_flow_scope *scope;

scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope;
if(!scope)
return;

assert(flow->num_variables >= scope->num_variables);
if(flow->num_variables < scope->num_variables) {
flow->num_variables = 0;
return;
}

flow->num_variables -= scope->num_variables;
}


/**
* Note: this function has no dependencies on the flow code and could
* be used elsewhere.
*/
@@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
}


static LLVMBasicBlockRef
lp_build_flow_insert_block(struct lp_build_flow_context *flow)
{
return lp_build_insert_new_block(flow->builder, "");
}


/**
* Begin a "skip" block. Inside this block we can test a condition and
* skip to the end of the block if the condition is false.
*/
void
lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
lp_build_flow_skip_begin(struct lp_build_skip_context *skip,
LLVMBuilderRef builder)
{
struct lp_build_flow_skip *skip;
LLVMBuilderRef builder;
unsigned i;

skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
if(!skip)
return;
skip->builder = builder;

/* create new basic block */
skip->block = lp_build_flow_insert_block(flow);

skip->num_variables = flow->num_variables;
if(!skip->num_variables) {
skip->phi = NULL;
return;
}

/* Allocate a Phi node for each variable in this skip scope */
skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
if(!skip->phi) {
skip->num_variables = 0;
return;
}

builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, skip->block);

/* create a Phi node for each variable */
for(i = 0; i < skip->num_variables; ++i)
skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");

LLVMDisposeBuilder(builder);
skip->block = lp_build_insert_new_block(skip->builder, "skip");
}


@@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
* skip block if the condition is true.
*/
void
lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip,
LLVMValueRef cond)
{
struct lp_build_flow_skip *skip;
LLVMBasicBlockRef current_block;
LLVMBasicBlockRef new_block;
unsigned i;

skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
if(!skip)
return;

current_block = LLVMGetInsertBlock(flow->builder);

new_block = lp_build_flow_insert_block(flow);

/* for each variable, update the Phi node with a (variable, block) pair */
for(i = 0; i < skip->num_variables; ++i) {
assert(*flow->variables[i]);
assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
}
new_block = lp_build_insert_new_block(skip->builder, "");

/* if cond is true, goto skip->block, else goto new_block */
LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
LLVMBuildCondBr(skip->builder, cond, skip->block, new_block);

LLVMPositionBuilderAtEnd(flow->builder, new_block);
LLVMPositionBuilderAtEnd(skip->builder, new_block);
}


void
lp_build_flow_skip_end(struct lp_build_flow_context *flow)
lp_build_flow_skip_end(struct lp_build_skip_context *skip)
{
struct lp_build_flow_skip *skip;
LLVMBasicBlockRef current_block;
unsigned i;

skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
if(!skip)
return;

current_block = LLVMGetInsertBlock(flow->builder);

/* add (variable, block) tuples to the phi nodes */
for(i = 0; i < skip->num_variables; ++i) {
assert(*flow->variables[i]);
assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
*flow->variables[i] = skip->phi[i];
}

/* goto block */
LLVMBuildBr(flow->builder, skip->block);
LLVMPositionBuilderAtEnd(flow->builder, skip->block);

FREE(skip->phi);
LLVMBuildBr(skip->builder, skip->block);
LLVMPositionBuilderAtEnd(skip->builder, skip->block);
}


/**
* Check if the mask predicate is zero. If so, jump to the end of the block.
*/
static void
void
lp_build_mask_check(struct lp_build_mask_context *mask)
{
LLVMBuilderRef builder = mask->flow->builder;
LLVMBuilderRef builder = mask->skip.builder;
LLVMValueRef value;
LLVMValueRef cond;

value = lp_build_mask_value(mask);

/* cond = (mask == 0) */
cond = LLVMBuildICmp(builder,
LLVMIntEQ,
LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
LLVMBuildBitCast(builder, value, mask->reg_type, ""),
LLVMConstNull(mask->reg_type),
"");

/* if cond, goto end of block */
lp_build_flow_skip_cond_break(mask->flow, cond);
lp_build_flow_skip_cond_break(&mask->skip, cond);
}


@@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask)
*/
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
struct lp_build_flow_context *flow,
LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef value)
{
memset(mask, 0, sizeof *mask);

mask->flow = flow;
mask->reg_type = LLVMIntType(type.width * type.length);
mask->value = value;
mask->var = lp_build_alloca(builder,
lp_build_int_vec_type(type),
"execution_mask");

lp_build_flow_scope_begin(flow);
lp_build_flow_scope_declare(flow, &mask->value);
lp_build_flow_skip_begin(flow);
LLVMBuildStore(builder, value, mask->var);

lp_build_mask_check(mask);
lp_build_flow_skip_begin(&mask->skip, builder);
}


LLVMValueRef
lp_build_mask_value(struct lp_build_mask_context *mask)
{
return LLVMBuildLoad(mask->skip.builder, mask->var, "");
}


@@ -504,9 +185,10 @@ void
lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef value)
{
mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");

lp_build_mask_check(mask);
value = LLVMBuildAnd(mask->skip.builder,
lp_build_mask_value(mask),
value, "");
LLVMBuildStore(mask->skip.builder, value, mask->var);
}


@@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef
lp_build_mask_end(struct lp_build_mask_context *mask)
{
lp_build_flow_skip_end(mask->flow);
lp_build_flow_scope_end(mask->flow);
return mask->value;
lp_build_flow_skip_end(&mask->skip);
return lp_build_mask_value(mask);
}


@@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder,
LLVMValueRef start,
struct lp_build_loop_state *state)
{
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
LLVMValueRef function = LLVMGetBasicBlockParent(block);
state->block = lp_build_insert_new_block(builder, "loop_begin");

state->block = LLVMAppendBasicBlock(function, "loop");
state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter");

LLVMBuildStore(builder, start, state->counter_var);

LLVMBuildBr(builder, state->block);

LLVMPositionBuilderAtEnd(builder, state->block);

state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");

LLVMAddIncoming(state->counter, &start, &block, 1);

state->counter = LLVMBuildLoad(builder, state->counter_var, "");
}


void
lp_build_loop_end(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
struct lp_build_loop_state *state)
{
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
LLVMValueRef function = LLVMGetBasicBlockParent(block);
LLVMValueRef next;
LLVMValueRef cond;
LLVMBasicBlockRef after_block;

if (!step)
step = LLVMConstInt(LLVMTypeOf(end), 1, 0);

next = LLVMBuildAdd(builder, state->counter, step, "");

cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");

after_block = LLVMAppendBasicBlock(function, "");

LLVMBuildCondBr(builder, cond, after_block, state->block);

LLVMAddIncoming(state->counter, &next, &block, 1);

LLVMPositionBuilderAtEnd(builder, after_block);
}

void
lp_build_loop_end_cond(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
int llvm_cond,
LLVMIntPredicate llvm_cond,
struct lp_build_loop_state *state)
{
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
LLVMValueRef function = LLVMGetBasicBlockParent(block);
LLVMValueRef next;
LLVMValueRef cond;
LLVMBasicBlockRef after_block;
@@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,

next = LLVMBuildAdd(builder, state->counter, step, "");

LLVMBuildStore(builder, next, state->counter_var);

cond = LLVMBuildICmp(builder, llvm_cond, next, end, "");

after_block = LLVMAppendBasicBlock(function, "");
after_block = lp_build_insert_new_block(builder, "loop_end");

LLVMBuildCondBr(builder, cond, after_block, state->block);

LLVMAddIncoming(state->counter, &next, &block, 1);

LLVMPositionBuilderAtEnd(builder, after_block);

state->counter = LLVMBuildLoad(builder, state->counter_var, "");
}


/**
 * Close a loop by forwarding to lp_build_loop_end_cond() with LLVMIntNE
 * as the comparison predicate.
 */
void
lp_build_loop_end(LLVMBuilderRef builder,
                  LLVMValueRef end,
                  LLVMValueRef step,
                  struct lp_build_loop_state *state)
{
   const LLVMIntPredicate predicate = LLVMIntNE;

   lp_build_loop_end_cond(builder, end, step, predicate, state);
}


@@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,

Is built with:

LLVMValueRef x = LLVMGetUndef(); // or something else
// x needs an alloca variable
x = lp_build_alloca(builder, type, "x");

flow = lp_build_flow_create(builder);

lp_build_flow_scope_begin(flow);
lp_build_if(ctx, builder, cond);
LLVMBuildStore(LLVMBuildAdd(1, 2), x);
lp_build_else(ctx);
LLVMBuildStore(LLVMBuildAdd(2, 3), x);
lp_build_endif(ctx);

// x needs a phi node
lp_build_flow_scope_declare(flow, &x);

lp_build_if(ctx, flow, builder, cond);
x = LLVMAdd(1, 2);
lp_build_else(ctx);
x = LLVMAdd(2, 3);
lp_build_endif(ctx);

lp_build_flow_scope_end(flow);

lp_build_flow_destroy(flow);
*/


@@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
* Begin an if/else/endif construct.
*/
void
lp_build_if(struct lp_build_if_state *ctx,
struct lp_build_flow_context *flow,
lp_build_if(struct lp_build_if_state *ifthen,
LLVMBuilderRef builder,
LLVMValueRef condition)
{
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
struct lp_build_flow_if *ifthen;
unsigned i;

memset(ctx, 0, sizeof(*ctx));
ctx->builder = builder;
ctx->flow = flow;

/* push/create new scope */
ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen;
assert(ifthen);

ifthen->num_variables = flow->num_variables;
memset(ifthen, 0, sizeof *ifthen);
ifthen->builder = builder;
ifthen->condition = condition;
ifthen->entry_block = block;

/* create a Phi node for each variable in this flow scope */
ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi));
if (!ifthen->phi) {
ifthen->num_variables = 0;
return;
}

/* create endif/merge basic block for the phi functions */
ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block");
LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);

/* create a phi node for each variable */
for (i = 0; i < flow->num_variables; i++) {
ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");

/* add the initial value of the var from the entry block */
if (!LLVMIsUndef(*flow->variables[i]))
LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
&ifthen->entry_block, 1);
}

/* create/insert true_block before merge_block */
ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block");
@@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx,
* Begin else-part of a conditional
*/
void
lp_build_else(struct lp_build_if_state *ctx)
lp_build_else(struct lp_build_if_state *ifthen)
{
struct lp_build_flow_context *flow = ctx->flow;
struct lp_build_flow_if *ifthen;
unsigned i;

ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen;
assert(ifthen);

/* for each variable, update the Phi node with a (variable, block) pair */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
for (i = 0; i < flow->num_variables; i++) {
assert(*flow->variables[i]);
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);
}
/* Append an unconditional Br(anch) instruction on the true_block */
LLVMBuildBr(ifthen->builder, ifthen->merge_block);

/* create/insert false_block before the merge block */
ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block");

/* successive code goes into the else block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block);
}


@@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx)
* End a conditional.
*/
void
lp_build_endif(struct lp_build_if_state *ctx)
lp_build_endif(struct lp_build_if_state *ifthen)
{
struct lp_build_flow_context *flow = ctx->flow;
struct lp_build_flow_if *ifthen;
LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
unsigned i;

ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
assert(ifthen);

/* Insert branch to the merge block from current block */
LLVMBuildBr(ctx->builder, ifthen->merge_block);
LLVMBuildBr(ifthen->builder, ifthen->merge_block);

if (ifthen->false_block) {
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
/* for each variable, update the Phi node with a (variable, block) pair */
for (i = 0; i < flow->num_variables; i++) {
assert(*flow->variables[i]);
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
/* replace the variable ref with the phi function */
*flow->variables[i] = ifthen->phi[i];
}
}
else {
/* no else clause */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
for (i = 0; i < flow->num_variables; i++) {
assert(*flow->variables[i]);
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);

/* replace the variable ref with the phi function */
*flow->variables[i] = ifthen->phi[i];
}
}

FREE(ifthen->phi);

/***
*** Now patch in the various branch instructions.
***/
/*
* Now patch in the various branch instructions.
*/

/* Insert the conditional branch instruction at the end of entry_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block);
LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block);
if (ifthen->false_block) {
/* we have an else clause */
LLVMBuildCondBr(ctx->builder, ifthen->condition,
LLVMBuildCondBr(ifthen->builder, ifthen->condition,
ifthen->true_block, ifthen->false_block);
}
else {
/* no else clause */
LLVMBuildCondBr(ctx->builder, ifthen->condition,
LLVMBuildCondBr(ifthen->builder, ifthen->condition,
ifthen->true_block, ifthen->merge_block);
}

/* Insert branch from end of true_block to merge_block */
if (ifthen->false_block) {
/* Append an unconditional Br(anch) instruction on the true_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
LLVMBuildBr(ctx->builder, ifthen->merge_block);
}
else {
/* No else clause.
* Note that we've already inserted the branch at the end of
* true_block. See the very first LLVMBuildBr() call in this function.
*/
}

/* Resume building code at end of the ifthen->merge_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block);
}


@@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder,
}

res = LLVMBuildAlloca(first_builder, type, name);
LLVMBuildStore(builder, LLVMConstNull(type), res);

LLVMDisposeBuilder(first_builder);


+ 33
- 26
src/gallium/auxiliary/gallivm/lp_bld_flow.h Vedi File

@@ -41,52 +41,49 @@
struct lp_type;


struct lp_build_flow_context;


struct lp_build_flow_context *
lp_build_flow_create(LLVMBuilderRef builder);

void
lp_build_flow_destroy(struct lp_build_flow_context *flow);

void
lp_build_flow_scope_begin(struct lp_build_flow_context *flow);

void
lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
LLVMValueRef *variable);
/**
* Early exit. Useful to skip to the end of a function or block when
* the execution mask becomes zero or when there is an error condition.
*/
struct lp_build_skip_context
{
LLVMBuilderRef builder;

void
lp_build_flow_scope_end(struct lp_build_flow_context *flow);
/** Block to skip to */
LLVMBasicBlockRef block;
};

void
lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
lp_build_flow_skip_begin(struct lp_build_skip_context *ctx,
LLVMBuilderRef builder);

void
lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx,
LLVMValueRef cond);

void
lp_build_flow_skip_end(struct lp_build_flow_context *flow);
lp_build_flow_skip_end(struct lp_build_skip_context *ctx);


struct lp_build_mask_context
{
struct lp_build_flow_context *flow;
struct lp_build_skip_context skip;

LLVMTypeRef reg_type;

LLVMValueRef value;
LLVMValueRef var;
};


void
lp_build_mask_begin(struct lp_build_mask_context *mask,
struct lp_build_flow_context *flow,
LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef value);

LLVMValueRef
lp_build_mask_value(struct lp_build_mask_context *mask);

/**
* Bitwise AND the mask with the given value, if a previous mask was set.
*/
@@ -94,6 +91,9 @@ void
lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef value);

void
lp_build_mask_check(struct lp_build_mask_context *mask);

LLVMValueRef
lp_build_mask_end(struct lp_build_mask_context *mask);

@@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask);
/* State shared between lp_build_loop_begin() and lp_build_loop_end*(). */
struct lp_build_loop_state
{
/* Basic block of the loop; the conditional branch at the loop end targets it. */
LLVMBasicBlockRef block;
/* Stack slot ("loop_counter" alloca) that holds the counter between iterations. */
LLVMValueRef counter_var;
/* Counter value most recently loaded from counter_var. */
LLVMValueRef counter;
};

@@ -128,22 +129,28 @@ void
lp_build_loop_end_cond(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
int cond, /* LLVM condition */
LLVMIntPredicate cond,
struct lp_build_loop_state *state);




/**
* if/else/endif.
*/
struct lp_build_if_state
{
LLVMBuilderRef builder;
struct lp_build_flow_context *flow;
LLVMValueRef condition;
LLVMBasicBlockRef entry_block;
LLVMBasicBlockRef true_block;
LLVMBasicBlockRef false_block;
LLVMBasicBlockRef merge_block;
};


void
lp_build_if(struct lp_build_if_state *ctx,
struct lp_build_flow_context *flow,
LLVMBuilderRef builder,
LLVMValueRef condition);


+ 55
- 9
src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c Vedi File

@@ -35,6 +35,7 @@


#include "util/u_format.h"
#include "util/u_cpu_detect.h"

#include "lp_bld_arit.h"
#include "lp_bld_type.h"
@@ -42,7 +43,7 @@
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_logic.h"

/**
* Extract Y, U, V channels from packed UYVY.
@@ -59,7 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
LLVMValueRef *v)
{
struct lp_type type;
LLVMValueRef shift, mask;
LLVMValueRef mask;

memset(&type, 0, sizeof type);
type.width = 32;
@@ -69,14 +70,37 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
assert(lp_check_value(type, i));

/*
* y = (uyvy >> 16*i) & 0xff
* y = (uyvy >> (16*i + 8)) & 0xff
* u = (uyvy ) & 0xff
* v = (uyvy >> 16 ) & 0xff
*/

shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
*y = LLVMBuildLShr(builder, packed, shift, "");
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* Avoid shift with per-element count.
* No support on x86, gets translated to roughly 5 instructions
* per element. Didn't measure performance but cuts shader size
* by quite a bit (less difference if cpu has no sse4.1 support).
*/
if (util_cpu_caps.has_sse2 && n == 4) {
LLVMValueRef sel, tmp, tmp2;
struct lp_build_context bld32;

lp_build_context_init(&bld32, builder, type);

tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
*y = lp_build_select(&bld32, sel, tmp, tmp2);
} else
#endif
{
LLVMValueRef shift;
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
*y = LLVMBuildLShr(builder, packed, shift, "");
}

*u = packed;
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

@@ -103,7 +127,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
LLVMValueRef *v)
{
struct lp_type type;
LLVMValueRef shift, mask;
LLVMValueRef mask;

memset(&type, 0, sizeof type);
type.width = 32;
@@ -118,8 +142,30 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
* v = (yuyv >> 24 ) & 0xff
*/

shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
*y = LLVMBuildLShr(builder, packed, shift, "");
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* Avoid shift with per-element count.
* No support on x86, gets translated to roughly 5 instructions
* per element. Didn't measure performance but cuts shader size
* by quite a bit (less difference if cpu has no sse4.1 support).
*/
if (util_cpu_caps.has_sse2 && n == 4) {
LLVMValueRef sel, tmp;
struct lp_build_context bld32;

lp_build_context_init(&bld32, builder, type);

tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
*y = lp_build_select(&bld32, sel, packed, tmp);
} else
#endif
{
LLVMValueRef shift;
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
*y = LLVMBuildLShr(builder, packed, shift, "");
}

*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");


+ 1
- 0
src/gallium/auxiliary/gallivm/lp_bld_init.c Vedi File

@@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
{ "asm", GALLIVM_DEBUG_ASM, NULL },
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
{ "perf", GALLIVM_DEBUG_PERF, NULL },
{ "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL },
DEBUG_NAMED_VALUE_END
};


+ 6
- 0
src/gallium/auxiliary/gallivm/lp_bld_init.h Vedi File

@@ -47,4 +47,10 @@ lp_build_init(void);
extern void
lp_func_delete_body(LLVMValueRef func);


extern LLVMValueRef
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
const char *Name);


#endif /* !LP_BLD_INIT_H */

+ 22
- 4
src/gallium/auxiliary/gallivm/lp_bld_logic.c Vedi File

@@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder,
if(func == PIPE_FUNC_ALWAYS)
return ones;

/* TODO: optimize the constant case */
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* There are no unsigned integer comparison instructions in SSE.
*/

/* XXX: It is not clear if we should use the ordered or unordered operators */
if (!type.floating && !type.sign &&
type.width * type.length == 128 &&
util_cpu_caps.has_sse2 &&
(func == PIPE_FUNC_LESS ||
func == PIPE_FUNC_LEQUAL ||
func == PIPE_FUNC_GREATER ||
func == PIPE_FUNC_GEQUAL) &&
(gallivm_debug & GALLIVM_DEBUG_PERF)) {
debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
__FUNCTION__, type.length, type.width);
}
#endif

#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder,
#endif
#endif /* HAVE_LLVM < 0x0207 */

/* XXX: It is not clear if we should use the ordered or unordered operators */

if(type.floating) {
LLVMRealPredicate op;
switch(func) {
@@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld,
LLVMTypeRef arg_type;
LLVMValueRef args[3];

if (type.width == 64) {
if (type.floating &&
type.width == 64) {
intrinsic = "llvm.x86.sse41.blendvpd";
arg_type = LLVMVectorType(LLVMDoubleType(), 2);
} else if (type.width == 32) {
} else if (type.floating &&
type.width == 32) {
intrinsic = "llvm.x86.sse41.blendvps";
arg_type = LLVMVectorType(LLVMFloatType(), 4);
} else {

+ 10
- 0
src/gallium/auxiliary/gallivm/lp_bld_misc.cpp Vedi File

@@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF)
llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
func->deleteBody();
}


/**
 * Emit a load from PointerVal with the builder's volatile flag set.
 *
 * C-callable wrapper: it unwraps the builder and the pointer value, calls
 * CreateLoad() with `true` as its second argument, and wraps the result.
 *
 * \param B           builder used to emit the load
 * \param PointerVal  pointer to load from
 * \param Name        name given to the resulting value
 * \return the loaded value
 */
extern "C"
LLVMValueRef
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
                       const char *Name)
{
   llvm::Value *pointer = llvm::unwrap(PointerVal);
   llvm::Value *loaded = llvm::unwrap(B)->CreateLoad(pointer, true, Name);

   return llvm::wrap(loaded);
}


+ 21
- 0
src/gallium/auxiliary/gallivm/lp_bld_printf.c Vedi File

@@ -29,6 +29,8 @@

#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "lp_bld_const.h"
#include "lp_bld_printf.h"


@@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...)
return LLVMBuildCall(builder, func_printf, params, argcount + 1, "");
}



/**
 * Emit a printf call that prints elements 0..3 of a vector with "%f".
 *
 * \param builder  builder used to emit the extracts and the call
 * \param msg      text placed in front of the four values
 * \param vec      vector whose first four elements are printed
 * \return the value returned by lp_build_printf()
 */
LLVMValueRef
lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec)
{
   LLVMValueRef elems[4];
   char fmt[1000];
   unsigned chan;

   /* Pull the four channels out of the vector, in x, y, z, w order. */
   for (chan = 0; chan < 4; chan++) {
      elems[chan] = LLVMBuildExtractElement(builder, vec,
                                            lp_build_const_int32(chan), "");
   }

   /* msg is pasted in front; each doubled %% ends up as a literal %f. */
   util_snprintf(fmt, sizeof(fmt), "%s %%f %%f %%f %%f\n", msg);

   return lp_build_printf(builder, fmt,
                          elems[0], elems[1], elems[2], elems[3]);
}

+ 4
- 0
src/gallium/auxiliary/gallivm/lp_bld_printf.h Vedi File

@@ -35,5 +35,9 @@
LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len);
LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...);

LLVMValueRef
lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec);


#endif


+ 18
- 10
src/gallium/auxiliary/gallivm/lp_bld_quad.c Vedi File

@@ -81,11 +81,15 @@ LLVMValueRef
lp_build_scalar_ddx(struct lp_build_context *bld,
LLVMValueRef a)
{
LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
return lp_build_sub(bld, a_right, a_left);
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0);
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left");
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right");
if (bld->type.floating)
return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx");
else
return LLVMBuildSub(bld->builder, a_right, a_left, "ddx");
}


@@ -93,9 +97,13 @@ LLVMValueRef
lp_build_scalar_ddy(struct lp_build_context *bld,
LLVMValueRef a)
{
LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
return lp_build_sub(bld, a_bottom, a_top);
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0);
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top");
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom");
if (bld->type.floating)
return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy");
else
return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy");
}

+ 564
- 191
src/gallium/auxiliary/gallivm/lp_bld_sample.c Vedi File

@@ -39,12 +39,52 @@
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_type.h"


/*
* Bri-linear factor. Should be greater than one.
*/
#define BRILINEAR_FACTOR 2


/**
 * Tell whether a texture wrap mode can end up sampling the border color.
 *
 * \param mode            one of the PIPE_TEX_WRAP_x values
 * \param min_img_filter  minification image filter (PIPE_TEX_FILTER_x)
 * \param mag_img_filter  magnification image filter (PIPE_TEX_FILTER_x)
 * \return TRUE if the border color may be sampled, FALSE otherwise
 *
 * XXX maybe move this into gallium util code.
 */
boolean
lp_sampler_wrap_mode_uses_border_color(unsigned mode,
                                       unsigned min_img_filter,
                                       unsigned mag_img_filter)
{
   const boolean both_nearest =
      (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
       mag_img_filter == PIPE_TEX_FILTER_NEAREST);

   switch (mode) {
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      /* Border modes always use the border color. */
      return TRUE;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      /* Plain clamp only reports the border when a filter is not nearest. */
      return both_nearest ? FALSE : TRUE;

   case PIPE_TEX_WRAP_REPEAT:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      return FALSE;

   default:
      break;
   }

   assert(0 && "unexpected wrap mode");
   return FALSE;
}


/**
* Initialize lp_sampler_static_state object with the gallium sampler
* and texture state.
@@ -93,31 +133,40 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
state->mag_img_filter = sampler->mag_img_filter;
if (view->last_level) {

if (view->last_level && sampler->max_lod > 0.0f) {
state->min_mip_filter = sampler->min_mip_filter;
} else {
state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}

if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
if (sampler->lod_bias != 0.0f) {
state->lod_bias_non_zero = 1;
}

/* If min_lod == max_lod we can greatly simplify mipmap selection.
* This is a case that occurs during automatic mipmap generation.
*/
if (sampler->min_lod == sampler->max_lod) {
state->min_max_lod_equal = 1;
} else {
if (sampler->min_lod > 0.0f) {
state->apply_min_lod = 1;
}

if (sampler->max_lod < (float)view->last_level) {
state->apply_max_lod = 1;
}
}
}

state->compare_mode = sampler->compare_mode;
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
state->compare_func = sampler->compare_func;
}

state->normalized_coords = sampler->normalized_coords;
state->lod_bias = sampler->lod_bias;
if (!view->last_level &&
sampler->min_img_filter == sampler->mag_img_filter) {
state->min_lod = 0.0f;
state->max_lod = 0.0f;
} else {
state->min_lod = MAX2(sampler->min_lod, 0.0f);
state->max_lod = sampler->max_lod;
}
state->border_color[0] = sampler->border_color[0];
state->border_color[1] = sampler->border_color[1];
state->border_color[2] = sampler->border_color[2];
state->border_color[3] = sampler->border_color[3];

/*
* FIXME: Handle the remainder of pipe_sampler_view.
@@ -125,6 +174,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
}


/**
 * Generate code to compute coordinate gradient (rho).
 * \param ddx partial derivatives of (s, t, r, q) with respect to X
 * \param ddy partial derivatives of (s, t, r, q) with respect to Y
 * \return the maximum, over the coordinates in use, of
 *         max(|ddx[i]|, |ddy[i]|) scaled by the matching element of
 *         bld->int_size
 *
 * Depending on bld->dims, at most the first three entries (s, t, r) of
 * ddx/ddy are read.
 *
 * XXX: The resulting rho is scalar, so we ignore all but the first element of
 * derivatives that are passed by the shader.
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             const LLVMValueRef ddx[4],
             const LLVMValueRef ddy[4])
{
   struct lp_build_context *float_size_bld = &bld->float_size_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   const unsigned dims = bld->dims;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_x, rho_y;
   LLVMValueRef rho_vec;
   LLVMValueRef float_size;
   LLVMValueRef rho;

   if (dims <= 1) {
      /* Single coordinate: use the s derivatives directly. */
      rho_x = ddx[0];
      rho_y = ddy[0];
   }
   else {
      /* Pack the per-coordinate derivatives into size-typed vectors. */
      rho_x = float_size_bld->undef;
      rho_y = float_size_bld->undef;

      rho_x = LLVMBuildInsertElement(bld->builder, rho_x, ddx[0], index0, "");
      rho_y = LLVMBuildInsertElement(bld->builder, rho_y, ddy[0], index0, "");

      rho_x = LLVMBuildInsertElement(bld->builder, rho_x, ddx[1], index1, "");
      rho_y = LLVMBuildInsertElement(bld->builder, rho_y, ddy[1], index1, "");

      if (dims >= 3) {
         rho_x = LLVMBuildInsertElement(bld->builder, rho_x, ddx[2], index2, "");
         rho_y = LLVMBuildInsertElement(bld->builder, rho_y, ddy[2], index2, "");
      }
   }

   rho_x = lp_build_abs(float_size_bld, rho_x);
   rho_y = lp_build_abs(float_size_bld, rho_y);

   rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);

   /* Scale each gradient by the texture size along the same coordinate. */
   float_size = lp_build_int_to_float(float_size_bld, bld->int_size);

   rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

   if (dims <= 1) {
      rho = rho_vec;
   }
   else {
      /* Reduce the per-coordinate values to a single scalar maximum. */
      LLVMValueRef rho_s, rho_t, rho_r;

      rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
      rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");

      rho = lp_build_max(float_bld, rho_s, rho_t);

      if (dims >= 3) {
         /*
          * The r component lives in element 2 (where it was inserted
          * above); reading element 0 here would just repeat rho_s.
          */
         rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index2, "");
         rho = lp_build_max(float_bld, rho, rho_r);
      }
   }

   return rho;
}


/*
* Bri-linear lod computation
*
* Use a piece-wise linear approximation of log2 such that:
* - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
* - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
* with the steepness specified in 'factor'
* - exact result for 0.5, 1.5, etc.
*
*
* 1.0 - /----*
* /
* /
* /
* 0.5 - *
* /
* /
* /
* 0.0 - *----/
*
* | |
* 2^0 2^1
*
* This is a technique also commonly used in hardware:
* - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
*
* TODO: For correctness, this should only be applied when texture is known to
* have regular mipmaps, i.e., mipmaps derived from the base level.
*
* TODO: This could be done in fixed point, where applicable.
*/
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   /* Offset added to the lod before it is split into floor and fraction. */
   const double pre_offset = (factor - 0.5)/factor - 0.5;
   /* Offset added to the fraction after it was scaled by 'factor'. */
   const double post_offset = 1 - factor;
   LLVMValueRef shifted_lod;
   LLVMValueRef fract;

   if (0) {
      lp_build_printf(bld->builder, "lod = %f\n", lod);
   }

   shifted_lod = lp_build_add(bld, lod,
                              lp_build_const_vec(bld->type, pre_offset));

   /* The integer part goes straight to the caller's output. */
   lp_build_ifloor_fract(bld, shifted_lod, out_lod_ipart, &fract);

   fract = lp_build_mul(bld, fract,
                        lp_build_const_vec(bld->type, factor));

   /*
    * No clamping of the fractional part is needed because:
    * - the expression below never produces numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */
   *out_lod_fpart = lp_build_add(bld, fract,
                                 lp_build_const_vec(bld->type, post_offset));

   if (0) {
      lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}


/*
 * Combined log2 and brilinear lod computation.
 *
 * The result is meant to be identical to calling lp_build_fast_log2()
 * followed by lp_build_brilinear_lod(), but by combining the two we can
 * compute the integer and fractional parts independently.
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;
   LLVMValueRef scaled_rho;
   LLVMValueRef ipart;
   LLVMValueRef fpart;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor makes the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the
    * integer part will not need any post adjustments.
    */
   scaled_rho = lp_build_mul(bld, rho,
                             lp_build_const_vec(bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   ipart = lp_build_extract_exponent(bld, scaled_rho, 0);

   /* fpart = rho / 2**ipart */
   fpart = lp_build_extract_mantissa(bld, scaled_rho);

   fpart = lp_build_mul(bld, fpart,
                        lp_build_const_vec(bld->type, factor));

   fpart = lp_build_add(bld, fpart,
                        lp_build_const_vec(bld->type, post_offset));

   /*
    * As in lp_build_brilinear_lod, the fractional part needs no clamping:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = ipart;
   *out_lod_fpart = fpart;
}


/**
* Generate code to compute texture level of detail (lambda).
* \param ddx partial derivatives of (s, t, r, q) with respect to X
@@ -138,83 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
* XXX: The resulting lod is scalar, so ignore all but the first element of
* derivatives, lod_bias, etc that are passed by the shader.
*/
LLVMValueRef
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth)
unsigned mip_filter,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart)

{
if (bld->static_state->min_lod == bld->static_state->max_lod) {
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef lod;

*out_lod_ipart = bld->int_bld.zero;
*out_lod_fpart = bld->float_bld.zero;

if (bld->static_state->min_max_lod_equal) {
/* User is forcing sampling from a particular mipmap level.
* This is hit during mipmap generation.
*/
return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
LLVMValueRef min_lod =
bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);

lod = min_lod;
}
else {
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
bld->static_state->lod_bias);
LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
bld->static_state->min_lod);
LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
bld->static_state->max_lod);
LLVMValueRef sampler_lod_bias =
bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
LLVMValueRef lod;

if (explicit_lod) {
lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
index0, "");
}
else {
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef dsdx, dsdy;
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;

dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
dsdx = lp_build_abs(float_bld, dsdx);
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
dsdy = lp_build_abs(float_bld, dsdy);
if (dims > 1) {
dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
dtdx = lp_build_abs(float_bld, dtdx);
dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
dtdy = lp_build_abs(float_bld, dtdy);
if (dims > 2) {
drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
drdx = lp_build_abs(float_bld, drdx);
drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
drdy = lp_build_abs(float_bld, drdy);
}
}
rho = lp_build_rho(bld, ddx, ddy);

/* Compute rho = max of all partial derivatives scaled by texture size.
* XXX this could be vectorized somewhat
/*
* Compute lod = log2(rho)
*/
rho = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dsdx, dsdy),
lp_build_int_to_float(float_bld, width), "");
if (dims > 1) {
LLVMValueRef max;
max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dtdx, dtdy),
lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
if (dims > 2) {
max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, drdx, drdy),
lp_build_int_to_float(float_bld, depth), "");
rho = lp_build_max(float_bld, rho, max);

if (!lod_bias &&
!bld->static_state->lod_bias_non_zero &&
!bld->static_state->apply_max_lod &&
!bld->static_state->apply_min_lod) {
/*
* Special case when there are no post-log2 adjustments, which
* saves instructions by keeping the integer and fractional lod
* computations separate from the start.
*/

if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
*out_lod_ipart = lp_build_ilog2(float_bld, rho);
*out_lod_fpart = bld->float_bld.zero;
return;
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
return;
}
}

/* compute lod = log2(rho) */
lod = lp_build_log2(float_bld, rho);
if (0) {
lod = lp_build_log2(float_bld, rho);
}
else {
lod = lp_build_fast_log2(float_bld, rho);
}

/* add shader lod bias */
if (lod_bias) {
@@ -225,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
}

/* add sampler lod bias */
lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
if (bld->static_state->lod_bias_non_zero)
lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");


/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
if (bld->static_state->apply_max_lod) {
LLVMValueRef max_lod =
bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);

return lod;
lod = lp_build_min(float_bld, lod, max_lod);
}
if (bld->static_state->apply_min_lod) {
LLVMValueRef min_lod =
bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);

lod = lp_build_max(float_bld, lod, min_lod);
}
}

if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
}
else {
lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
}

lp_build_name(*out_lod_fpart, "lod_fpart");
}
else {
*out_lod_ipart = lp_build_iround(float_bld, lod);
}

lp_build_name(*out_lod_ipart, "lod_ipart");

return;
}


@@ -245,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
unsigned unit,
LLVMValueRef lod,
LLVMValueRef lod_ipart,
LLVMValueRef *level_out)
{
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;

@@ -258,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
bld->builder, unit);

/* convert float lod to integer */
level = lp_build_iround(float_bld, lod);
level = lod_ipart;

/* clamp level to legal range of levels */
*level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -273,43 +563,77 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
LLVMValueRef lod,
LLVMValueRef lod_ipart,
LLVMValueRef *lod_fpart_inout,
LLVMValueRef *level0_out,
LLVMValueRef *level1_out,
LLVMValueRef *weight_out)
LLVMValueRef *level1_out)
{
struct lp_build_context *float_bld = &bld->float_bld;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef last_level;
LLVMValueRef clamp_min;
LLVMValueRef clamp_max;

*level0_out = lod_ipart;
*level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);

last_level = bld->dynamic_state->last_level(bld->dynamic_state,
bld->builder, unit);

/* convert float lod to integer */
level = lp_build_ifloor(float_bld, lod);

/* compute level 0 and clamp to legal range of levels */
*level0_out = lp_build_clamp(int_bld, level,
int_bld->zero,
last_level);
/* compute level 1 and clamp to legal range of levels */
level = lp_build_add(int_bld, level, int_bld->one);
*level1_out = lp_build_clamp(int_bld, level,
int_bld->zero,
last_level);

*weight_out = lp_build_fract(float_bld, lod);
/*
* Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
* minimum number of comparisons, and zeroing lod_fpart in the extreme
* ends in the process.
*/

/* lod_ipart < 0 */
clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
lod_ipart, int_bld->zero,
"clamp_lod_to_zero");

*level0_out = LLVMBuildSelect(builder, clamp_min,
int_bld->zero, *level0_out, "");

*level1_out = LLVMBuildSelect(builder, clamp_min,
int_bld->zero, *level1_out, "");

*lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
float_bld->zero, *lod_fpart_inout, "");

/* lod_ipart >= last_level */
clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
lod_ipart, last_level,
"clamp_lod_to_last");

*level0_out = LLVMBuildSelect(builder, clamp_max,
last_level, *level0_out, "");

*level1_out = LLVMBuildSelect(builder, clamp_max,
last_level, *level1_out, "");

*lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
float_bld->zero, *lod_fpart_inout, "");

lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
}


/**
* Return pointer to a single mipmap level.
* \param data_array array of pointers to mipmap levels
* \param level integer mipmap level
*/
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef data_array, LLVMValueRef level)
LLVMValueRef level)
{
LLVMValueRef indexes[2], data_ptr;
indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
indexes[1] = level;
data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, "");
data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
return data_ptr;
}
@@ -317,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,

LLVMValueRef
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef data_array, int level)
int level)
{
LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
return lp_build_get_mipmap_level(bld, data_array, lvl);
return lp_build_get_mipmap_level(bld, lvl);
}


@@ -329,13 +653,24 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
* Return max(1, base_size >> level);
*/
static LLVMValueRef
lp_build_minify(struct lp_build_sample_context *bld,
lp_build_minify(struct lp_build_context *bld,
LLVMValueRef base_size,
LLVMValueRef level)
{
LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
return size;
assert(lp_check_value(bld->type, base_size));
assert(lp_check_value(bld->type, level));

if (level == bld->zero) {
/* if we're using mipmap level zero, no minification is needed */
return base_size;
}
else {
LLVMValueRef size =
LLVMBuildLShr(bld->builder, base_size, level, "minify");
assert(bld->type.sign);
size = lp_build_max(bld, size, bld->one);
return size;
}
}


@@ -364,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
*/
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
unsigned dims,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
LLVMValueRef ilevel0,
LLVMValueRef ilevel1,
LLVMValueRef row_stride_array,
LLVMValueRef img_stride_array,
LLVMValueRef *width0_vec,
LLVMValueRef *width1_vec,
LLVMValueRef *height0_vec,
LLVMValueRef *height1_vec,
LLVMValueRef *depth0_vec,
LLVMValueRef *depth1_vec,
LLVMValueRef *row_stride0_vec,
LLVMValueRef *row_stride1_vec,
LLVMValueRef *img_stride0_vec,
LLVMValueRef *img_stride1_vec)
LLVMValueRef ilevel,
LLVMValueRef *out_size,
LLVMValueRef *row_stride_vec,
LLVMValueRef *img_stride_vec)
{
const unsigned mip_filter = bld->static_state->min_mip_filter;
LLVMValueRef ilevel0_vec, ilevel1_vec;
const unsigned dims = bld->dims;
LLVMValueRef ilevel_vec;

ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);

/*
* Compute width, height, depth at mipmap level 'ilevel0'
* Compute width, height, depth at mipmap level 'ilevel'
*/
*width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
*out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);

if (dims >= 2) {
*height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
*row_stride0_vec = lp_build_get_level_stride_vec(bld,
row_stride_array,
ilevel0);
*row_stride_vec = lp_build_get_level_stride_vec(bld,
bld->row_stride_array,
ilevel);
if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
*img_stride0_vec = lp_build_get_level_stride_vec(bld,
img_stride_array,
ilevel0);
if (dims == 3) {
*depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
}
*img_stride_vec = lp_build_get_level_stride_vec(bld,
bld->img_stride_array,
ilevel);
}
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
/* compute width, height, depth for second mipmap level at 'ilevel1' */
*width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
if (dims >= 2) {
*height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
*row_stride1_vec = lp_build_get_level_stride_vec(bld,
row_stride_array,
ilevel1);
if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
*img_stride1_vec = lp_build_get_level_stride_vec(bld,
img_stride_array,
ilevel1);
if (dims == 3) {
*depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
}
}
}


/**
 * Extract and broadcast texture size.
 *
 * Each used element of 'size' is handed to lp_build_extract_broadcast().
 * Only the outputs matching bld->dims are written: width always, height
 * when dims >= 2, depth when dims == 3.
 *
 * @param size_type   type of the texture size vector (either
 *                    bld->int_size_type or bld->float_size_type)
 * @param coord_type  destination type passed to lp_build_extract_broadcast()
 *                    (either bld->int_coord_type or bld->coord_type)
 *                    NOTE(review): the previous comment repeated the
 *                    size_type description here -- confirm wording
 * @param size        vector with the texture size (width, height, depth)
 * @param out_width   receives the broadcast of element 0
 * @param out_height  receives the broadcast of element 1
 * @param out_depth   receives the broadcast of element 2
 */
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                             struct lp_type size_type,
                             struct lp_type coord_type,
                             LLVMValueRef size,
                             LLVMValueRef *out_width,
                             LLVMValueRef *out_height,
                             LLVMValueRef *out_depth)
{
   const unsigned dims = bld->dims;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef *outputs[3];
   unsigned count = 1;
   unsigned i;

   outputs[0] = out_width;
   outputs[1] = out_height;
   outputs[2] = out_depth;

   /* Work out how many of the three outputs have to be produced. */
   if (dims >= 2) {
      count = 2;
      if (dims == 3) {
         count = 3;
      }
   }

   for (i = 0; i < count; i++) {
      *outputs[i] = lp_build_extract_broadcast(bld->builder,
                                               size_type,
                                               coord_type,
                                               size,
                                               LLVMConstInt(i32t, i, 0));
   }
}


/**
 * Unnormalize coords.
 *
 * Multiplies the coordinates in place by the texture size: s by the width,
 * t by the height (dims >= 2) and r by the depth (dims >= 3).
 *
 * @param flt_size  vector with the texture size (width, height, depth),
 *                  read using bld->float_size_type
 * @param s         in/out s coordinate
 * @param t         in/out t coordinate, only touched when dims >= 2
 * @param r         in/out r coordinate, only touched when dims >= 3
 */
void
lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
                             LLVMValueRef flt_size,
                             LLVMValueRef *s,
                             LLVMValueRef *t,
                             LLVMValueRef *r)
{
   const unsigned dims = bld->dims;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef width;
   LLVMValueRef height;
   LLVMValueRef depth;

   /* Broadcast each size component to the coordinate type. */
   lp_build_extract_image_sizes(bld,
                                bld->float_size_type,
                                bld->coord_type,
                                flt_size,
                                &width,
                                &height,
                                &depth);

   /* s = s * width */
   *s = lp_build_mul(coord_bld, *s, width);
   if (dims < 2) {
      return;
   }

   /* t = t * height */
   *t = lp_build_mul(coord_bld, *t, height);
   if (dims < 3) {
      return;
   }

   /* r = r * depth */
   *r = lp_build_mul(coord_bld, *r, depth);
}


/** Helper used by lp_build_cube_lookup() */
static LLVMValueRef
@@ -547,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");

{
struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef face_s_var;
LLVMValueRef face_t_var;
LLVMValueRef face_var;

flow_ctx = lp_build_flow_create(bld->builder);
lp_build_flow_scope_begin(flow_ctx);

*face_s = bld->coord_bld.undef;
*face_t = bld->coord_bld.undef;
*face = bld->int_bld.undef;

lp_build_name(*face_s, "face_s");
lp_build_name(*face_t, "face_t");
lp_build_name(*face, "face");
face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var");
face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var");
face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var");

lp_build_flow_scope_declare(flow_ctx, face_s);
lp_build_flow_scope_declare(flow_ctx, face_t);
lp_build_flow_scope_declare(flow_ctx, face);

lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz);
{
/* +/- X face */
LLVMValueRef sign = lp_build_sgn(float_bld, rx);
@@ -575,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
*face = lp_build_cube_face(bld, rx,
PIPE_TEX_FACE_POS_X,
PIPE_TEX_FACE_NEG_X);
LLVMBuildStore(bld->builder, *face_s, face_s_var);
LLVMBuildStore(bld->builder, *face_t, face_t_var);
LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_else(&if_ctx);
{
struct lp_build_flow_context *flow_ctx2;
struct lp_build_if_state if_ctx2;

LLVMValueRef face_s2 = bld->coord_bld.undef;
LLVMValueRef face_t2 = bld->coord_bld.undef;
LLVMValueRef face2 = bld->int_bld.undef;

flow_ctx2 = lp_build_flow_create(bld->builder);
lp_build_flow_scope_begin(flow_ctx2);
lp_build_flow_scope_declare(flow_ctx2, &face_s2);
lp_build_flow_scope_declare(flow_ctx2, &face_t2);
lp_build_flow_scope_declare(flow_ctx2, &face2);

ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");

lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz);
{
/* +/- Y face */
LLVMValueRef sign = lp_build_sgn(float_bld, ry);
LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
face2 = lp_build_cube_face(bld, ry,
*face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
*face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
*face = lp_build_cube_face(bld, ry,
PIPE_TEX_FACE_POS_Y,
PIPE_TEX_FACE_NEG_Y);
LLVMBuildStore(bld->builder, *face_s, face_s_var);
LLVMBuildStore(bld->builder, *face_t, face_t_var);
LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_else(&if_ctx2);
{
/* +/- Z face */
LLVMValueRef sign = lp_build_sgn(float_bld, rz);
LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
face2 = lp_build_cube_face(bld, rz,
*face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
*face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
*face = lp_build_cube_face(bld, rz,
PIPE_TEX_FACE_POS_Z,
PIPE_TEX_FACE_NEG_Z);
LLVMBuildStore(bld->builder, *face_s, face_s_var);
LLVMBuildStore(bld->builder, *face_t, face_t_var);
LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_endif(&if_ctx2);
lp_build_flow_scope_end(flow_ctx2);
lp_build_flow_destroy(flow_ctx2);
*face_s = face_s2;
*face_t = face_t2;
*face = face2;
}

lp_build_endif(&if_ctx);
lp_build_flow_scope_end(flow_ctx);
lp_build_flow_destroy(flow_ctx);

*face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s");
*face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t");
*face = LLVMBuildLoad(bld->builder, face_var, "face");
}
}

@@ -659,11 +1022,21 @@ lp_build_sample_partial_offset(struct lp_build_context *bld,
* Pixel blocks have power of two dimensions. LLVM should convert the
* rem/div to bit arithmetic.
* TODO: Verify this.
* It does indeed BUT it does transform it to scalar (and back) when doing so
* (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
* The generated code looks seriously unfunny and is quite expensive.
*/

#if 0
LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
#else
unsigned logbase2 = util_unsigned_logbase2(block_length);
LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2);
LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1);
subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, "");
coord = LLVMBuildLShr(bld->builder, coord, block_shift, "");
#endif
}

offset = lp_build_mul(bld, coord, stride);

+ 93
- 42
src/gallium/auxiliary/gallivm/lp_bld_sample.h Vedi File

@@ -82,12 +82,10 @@ struct lp_sampler_static_state
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned normalized_coords:1;
float lod_bias, min_lod, max_lod;
float border_color[4];

/* Aero hacks */
unsigned force_nearest_s:1;
unsigned force_nearest_t:1;
unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */
unsigned lod_bias_non_zero:1;
unsigned apply_min_lod:1; /**< min_lod > 0 ? */
unsigned apply_max_lod:1; /**< max_lod < last_level ? */
};


@@ -104,45 +102,67 @@ struct lp_sampler_static_state
struct lp_sampler_dynamic_state
{

/** Obtain the base texture width. */
/** Obtain the base texture width (returns int32) */
LLVMValueRef
(*width)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain the base texture height. */
/** Obtain the base texture height (returns int32) */
LLVMValueRef
(*height)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain the base texture depth. */
/** Obtain the base texture depth (returns int32) */
LLVMValueRef
(*depth)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain the number of mipmap levels (minus one). */
/** Obtain the number of mipmap levels minus one (returns int32) */
LLVMValueRef
(*last_level)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain stride in bytes between image rows/blocks (returns int32) */
LLVMValueRef
(*row_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain stride in bytes between image slices (returns int32) */
LLVMValueRef
(*img_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain pointer to array of pointers to mipmap levels */
LLVMValueRef
(*data_ptr)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);

/** Obtain texture min lod (returns float) */
LLVMValueRef
(*min_lod)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);

/** Obtain texture max lod (returns float) */
LLVMValueRef
(*max_lod)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);

/** Obtain texture lod bias (returns float) */
LLVMValueRef
(*lod_bias)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);

/** Obtain texture border color (returns ptr to float[4]) */
LLVMValueRef
(*border_color)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);
};


@@ -159,10 +179,16 @@ struct lp_build_sample_context

const struct util_format_description *format_desc;

/* See texture_dims() */
unsigned dims;

/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;

/** float vector type */
struct lp_build_context float_vec_bld;

/** regular scalar int type */
struct lp_type int_type;
struct lp_build_context int_bld;
@@ -171,17 +197,32 @@ struct lp_build_sample_context
struct lp_type coord_type;
struct lp_build_context coord_bld;

/** Unsigned integer coordinates */
struct lp_type uint_coord_type;
struct lp_build_context uint_coord_bld;

/** Signed integer coordinates */
struct lp_type int_coord_type;
struct lp_build_context int_coord_bld;

/** Unsigned integer texture size */
struct lp_type int_size_type;
struct lp_build_context int_size_bld;

/** Float texture size */
struct lp_type float_size_type;
struct lp_build_context float_size_bld;

/** Output texels type and build context */
struct lp_type texel_type;
struct lp_build_context texel_bld;

/* Common dynamic state values */
LLVMValueRef width;
LLVMValueRef height;
LLVMValueRef depth;
LLVMValueRef row_stride_array;
LLVMValueRef img_stride_array;
LLVMValueRef data_array;

/** Integer vector with texture width, height, depth */
LLVMValueRef int_size;
};


@@ -218,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
}


static INLINE int
static INLINE unsigned
texture_dims(enum pipe_texture_target tex)
{
switch (tex) {
@@ -237,6 +278,11 @@ texture_dims(enum pipe_texture_target tex)
}


boolean
lp_sampler_wrap_mode_uses_border_color(unsigned mode,
unsigned min_img_filter,
unsigned mag_img_filter);

/**
* Derive the sampler static state.
*/
@@ -246,15 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);


LLVMValueRef
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth);
unsigned mip_filter,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart);

void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -265,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
LLVMValueRef lod,
LLVMValueRef lod_ipart,
LLVMValueRef *lod_fpart_inout,
LLVMValueRef *level0_out,
LLVMValueRef *level1_out,
LLVMValueRef *weight_out);
LLVMValueRef *level1_out);

LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef data_array, LLVMValueRef level);
LLVMValueRef level);

LLVMValueRef
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef data_array, int level);
int level);


void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
unsigned dims,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
LLVMValueRef ilevel0,
LLVMValueRef ilevel1,
LLVMValueRef row_stride_array,
LLVMValueRef img_stride_array,
LLVMValueRef *width0_vec,
LLVMValueRef *width1_vec,
LLVMValueRef *height0_vec,
LLVMValueRef *height1_vec,
LLVMValueRef *depth0_vec,
LLVMValueRef *depth1_vec,
LLVMValueRef *row_stride0_vec,
LLVMValueRef *row_stride1_vec,
LLVMValueRef *img_stride0_vec,
LLVMValueRef *img_stride1_vec);
LLVMValueRef ilevel,
LLVMValueRef *out_size_vec,
LLVMValueRef *row_stride_vec,
LLVMValueRef *img_stride_vec);


void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
struct lp_type size_type,
struct lp_type coord_type,
LLVMValueRef size,
LLVMValueRef *out_width,
LLVMValueRef *out_height,
LLVMValueRef *out_depth);


void
lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
LLVMValueRef flt_size,
LLVMValueRef *s,
LLVMValueRef *t,
LLVMValueRef *r);


void

+ 289
- 332
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c Vedi File

@@ -45,6 +45,7 @@
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
@@ -80,20 +81,21 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
LLVMValueRef *out_offset,
LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;

length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
if(is_pot)
coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
else
/* Signed remainder won't give the right results for negative
* dividends but unsigned remainder does.*/
else {
/* Add a bias to the texcoord to handle negative coords */
LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
coord = LLVMBuildAdd(bld->builder, coord, bias, "");
coord = LLVMBuildURem(bld->builder, coord, length, "");
}
break;

case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
@@ -111,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
assert(0);
}

lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
out_offset, out_i);
}

@@ -144,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
LLVMValueRef *i0,
LLVMValueRef *i1)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
LLVMValueRef lmask, umask, mask;
@@ -186,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
* multiplication.
*/

*i0 = uint_coord_bld->zero;
*i1 = uint_coord_bld->zero;
*i0 = int_coord_bld->zero;
*i1 = int_coord_bld->zero;

length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

@@ -197,17 +198,18 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
}
else {
/* Signed remainder won't give the right results for negative
* dividends but unsigned remainder does.*/
/* Add a bias to the texcoord to handle negative coords */
LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
}

mask = lp_build_compare(bld->builder, int_coord_bld->type,
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

*offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
*offset0 = lp_build_mul(int_coord_bld, coord0, stride);
*offset1 = LLVMBuildAnd(bld->builder,
lp_build_add(uint_coord_bld, *offset0, stride),
lp_build_add(int_coord_bld, *offset0, stride),
mask, "");
break;

@@ -222,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,

mask = LLVMBuildAnd(bld->builder, lmask, umask, "");

*offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
*offset1 = lp_build_add(uint_coord_bld,
*offset0 = lp_build_mul(int_coord_bld, coord0, stride);
*offset1 = lp_build_add(int_coord_bld,
*offset0,
LLVMBuildAnd(bld->builder, stride, mask, ""));
break;
@@ -236,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
assert(0);
*offset0 = uint_coord_bld->zero;
*offset1 = uint_coord_bld->zero;
*offset0 = int_coord_bld->zero;
*offset1 = int_coord_bld->zero;
break;
}
}
@@ -250,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
LLVMValueRef int_size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -262,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
{
const int dims = texture_dims(bld->static_state->target);
const unsigned dims = bld->dims;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef i32_c8;
LLVMValueRef width_vec, height_vec, depth_vec;
LLVMValueRef s_ipart, t_ipart, r_ipart;
LLVMValueRef x_stride;
LLVMValueRef x_offset, offset;
@@ -280,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);

lp_build_extract_image_sizes(bld,
bld->int_size_type,
bld->int_coord_type,
int_size,
&width_vec,
&height_vec,
&depth_vec);

if (bld->static_state->normalized_coords) {
/* s = s * width, t = t * height */
LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
coord_vec_type, "");
s = lp_build_mul(&bld->coord_bld, s, fp_width);
if (dims >= 2) {
LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
coord_vec_type, "");
t = lp_build_mul(&bld->coord_bld, t, fp_height);
if (dims >= 3) {
LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
coord_vec_type, "");
r = lp_build_mul(&bld->coord_bld, r, fp_depth);
}
}
}
LLVMValueRef scaled_size;
LLVMValueRef flt_size;

/* scale coords by 256 (8 fractional bits) */
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
if (dims >= 2)
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
if (dims >= 3)
r = lp_build_mul_imm(&bld->coord_bld, r, 256);
/* scale size by 256 (8 fractional bits) */
scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
}
else {
/* scale coords by 256 (8 fractional bits) */
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
if (dims >= 2)
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
if (dims >= 3)
r = lp_build_mul_imm(&bld->coord_bld, r, 256);
}

/* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -321,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

/* get pixel, row, image strides */
x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
x_stride = lp_build_const_vec(bld->int_coord_bld.type,
bld->format_desc->block.bits/8);

/* Do texcoord wrapping, compute texel offset */
@@ -340,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y_offset, &y_subcoord);
offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
if (dims >= 3) {
LLVMValueRef z_offset;
lp_build_sample_wrap_nearest_int(bld,
@@ -349,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
bld->static_state->pot_height,
bld->static_state->wrap_r,
&z_offset, &z_subcoord);
offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
LLVMValueRef z_offset;
/* The r coord is the cube face in [0,5] */
z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
}

@@ -414,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
LLVMValueRef int_size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -426,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
{
const int dims = texture_dims(bld->static_state->target);
const unsigned dims = bld->dims;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef width_vec, height_vec, depth_vec;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
@@ -455,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);

lp_build_extract_image_sizes(bld,
bld->int_size_type,
bld->int_coord_type,
int_size,
&width_vec,
&height_vec,
&depth_vec);

if (bld->static_state->normalized_coords) {
/* s = s * width, t = t * height */
LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
coord_vec_type, "");
s = lp_build_mul(&bld->coord_bld, s, fp_width);
if (dims >= 2) {
LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
coord_vec_type, "");
t = lp_build_mul(&bld->coord_bld, t, fp_height);
}
if (dims >= 3) {
LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
coord_vec_type, "");
r = lp_build_mul(&bld->coord_bld, r, fp_depth);
}
}
LLVMValueRef scaled_size;
LLVMValueRef flt_size;

/* scale coords by 256 (8 fractional bits) */
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
if (dims >= 2)
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
if (dims >= 3)
r = lp_build_mul_imm(&bld->coord_bld, r, 256);
/* scale size by 256 (8 fractional bits) */
scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
}
else {
/* scale coords by 256 (8 fractional bits) */
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
if (dims >= 2)
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
if (dims >= 3)
r = lp_build_mul_imm(&bld->coord_bld, r, 256);
}

/* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -489,10 +495,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,

/* subtract 0.5 (add -128) */
i32_c128 = lp_build_const_int_vec(i32.type, -128);
if (!bld->static_state->force_nearest_s) {
s = LLVMBuildAdd(builder, s, i32_c128, "");
}
if (dims >= 2 && !bld->static_state->force_nearest_t) {
s = LLVMBuildAdd(builder, s, i32_c128, "");
if (dims >= 2) {
t = LLVMBuildAdd(builder, t, i32_c128, "");
}
if (dims >= 3) {
@@ -516,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

/* get pixel, row and image strides */
x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
x_stride = lp_build_const_vec(bld->int_coord_bld.type,
bld->format_desc->block.bits/8);
y_stride = row_stride_vec;
z_stride = img_stride_vec;
@@ -547,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,

for (z = 0; z < 2; z++) {
for (x = 0; x < 2; x++) {
offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
offset[z][0][x], y_offset0);
offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
offset[z][1][x], y_offset1);
}
}
@@ -565,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
&z_subcoord[0], &z_subcoord[1]);
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
offset[0][y][x], z_offset0);
offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
offset[1][y][x], z_offset1);
}
}
}
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
LLVMValueRef z_offset;
z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
/* The r coord is the cube face in [0,5] */
offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
offset[0][y][x], z_offset);
}
}
@@ -709,82 +713,56 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/*
* Linear interpolation with 8.8 fixed point.
*/
if (bld->static_state->force_nearest_s) {
/* special case 1-D lerp */
packed_lo = lp_build_lerp(&h16,
t_fpart_lo,
neighbors_lo[0][0][0],
neighbors_lo[0][0][1]);

packed_hi = lp_build_lerp(&h16,
t_fpart_hi,
neighbors_hi[0][1][0],
neighbors_hi[0][1][0]);
}
else if (bld->static_state->force_nearest_t) {
/* special case 1-D lerp */
if (dims == 1) {
/* 1-D lerp */
packed_lo = lp_build_lerp(&h16,
s_fpart_lo,
neighbors_lo[0][0][0],
neighbors_lo[0][0][1]);
s_fpart_lo,
neighbors_lo[0][0][0],
neighbors_lo[0][0][1]);

packed_hi = lp_build_lerp(&h16,
s_fpart_hi,
neighbors_hi[0][0][0],
neighbors_hi[0][0][1]);
s_fpart_hi,
neighbors_hi[0][0][0],
neighbors_hi[0][0][1]);
}
else {
/* general 1/2/3-D lerping */
if (dims == 1) {
packed_lo = lp_build_lerp(&h16,
s_fpart_lo,
neighbors_lo[0][0][0],
neighbors_lo[0][0][1]);

packed_hi = lp_build_lerp(&h16,
s_fpart_hi,
neighbors_hi[0][0][0],
neighbors_hi[0][0][1]);
}
else {
/* 2-D lerp */
packed_lo = lp_build_lerp_2d(&h16,
s_fpart_lo, t_fpart_lo,
neighbors_lo[0][0][0],
neighbors_lo[0][0][1],
neighbors_lo[0][1][0],
neighbors_lo[0][1][1]);

packed_hi = lp_build_lerp_2d(&h16,
s_fpart_hi, t_fpart_hi,
neighbors_hi[0][0][0],
neighbors_hi[0][0][1],
neighbors_hi[0][1][0],
neighbors_hi[0][1][1]);

if (dims >= 3) {
LLVMValueRef packed_lo2, packed_hi2;

/* lerp in the second z slice */
packed_lo2 = lp_build_lerp_2d(&h16,
s_fpart_lo, t_fpart_lo,
neighbors_lo[1][0][0],
neighbors_lo[1][0][1],
neighbors_lo[1][1][0],
neighbors_lo[1][1][1]);

packed_hi2 = lp_build_lerp_2d(&h16,
s_fpart_hi, t_fpart_hi,
neighbors_hi[1][0][0],
neighbors_hi[1][0][1],
neighbors_hi[1][1][0],
neighbors_hi[1][1][1]);
/* interp between two z slices */
packed_lo = lp_build_lerp(&h16, r_fpart_lo,
packed_lo, packed_lo2);
packed_hi = lp_build_lerp(&h16, r_fpart_hi,
packed_hi, packed_hi2);
}
/* 2-D lerp */
packed_lo = lp_build_lerp_2d(&h16,
s_fpart_lo, t_fpart_lo,
neighbors_lo[0][0][0],
neighbors_lo[0][0][1],
neighbors_lo[0][1][0],
neighbors_lo[0][1][1]);

packed_hi = lp_build_lerp_2d(&h16,
s_fpart_hi, t_fpart_hi,
neighbors_hi[0][0][0],
neighbors_hi[0][0][1],
neighbors_hi[0][1][0],
neighbors_hi[0][1][1]);

if (dims >= 3) {
LLVMValueRef packed_lo2, packed_hi2;

/* lerp in the second z slice */
packed_lo2 = lp_build_lerp_2d(&h16,
s_fpart_lo, t_fpart_lo,
neighbors_lo[1][0][0],
neighbors_lo[1][0][1],
neighbors_lo[1][1][0],
neighbors_lo[1][1][1]);

packed_hi2 = lp_build_lerp_2d(&h16,
s_fpart_hi, t_fpart_hi,
neighbors_hi[1][0][0],
neighbors_hi[1][0][1],
neighbors_hi[1][1][0],
neighbors_hi[1][1][1]);
/* interp between two z slices */
packed_lo = lp_build_lerp(&h16, r_fpart_lo,
packed_lo, packed_lo2);
packed_hi = lp_build_lerp(&h16, r_fpart_hi,
packed_hi, packed_hi2);
}
}

@@ -806,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
LLVMValueRef ilevel0,
LLVMValueRef ilevel1,
LLVMValueRef lod_fpart,
LLVMValueRef width0_vec,
LLVMValueRef width1_vec,
LLVMValueRef height0_vec,
LLVMValueRef height1_vec,
LLVMValueRef depth0_vec,
LLVMValueRef depth1_vec,
LLVMValueRef row_stride0_vec,
LLVMValueRef row_stride1_vec,
LLVMValueRef img_stride0_vec,
LLVMValueRef img_stride1_vec,
LLVMValueRef data_ptr0,
LLVMValueRef data_ptr1,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
LLVMValueRef colors_lo_var,
LLVMValueRef colors_hi_var)
{
LLVMBuilderRef builder = bld->builder;
LLVMValueRef size0;
LLVMValueRef size1;
LLVMValueRef row_stride0_vec;
LLVMValueRef row_stride1_vec;
LLVMValueRef img_stride0_vec;
LLVMValueRef img_stride1_vec;
LLVMValueRef data_ptr0;
LLVMValueRef data_ptr1;
LLVMValueRef colors0_lo, colors0_hi;
LLVMValueRef colors1_lo, colors1_hi;


/* sample the first mipmap level */
lp_build_mipmap_level_sizes(bld, ilevel0,
&size0,
&row_stride0_vec, &img_stride0_vec);
data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
/* sample the first mipmap level */
lp_build_sample_image_nearest(bld,
width0_vec, height0_vec, depth0_vec,
size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r,
&colors0_lo, &colors0_hi);

if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
/* sample the second mipmap level */
lp_build_sample_image_nearest(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
data_ptr1, s, t, r,
&colors1_lo, &colors1_hi);
}
}
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);

/* sample the first mipmap level */
lp_build_sample_image_linear(bld,
width0_vec, height0_vec, depth0_vec,
size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r,
&colors0_lo, &colors0_hi);

if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
/* sample the second mipmap level */
lp_build_sample_image_linear(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
data_ptr1, s, t, r,
&colors1_lo, &colors1_hi);
}
}

/* Store the first level's colors in the output variables */
LLVMBuildStore(builder, colors0_lo, colors_lo_var);
LLVMBuildStore(builder, colors0_hi, colors_hi_var);

if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
/* interpolate samples from the two mipmap levels */
struct lp_build_context h16;
lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));

*colors_lo = lp_build_lerp(&h16, lod_fpart,
colors0_lo, colors1_lo);
*colors_hi = lp_build_lerp(&h16, lod_fpart,
colors0_hi, colors1_hi);
}
else {
/* use first/only level's colors */
*colors_lo = colors0_lo;
*colors_hi = colors0_hi;
LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
LLVMTypeRef i32_type = LLVMIntType(32);
struct lp_build_if_state if_ctx;
LLVMValueRef need_lerp;

lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");

/* need_lerp = lod_fpart > 0 */
need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
lod_fpart, LLVMConstNull(i32_type),
"need_lerp");

lp_build_if(&if_ctx, builder, need_lerp);
{
struct lp_build_context h16_bld;

lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));

/* sample the second mipmap level */
lp_build_mipmap_level_sizes(bld, ilevel1,
&size1,
&row_stride1_vec, &img_stride1_vec);
data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest(bld,
size1,
row_stride1_vec, img_stride1_vec,
data_ptr1, s, t, r,
&colors1_lo, &colors1_hi);
}
else {
lp_build_sample_image_linear(bld,
size1,
row_stride1_vec, img_stride1_vec,
data_ptr1, s, t, r,
&colors1_lo, &colors1_hi);
}

/* interpolate samples from the two mipmap levels */

lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

#if HAVE_LLVM == 0x208
/* This is a work-around for a bug in LLVM 2.8.
* Evidently, something goes wrong in the construction of the
* lod_fpart short[8] vector. Adding this no-effect shuffle seems
* to force the vector to be properly constructed.
* Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
*/
{
LLVMValueRef shuffles[8], shuffle;
int i;
assert(h16_bld.type.length <= Elements(shuffles));
for (i = 0; i < h16_bld.type.length; i++)
shuffles[i] = lp_build_const_int32(2 * (i & 1));
shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
lod_fpart = LLVMBuildShuffleVector(builder,
lod_fpart, lod_fpart,
shuffle, "");
}
#endif

colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
colors0_lo, colors1_lo);
colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
colors0_hi, colors1_hi);

LLVMBuildStore(builder, colors0_lo, colors_lo_var);
LLVMBuildStore(builder, colors0_hi, colors_hi_var);
}
lp_build_endif(&if_ctx);
}
}

@@ -896,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
LLVMValueRef row_stride_array,
LLVMValueRef img_stride_array,
LLVMValueRef data_array,
LLVMValueRef texel_out[4])
{
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef lod = NULL, lod_fpart = NULL;
const unsigned dims = bld->dims;
LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
LLVMValueRef data_ptr0, data_ptr1 = NULL;
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
LLVMValueRef face_ddx[4], face_ddy[4];
struct lp_build_context h16;
LLVMTypeRef h16_vec_type;
struct lp_build_context h16_bld;
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);

/* we only support the common/simple wrap modes at this time */
assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
@@ -935,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,


/* make 16-bit fixed-pt builder context */
lp_build_context_init(&h16, builder, lp_type_ufixed(16));
h16_vec_type = lp_build_vec_type(h16.type);

lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));

/* cube face selection, compute pre-face coords, etc. */
if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
@@ -949,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

/* recompute ddx, ddy using the new (s,t) face texcoords */
face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
face_ddx[2] = NULL;
face_ddx[3] = NULL;
face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
face_ddy[2] = NULL;
face_ddy[3] = NULL;
ddx = face_ddx;
ddy = face_ddy;
}


/*
* Compute the level of detail (float).
*/
@@ -970,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
lod = lp_build_lod_selector(bld, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth);
lp_build_lod_selector(bld, unit, ddx, ddy,
lod_bias, explicit_lod,
mip_filter,
&lod_ipart, &lod_fpart);
} else {
lod_ipart = i32t_zero;
}

/*
* Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
* If mipfilter=linear, also compute the weight between the two
* mipmap levels: lod_fpart
*/
switch (mip_filter) {
default:
@@ -991,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
assert(lod_ipart);
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
ilevel0 = i32t_zero;
}
break;
case PIPE_TEX_MIPFILTER_NEAREST:
assert(lod);
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
assert(lod_ipart);
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
break;
case PIPE_TEX_MIPFILTER_LINEAR:
{
LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
LLVMValueRef i255 = lp_build_const_int32(255);
LLVMTypeRef i16_type = LLVMIntType(16);

assert(lod);

lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
&lod_fpart);
lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

/* the lod_fpart values will be fixed pt values in [0,1) */
}
assert(lod_ipart);
assert(lod_fpart);
lp_build_linear_mip_levels(bld, unit,
lod_ipart, &lod_fpart,
&ilevel0, &ilevel1);
break;
}

/* compute image size(s) of source mipmap level(s) */
lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
ilevel0, ilevel1,
row_stride_array, img_stride_array,
&width0_vec, &width1_vec,
&height0_vec, &height1_vec,
&depth0_vec, &depth1_vec,
&row_stride0_vec, &row_stride1_vec,
&img_stride0_vec, &img_stride1_vec);

/*
* Get pointer(s) to image data for mipmap level(s).
* Get/interpolate texture colors.
*/
data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
}

packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");

/*
* Get/interpolate texture colors.
*/
if (min_filter == mag_filter) {
/* no need to distinguish between minification and magnification */
lp_build_sample_mipmap(bld, min_filter, mip_filter,
s, t, r, lod_fpart,
width0_vec, width1_vec,
height0_vec, height1_vec,
depth0_vec, depth1_vec,
row_stride0_vec, row_stride1_vec,
img_stride0_vec, img_stride1_vec,
data_ptr0, data_ptr1,
&packed_lo, &packed_hi);
lp_build_sample_mipmap(bld,
min_filter, mip_filter,
s, t, r,
ilevel0, ilevel1, lod_fpart,
packed_lo, packed_hi);
}
else {
/* Emit conditional to choose min image filter or mag image filter
* depending on the lod being > 0 or <= 0, respectively.
*/
struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef minify;

flow_ctx = lp_build_flow_create(builder);
lp_build_flow_scope_begin(flow_ctx);

packed_lo = LLVMGetUndef(h16_vec_type);
packed_hi = LLVMGetUndef(h16_vec_type);
/* minify = lod >= 0.0 */
minify = LLVMBuildICmp(builder, LLVMIntSGE,
lod_ipart, int_bld->zero, "");

lp_build_flow_scope_declare(flow_ctx, &packed_lo);
lp_build_flow_scope_declare(flow_ctx, &packed_hi);

/* minify = lod > 0.0 */
minify = LLVMBuildFCmp(builder, LLVMRealUGE,
lod, float_bld->zero, "");

lp_build_if(&if_ctx, flow_ctx, builder, minify);
lp_build_if(&if_ctx, builder, minify);
{
/* Use the minification filter */
lp_build_sample_mipmap(bld, min_filter, mip_filter,
s, t, r, lod_fpart,
width0_vec, width1_vec,
height0_vec, height1_vec,
depth0_vec, depth1_vec,
row_stride0_vec, row_stride1_vec,
img_stride0_vec, img_stride1_vec,
data_ptr0, data_ptr1,
&packed_lo, &packed_hi);
lp_build_sample_mipmap(bld,
min_filter, mip_filter,
s, t, r,
ilevel0, ilevel1, lod_fpart,
packed_lo, packed_hi);
}
lp_build_else(&if_ctx);
{
/* Use the magnification filter */
lp_build_sample_mipmap(bld, mag_filter, mip_filter,
s, t, r, lod_fpart,
width0_vec, width1_vec,
height0_vec, height1_vec,
depth0_vec, depth1_vec,
row_stride0_vec, row_stride1_vec,
img_stride0_vec, img_stride1_vec,
data_ptr0, data_ptr1,
&packed_lo, &packed_hi);
lp_build_sample_mipmap(bld,
mag_filter, PIPE_TEX_MIPFILTER_NONE,
s, t, r,
i32t_zero, NULL, NULL,
packed_lo, packed_hi);
}
lp_build_endif(&if_ctx);

lp_build_flow_scope_end(flow_ctx);
lp_build_flow_destroy(flow_ctx);
}

/* combine 'packed_lo', 'packed_hi' into 'packed' */
{
struct lp_build_context h16, u8n;

lp_build_context_init(&h16, builder, lp_type_ufixed(16));
lp_build_context_init(&u8n, builder, lp_type_unorm(8));

packed = lp_build_pack2(builder, h16.type, u8n.type,
packed_lo, packed_hi);
}
/*
* combine the values stored in 'packed_lo' and 'packed_hi' variables
* into 'packed'
*/
packed = lp_build_pack2(builder,
h16_bld.type, lp_type_unorm(8),
LLVMBuildLoad(builder, packed_lo, ""),
LLVMBuildLoad(builder, packed_hi, ""));

/*
* Convert to SoA and swizzle.

+ 0
- 9
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h Vedi File

@@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
LLVMValueRef row_stride_array,
LLVMValueRef img_stride_array,
LLVMValueRef data_array,
LLVMValueRef texel_out[4]);



+ 366
- 328
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
File diff soppresso perché troppo grande
Vedi File


+ 77
- 0
src/gallium/auxiliary/gallivm/lp_bld_swizzle.c Vedi File

@@ -100,6 +100,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
}


/**
 * Combined extract and broadcast (or a mere shuffle when the two types match).
 *
 * Selects element 'index' of 'vector' and replicates it over every element
 * of a value of type 'dst_type'.
 *
 * @param builder   LLVM IR builder used to emit the instructions
 * @param src_type  type of 'vector'; must agree with dst_type in
 *                  floating-ness and element width
 * @param dst_type  type of the returned value
 * @param vector    source value (a scalar when src_type.length == 1)
 * @param index     32-bit integer value selecting the source element;
 *                  ignored when the source is already a scalar
 * @return the selected element, as a scalar or broadcast to a vector
 *         depending on dst_type.length
 */
LLVMValueRef
lp_build_extract_broadcast(LLVMBuilderRef builder,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */

         res = vector;
      }
      else {
         /*
          * Broadcast scalar -> vector.
          */

         res = lp_build_broadcast(builder,
                                  lp_build_vec_type(dst_type),
                                  vector);
      }
   }
   else {
      if (dst_type.length == src_type.length) {
         /*
          * Special shuffle of the same size.
          */

         LLVMValueRef shuffle;
         shuffle = lp_build_broadcast(builder,
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
         res = LLVMBuildShuffleVector(builder, vector,
                                      LLVMGetUndef(lp_build_vec_type(dst_type)),
                                      shuffle, "");
      }
      else {
         LLVMValueRef scalar;
         scalar = LLVMBuildExtractElement(builder, vector, index, "");
         if (dst_type.length == 1) {
            /*
             * Trivial extract scalar from vector.
             */

            res = scalar;
         }
         else {
            /*
             * General case of different sized vectors.
             *
             * Broadcast the element we just extracted; broadcasting the
             * whole source vector would ignore 'index' and hand a vector
             * to a scalar broadcast.
             */

            res = lp_build_broadcast(builder,
                                     lp_build_vec_type(dst_type),
                                     scalar);
         }
      }
   }

   return res;
}


/**
* Swizzle one channel into all other three channels.
*/

+ 8
- 0
src/gallium/auxiliary/gallivm/lp_bld_swizzle.h Vedi File

@@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar);


/**
 * Combined extract and broadcast (or a mere shuffle when the two types match):
 * take the element of 'vector' selected by 'index' and return it as a value
 * of type 'dst_type'.
 */
LLVMValueRef
lp_build_extract_broadcast(LLVMBuilderRef builder,
struct lp_type src_type,
struct lp_type dst_type,
LLVMValueRef vector,
LLVMValueRef index);


/**
* Broadcast one channel of a vector composed of arrays of XYZW structures into
* all four channel.

+ 77
- 0
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h Vedi File

@@ -36,6 +36,9 @@
#define LP_BLD_TGSI_H

#include "gallivm/lp_bld.h"
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"


struct tgsi_token;
@@ -54,6 +57,75 @@ enum lp_build_tex_modifier {
};


/**
 * Describe a channel of a register.
 *
 * The value can be a:
 * - immediate value (i.e. derived from an IMM register)
 * - CONST[n].x/y/z/w
 * - IN[n].x/y/z/w
 * - undetermined (when .file == TGSI_FILE_NULL)
 *
 * This is one of the analysis results, and is used to describe
 * the output color in terms of inputs.
 */
struct lp_tgsi_channel_info
{
unsigned file:4; /* TGSI_FILE_* */
unsigned swizzle:3; /* PIPE_SWIZZLE_x; source channel, not meaningful for immediates */
union {
uint32_t index; /* register index, for files other than TGSI_FILE_IMMEDIATE */
float value; /* for TGSI_FILE_IMMEDIATE */
} u;
};


/**
 * Describe a texture sampler interpolator.
 *
 * The interpolation is described in terms of regular inputs: each
 * coordinate channel records which register channel feeds it.
 */
struct lp_tgsi_texture_info
{
struct lp_tgsi_channel_info coord[4]; /* one entry per coordinate channel (x, y, z, w) */
unsigned target:8; /* TGSI_TEXTURE_* */
unsigned unit:8; /* Sampler unit */
unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
};


/**
 * Result of the shader analysis performed by lp_build_tgsi_info().
 */
struct lp_tgsi_info
{
/* Generic shader information. */
struct tgsi_shader_info base;

/*
 * Whether any of the texture opcodes access a register file other than
 * TGSI_FILE_INPUT.
 *
 * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
 * benefit.
 */
unsigned indirect_textures:1;

/*
 * Texture opcode description. Aimed at detecting and describing direct
 * texture opcodes. Only the first num_texs entries of tex[] are valid.
 */
unsigned num_texs;
struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];

/*
 * Output description. Aimed at detecting and describing simple blit
 * shaders. Indexed by output register, then by channel.
 */
struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];

/*
 * Shortcut pointers into the above (for fragment shaders), indexed by
 * color buffer.
 */
const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
};

/**
* Sampler code generation interface.
*
@@ -96,6 +168,11 @@ struct lp_build_sampler_aos
};


/**
 * Analyse the given shader tokens and fill in 'info' with the results.
 */
void
lp_build_tgsi_info(const struct tgsi_token *tokens,
struct lp_tgsi_info *info);


void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,

+ 479
- 0
src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c Vedi File

@@ -0,0 +1,479 @@
/**************************************************************************
*
* Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
**************************************************************************/


#include "util/u_memory.h"
#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "lp_bld_debug.h"
#include "lp_bld_tgsi.h"


/**
 * Analysis context.
 *
 * This is where we store the value of each channel of the IMM and TEMP
 * registers as we walk the shader. Output register values are kept in
 * info->output.
 */
struct analysis_context
{
/* Analysis results being filled in. */
struct lp_tgsi_info *info;

/* Number of immediates recorded so far, and their per-channel values. */
unsigned num_imms;
float imm[32][4];

/* What is currently known about each channel of each temporary. */
struct lp_tgsi_channel_info temp[32][4];
};


/**
 * Work out what is known about one channel of a source operand.
 *
 * chan_info->file is left as TGSI_FILE_NULL when the operand uses indirect
 * addressing, absolute value or negation, or when it names a temporary
 * outside the tracked range. Temporaries resolve to whatever was last
 * recorded for the swizzled channel. Immediates resolve to their float
 * value; any other file resolves to its index and swizzled channel.
 */
static void
analyse_src(struct analysis_context *ctx,
            struct lp_tgsi_channel_info *chan_info,
            const struct tgsi_src_register *src,
            unsigned chan)
{
   unsigned swz;

   chan_info->file = TGSI_FILE_NULL;

   /* Modified or indirectly addressed operands are not tracked. */
   if (src->Indirect || src->Absolute || src->Negate) {
      return;
   }

   swz = tgsi_util_get_src_register_swizzle(src, chan);

   if (src->File == TGSI_FILE_TEMPORARY) {
      /* Look through the temporary to the value it currently holds. */
      if (src->Index < Elements(ctx->temp)) {
         *chan_info = ctx->temp[src->Index][swz];
      }
      return;
   }

   chan_info->file = src->File;

   if (src->File != TGSI_FILE_IMMEDIATE) {
      chan_info->u.index = src->Index;
      chan_info->swizzle = swz;
      return;
   }

   assert(src->Index < Elements(ctx->imm));
   if (src->Index < Elements(ctx->imm)) {
      chan_info->u.value = ctx->imm[src->Index][swz];
   }
}


/**
* Whether this register channel refers to a specific immediate value.
*/
static boolean
is_immediate(const struct lp_tgsi_channel_info *chan_info, float value)
{
return chan_info->file == TGSI_FILE_IMMEDIATE &&
chan_info->u.value == value;
}


static void
analyse_tex(struct analysis_context *ctx,
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier)
{
struct lp_tgsi_info *info = ctx->info;
unsigned chan;

if (info->num_texs < Elements(info->tex)) {
struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
bool indirect = FALSE;
unsigned readmask = 0;

tex_info->target = inst->Texture.Texture;
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_1D:
readmask = TGSI_WRITEMASK_X;
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
readmask = TGSI_WRITEMASK_XY;
break;
case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
readmask = TGSI_WRITEMASK_XYZ;
break;
default:
assert(0);
return;
}

if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
/* We don't track explicit derivatives, although we could */
indirect = TRUE;
tex_info->unit = inst->Src[3].Register.Index;
} else {
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
readmask |= TGSI_WRITEMASK_W;
}
tex_info->unit = inst->Src[1].Register.Index;
}

for (chan = 0; chan < 4; ++chan) {
struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan];
if (readmask & (1 << chan)) {
analyse_src(ctx, chan_info, &inst->Src[0].Register, chan);
if (chan_info->file != TGSI_FILE_INPUT) {
indirect = TRUE;
}
} else {
memset(chan_info, 0, sizeof *chan_info);
}
}

if (indirect) {
info->indirect_textures = TRUE;
}

++info->num_texs;
} else {
info->indirect_textures = TRUE;
}
}


/**
* Process an instruction, and update the register values accordingly.
*/
static void
analyse_instruction(struct analysis_context *ctx,
struct tgsi_full_instruction *inst)
{
struct lp_tgsi_info *info = ctx->info;
struct lp_tgsi_channel_info (*regs)[4];
unsigned max_regs;
unsigned i;
unsigned index;
unsigned chan;

for (i = 0; i < inst->Instruction.NumDstRegs; ++i) {
const struct tgsi_dst_register *dst = &inst->Dst[i].Register;

/*
* Get the lp_tgsi_channel_info array corresponding to the destination
* register file.
*/

if (dst->File == TGSI_FILE_TEMPORARY) {
regs = ctx->temp;
max_regs = Elements(ctx->temp);
} else if (dst->File == TGSI_FILE_OUTPUT) {
regs = info->output;
max_regs = Elements(info->output);
} else if (dst->File == TGSI_FILE_ADDRESS ||
dst->File == TGSI_FILE_PREDICATE) {
continue;
} else {
assert(0);
continue;
}

/*
* Detect direct TEX instructions
*/

switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_TEX:
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
break;
case TGSI_OPCODE_TXD:
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
break;
case TGSI_OPCODE_TXB:
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
break;
case TGSI_OPCODE_TXL:
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
break;
case TGSI_OPCODE_TXP:
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
break;
default:
break;
}

/*
* Keep track of assignments and writes
*/

if (dst->Indirect) {
/*
* It could be any register index so clear all register indices.
*/

for (chan = 0; chan < 4; ++chan) {
if (dst->WriteMask & (1 << chan)) {
for (index = 0; index < max_regs; ++index) {
regs[index][chan].file = TGSI_FILE_NULL;
}
}
}
} else if (dst->Index < max_regs) {
/*
* Update this destination register value.
*/

struct lp_tgsi_channel_info res[4];

memset(res, 0, sizeof res);

if (!inst->Instruction.Predicate &&
!inst->Instruction.Saturate) {
for (chan = 0; chan < 4; ++chan) {
if (dst->WriteMask & (1 << chan)) {
if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
analyse_src(ctx, &res[chan],
&inst->Src[0].Register, chan);
} else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
/*
* Propagate values across 1.0 and 0.0 multiplications.
*/

struct lp_tgsi_channel_info src0;
struct lp_tgsi_channel_info src1;

analyse_src(ctx, &src0, &inst->Src[0].Register, chan);
analyse_src(ctx, &src1, &inst->Src[1].Register, chan);

if (is_immediate(&src0, 0.0f)) {
res[chan] = src0;
} else if (is_immediate(&src1, 0.0f)) {
res[chan] = src1;
} else if (is_immediate(&src0, 1.0f)) {
res[chan] = src1;
} else if (is_immediate(&src1, 1.0f)) {
res[chan] = src0;
}
}
}
}
}

for (chan = 0; chan < 4; ++chan) {
if (dst->WriteMask & (1 << chan)) {
regs[dst->Index][chan] = res[chan];
}
}
}
}

/*
* Clear all temporaries information in presence of a control flow opcode.
*/

switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_IF:
case TGSI_OPCODE_IFC:
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDIF:
case TGSI_OPCODE_BGNLOOP:
case TGSI_OPCODE_BRK:
case TGSI_OPCODE_BREAKC:
case TGSI_OPCODE_CONT:
case TGSI_OPCODE_ENDLOOP:
case TGSI_OPCODE_CALLNZ:
case TGSI_OPCODE_CAL:
case TGSI_OPCODE_BGNSUB:
case TGSI_OPCODE_ENDSUB:
case TGSI_OPCODE_SWITCH:
case TGSI_OPCODE_CASE:
case TGSI_OPCODE_DEFAULT:
case TGSI_OPCODE_ENDSWITCH:
case TGSI_OPCODE_RET:
case TGSI_OPCODE_END:
/* XXX: Are there more cases? */
memset(&ctx->temp, 0, sizeof ctx->temp);
memset(&info->output, 0, sizeof info->output);
default:
break;
}
}


/**
 * Debug helper: print the shader followed by the analysis results, i.e. the
 * recorded texture opcodes and every output channel whose value is known.
 */
static INLINE void
dump_info(const struct tgsi_token *tokens,
          struct lp_tgsi_info *info)
{
   unsigned i;
   unsigned c;

   tgsi_dump(tokens, 0);

   /* Texture opcodes. */
   for (i = 0; i < info->num_texs; ++i) {
      const struct lp_tgsi_texture_info *tex = &info->tex[i];

      debug_printf("TEX[%u] =", i);
      for (c = 0; c < 4; ++c) {
         const struct lp_tgsi_channel_info *coord = &tex->coord[c];

         if (coord->file == TGSI_FILE_NULL) {
            debug_printf(" _");
         } else {
            debug_printf(" %s[%u].%c",
                         tgsi_file_names[coord->file],
                         coord->u.index,
                         "xyzw01"[coord->swizzle]);
         }
      }
      debug_printf(", SAMP[%u], %s\n",
                   tex->unit,
                   tgsi_texture_names[tex->target]);
   }

   /* Output channels with a known value. */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      for (c = 0; c < 4; ++c) {
         const struct lp_tgsi_channel_info *out = &info->output[i][c];

         if (out->file == TGSI_FILE_NULL) {
            continue;
         }

         debug_printf("OUT[%u].%c = ", i, "xyzw"[c]);
         if (out->file == TGSI_FILE_IMMEDIATE) {
            debug_printf("%f", out->u.value);
         } else {
            const char *file_name;

            if (out->file == TGSI_FILE_CONSTANT) {
               file_name = "CONST";
            } else if (out->file == TGSI_FILE_INPUT) {
               file_name = "IN";
            } else {
               file_name = "???";
            }
            debug_printf("%s[%u].%c",
                         file_name,
                         out->u.index,
                         "xyzw01"[out->swizzle]);
         }
         debug_printf("\n");
      }
   }
}


/**
 * Analyse a shader and, where possible, describe its texture opcodes and
 * its outputs directly in terms of inputs, constants and immediates.
 *
 * Only the main function body is walked: analysis stops at the first END or
 * BGNSUB opcode.
 *
 * @param tokens  the shader
 * @param info    receives the results; fully overwritten
 */
void
lp_build_tgsi_info(const struct tgsi_token *tokens,
                   struct lp_tgsi_info *info)
{
   /*
    * Placeholder for color buffers that no output is linked to.
    *
    * This must have static storage duration: info->cbuf[] keeps pointing
    * at it after this function returns. Static storage is also
    * zero-initialised, and all-zero is what the rest of this file treats
    * as "undetermined".
    */
   static const struct lp_tgsi_channel_info null_output[4];
   struct tgsi_parse_context parse;
   struct analysis_context ctx;
   unsigned index;
   unsigned chan;

   memset(info, 0, sizeof *info);

   tgsi_scan_shader(tokens, &info->base);

   memset(&ctx, 0, sizeof ctx);
   ctx.info = info;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            struct tgsi_full_instruction *inst =
                  &parse.FullToken.FullInstruction;

            if (inst->Instruction.Opcode == TGSI_OPCODE_END ||
                inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
               /* We reached the end of main function body. */
               goto finished;
            }

            analyse_instruction(&ctx, inst);
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         {
            const unsigned size =
                  parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            /* Immediates beyond the tracked range are simply not recorded. */
            if (ctx.num_imms < Elements(ctx.imm)) {
               /*
                * Bound the copy by the row size as well, so that a bogus
                * token count cannot overrun ctx.imm when asserts are
                * compiled out.
                */
               for (chan = 0; chan < size && chan < 4; ++chan) {
                  ctx.imm[ctx.num_imms][chan] =
                        parse.FullToken.FullImmediate.u[chan].Float;
               }
               ++ctx.num_imms;
            }
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert(0);
      }
   }
finished:

   tgsi_parse_free(&parse);


   /*
    * Link the output color values.
    */

   for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) {
      info->cbuf[index] = null_output;
   }

   for (index = 0; index < info->base.num_outputs; ++index) {
      unsigned semantic_name = info->base.output_semantic_name[index];
      unsigned semantic_index = info->base.output_semantic_index[index];
      if (semantic_name == TGSI_SEMANTIC_COLOR &&
          semantic_index < PIPE_MAX_COLOR_BUFS) {
         info->cbuf[semantic_index] = info->output[index];
      }
   }

   if (gallivm_debug & GALLIVM_DEBUG_TGSI) {
      dump_info(tokens, info);
   }
}

+ 61
- 11
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c Vedi File

@@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
}

if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
for (i = 0; i < num_coords; i++) {
ddx[i] = emit_fetch( bld, inst, 1, i );
ddy[i] = emit_fetch( bld, inst, 2, i );
LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
}
unit = inst->Src[3].Register.Index;
} else {
for (i = 0; i < num_coords; i++) {
ddx[i] = lp_build_ddx( &bld->base, coords[i] );
ddy[i] = lp_build_ddy( &bld->base, coords[i] );
ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
}
unit = inst->Src[1].Register.Index;
}
for (i = num_coords; i < 3; i++) {
ddx[i] = bld->base.undef;
ddy[i] = bld->base.undef;
ddx[i] = LLVMGetUndef(bld->base.elem_type);
ddy[i] = LLVMGetUndef(bld->base.elem_type);
}

bld->sampler->emit_fetch_texel(bld->sampler,
@@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
texel);
}

/**
 * Look at the instruction at 'pc' and the four after it, and tell whether
 * the shader ends, or the window runs out, before any texture, call,
 * conditional, loop or switch opcode shows up.
 *
 * @return TRUE if the instruction list or an END opcode is reached first,
 *         or if none of those opcodes is in the window; FALSE as soon as
 *         one of them is found.
 */
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   int ahead;

   for (ahead = 0; ahead < 5; ahead++) {
      unsigned opcode;

      /* Ran off the end of the instruction list. */
      if (pc + ahead >= bld->info->num_instructions)
         return TRUE;

      opcode = bld->instructions[pc + ahead].Instruction.Opcode;

      switch (opcode) {
      case TGSI_OPCODE_END:
         return TRUE;

      case TGSI_OPCODE_TEX:
      case TGSI_OPCODE_TXP:
      case TGSI_OPCODE_TXD:
      case TGSI_OPCODE_TXB:
      case TGSI_OPCODE_TXL:
      case TGSI_OPCODE_TXF:
      case TGSI_OPCODE_TXQ:
      case TGSI_OPCODE_CAL:
      case TGSI_OPCODE_CALLNZ:
      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_IFC:
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_SWITCH:
         return FALSE;

      default:
         break;
      }
   }

   return TRUE;
}



/**
* Kill fragment if any of the src register values are negative.
@@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
static void
emit_kil(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst )
const struct tgsi_full_instruction *inst,
int pc)
{
const struct tgsi_full_src_register *reg = &inst->Src[0];
LLVMValueRef terms[NUM_CHANNELS];
@@ -959,8 +1001,12 @@ emit_kil(
}
}

if(mask)
if(mask) {
lp_build_mask_update(bld->mask, mask);

if (!near_end_of_shader(bld, pc))
lp_build_mask_check(bld->mask);
}
}


@@ -972,7 +1018,8 @@ emit_kil(
*/
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst)
const struct tgsi_full_instruction *inst,
int pc)
{
LLVMValueRef mask;

@@ -987,6 +1034,9 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
}

lp_build_mask_update(bld->mask, mask);

if (!near_end_of_shader(bld, pc))
lp_build_mask_check(bld->mask);
}

static void
@@ -1535,12 +1585,12 @@ emit_instruction(

case TGSI_OPCODE_KILP:
/* predicated kill */
emit_kilp( bld, inst );
emit_kilp( bld, inst, (*pc)-1 );
break;

case TGSI_OPCODE_KIL:
/* conditional kill */
emit_kil( bld, inst );
emit_kil( bld, inst, (*pc)-1 );
break;

case TGSI_OPCODE_PK2H:

+ 20
- 13
src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c Vedi File

@@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = {
};


static INLINE boolean
static INLINE int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
@@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
void *map;

if(buf->base.base.size < size)
return FALSE;
return 0;

/* be lenient with size */
if(buf->base.base.size >= 2*size)
return FALSE;
return 0;
if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
return FALSE;
return 0;
if(!pb_check_usage(desc->usage, buf->base.base.usage))
return FALSE;
return 0;

map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
if (!map) {
return FALSE;
return -1;
}

pb_unmap(buf->buffer);
return TRUE;
return 1;
}


@@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
struct pb_cache_buffer *curr_buf;
struct list_head *curr, *next;
int64_t now;
int ret = 0;

pipe_mutex_lock(mgr->mutex);

buf = NULL;
@@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
now = os_time_get();
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
buf = curr_buf;
if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0))
buf = curr_buf;
else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
_pb_cache_buffer_destroy(curr_buf);
_pb_cache_buffer_destroy(curr_buf);
else
/* This buffer (and all hereafter) are still hot in cache */
break;
if (ret == -1)
break;
curr = next;
next = curr->next;
}

/* keep searching in the hot buffers */
if(!buf) {
if(!buf && ret != -1) {
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
if(pb_cache_is_buffer_compat(curr_buf, size, desc)) {
ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
if (ret > 0) {
buf = curr_buf;
break;
}
if (ret == -1)
break;
/* no need to check the timeout here */
curr = next;
next = curr->next;
@@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
if(buf) {
LIST_DEL(&buf->head);
--mgr->numDelayed;
pipe_mutex_unlock(mgr->mutex);
/* Increase refcount */
pipe_reference_init(&buf->base.base.reference, 1);

+ 10
- 10
src/gallium/auxiliary/rbug/rbug_context.c Vedi File

@@ -480,7 +480,7 @@ struct rbug_proto_context_list * rbug_demarshal_context_list(struct rbug_proto_h

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST)
return NULL;

pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_context_info * rbug_demarshal_context_info(struct rbug_proto_h

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO)
return NULL;

pos = 0;
@@ -533,7 +533,7 @@ struct rbug_proto_context_draw_block * rbug_demarshal_context_draw_block(struct

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
return NULL;

pos = 0;
@@ -561,7 +561,7 @@ struct rbug_proto_context_draw_step * rbug_demarshal_context_draw_step(struct rb

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_STEP)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_STEP)
return NULL;

pos = 0;
@@ -589,7 +589,7 @@ struct rbug_proto_context_draw_unblock * rbug_demarshal_context_draw_unblock(str

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
return NULL;

pos = 0;
@@ -617,7 +617,7 @@ struct rbug_proto_context_draw_rule * rbug_demarshal_context_draw_rule(struct rb

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_RULE)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_RULE)
return NULL;

pos = 0;
@@ -649,7 +649,7 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_FLUSH)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_FLUSH)
return NULL;

pos = 0;
@@ -677,7 +677,7 @@ struct rbug_proto_context_list_reply * rbug_demarshal_context_list_reply(struct

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST_REPLY)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST_REPLY)
return NULL;

pos = 0;
@@ -705,7 +705,7 @@ struct rbug_proto_context_info_reply * rbug_demarshal_context_info_reply(struct

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO_REPLY)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO_REPLY)
return NULL;

pos = 0;
@@ -739,7 +739,7 @@ struct rbug_proto_context_draw_blocked * rbug_demarshal_context_draw_blocked(str

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
return NULL;

pos = 0;

+ 5
- 5
src/gallium/auxiliary/rbug/rbug_core.c Vedi File

@@ -233,7 +233,7 @@ struct rbug_proto_noop * rbug_demarshal_noop(struct rbug_proto_header *header)

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_NOOP)
if (header->opcode != (int32_t)RBUG_OP_NOOP)
return NULL;

pos = 0;
@@ -259,7 +259,7 @@ struct rbug_proto_ping * rbug_demarshal_ping(struct rbug_proto_header *header)

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_PING)
if (header->opcode != (int32_t)RBUG_OP_PING)
return NULL;

pos = 0;
@@ -285,7 +285,7 @@ struct rbug_proto_error * rbug_demarshal_error(struct rbug_proto_header *header)

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_ERROR)
if (header->opcode != (int32_t)RBUG_OP_ERROR)
return NULL;

pos = 0;
@@ -312,7 +312,7 @@ struct rbug_proto_ping_reply * rbug_demarshal_ping_reply(struct rbug_proto_heade

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_PING_REPLY)
if (header->opcode != (int32_t)RBUG_OP_PING_REPLY)
return NULL;

pos = 0;
@@ -339,7 +339,7 @@ struct rbug_proto_error_reply * rbug_demarshal_error_reply(struct rbug_proto_hea

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_ERROR_REPLY)
if (header->opcode != (int32_t)RBUG_OP_ERROR_REPLY)
return NULL;

pos = 0;

+ 64
- 0
src/gallium/auxiliary/rbug/rbug_demarshal.c Vedi File

@@ -91,3 +91,67 @@ struct rbug_header * rbug_demarshal(struct rbug_proto_header *header)
return NULL;
}
}

const char* rbug_proto_get_name(enum rbug_opcode opcode)
{
switch(opcode) {
case RBUG_OP_NOOP:
return "RBUG_OP_NOOP";
case RBUG_OP_PING:
return "RBUG_OP_PING";
case RBUG_OP_ERROR:
return "RBUG_OP_ERROR";
case RBUG_OP_PING_REPLY:
return "RBUG_OP_PING_REPLY";
case RBUG_OP_ERROR_REPLY:
return "RBUG_OP_ERROR_REPLY";
case RBUG_OP_TEXTURE_LIST:
return "RBUG_OP_TEXTURE_LIST";
case RBUG_OP_TEXTURE_INFO:
return "RBUG_OP_TEXTURE_INFO";
case RBUG_OP_TEXTURE_WRITE:
return "RBUG_OP_TEXTURE_WRITE";
case RBUG_OP_TEXTURE_READ:
return "RBUG_OP_TEXTURE_READ";
case RBUG_OP_TEXTURE_LIST_REPLY:
return "RBUG_OP_TEXTURE_LIST_REPLY";
case RBUG_OP_TEXTURE_INFO_REPLY:
return "RBUG_OP_TEXTURE_INFO_REPLY";
case RBUG_OP_TEXTURE_READ_REPLY:
return "RBUG_OP_TEXTURE_READ_REPLY";
case RBUG_OP_CONTEXT_LIST:
return "RBUG_OP_CONTEXT_LIST";
case RBUG_OP_CONTEXT_INFO:
return "RBUG_OP_CONTEXT_INFO";
case RBUG_OP_CONTEXT_DRAW_BLOCK:
return "RBUG_OP_CONTEXT_DRAW_BLOCK";
case RBUG_OP_CONTEXT_DRAW_STEP:
return "RBUG_OP_CONTEXT_DRAW_STEP";
case RBUG_OP_CONTEXT_DRAW_UNBLOCK:
return "RBUG_OP_CONTEXT_DRAW_UNBLOCK";
case RBUG_OP_CONTEXT_DRAW_RULE:
return "RBUG_OP_CONTEXT_DRAW_RULE";
case RBUG_OP_CONTEXT_FLUSH:
return "RBUG_OP_CONTEXT_FLUSH";
case RBUG_OP_CONTEXT_LIST_REPLY:
return "RBUG_OP_CONTEXT_LIST_REPLY";
case RBUG_OP_CONTEXT_INFO_REPLY:
return "RBUG_OP_CONTEXT_INFO_REPLY";
case RBUG_OP_CONTEXT_DRAW_BLOCKED:
return "RBUG_OP_CONTEXT_DRAW_BLOCKED";
case RBUG_OP_SHADER_LIST:
return "RBUG_OP_SHADER_LIST";
case RBUG_OP_SHADER_INFO:
return "RBUG_OP_SHADER_INFO";
case RBUG_OP_SHADER_DISABLE:
return "RBUG_OP_SHADER_DISABLE";
case RBUG_OP_SHADER_REPLACE:
return "RBUG_OP_SHADER_REPLACE";
case RBUG_OP_SHADER_LIST_REPLY:
return "RBUG_OP_SHADER_LIST_REPLY";
case RBUG_OP_SHADER_INFO_REPLY:
return "RBUG_OP_SHADER_INFO_REPLY";
default:
return NULL;
}
}

+ 5
- 0
src/gallium/auxiliary/rbug/rbug_proto.h Vedi File

@@ -91,4 +91,9 @@ struct rbug_proto_header
*/
struct rbug_connection;

/**
* Get printable string for opcode.
*/
const char* rbug_proto_get_name(enum rbug_opcode opcode);

#endif

+ 6
- 6
src/gallium/auxiliary/rbug/rbug_shader.c Vedi File

@@ -305,7 +305,7 @@ struct rbug_proto_shader_list * rbug_demarshal_shader_list(struct rbug_proto_hea

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST)
if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST)
return NULL;

pos = 0;
@@ -332,7 +332,7 @@ struct rbug_proto_shader_info * rbug_demarshal_shader_info(struct rbug_proto_hea

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO)
if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO)
return NULL;

pos = 0;
@@ -360,7 +360,7 @@ struct rbug_proto_shader_disable * rbug_demarshal_shader_disable(struct rbug_pro

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_SHADER_DISABLE)
if (header->opcode != (int32_t)RBUG_OP_SHADER_DISABLE)
return NULL;

pos = 0;
@@ -389,7 +389,7 @@ struct rbug_proto_shader_replace * rbug_demarshal_shader_replace(struct rbug_pro

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_SHADER_REPLACE)
if (header->opcode != (int32_t)RBUG_OP_SHADER_REPLACE)
return NULL;

pos = 0;
@@ -418,7 +418,7 @@ struct rbug_proto_shader_list_reply * rbug_demarshal_shader_list_reply(struct rb

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST_REPLY)
if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST_REPLY)
return NULL;

pos = 0;
@@ -446,7 +446,7 @@ struct rbug_proto_shader_info_reply * rbug_demarshal_shader_info_reply(struct rb

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO_REPLY)
if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO_REPLY)
return NULL;

pos = 0;

+ 7
- 7
src/gallium/auxiliary/rbug/rbug_texture.c Vedi File

@@ -417,7 +417,7 @@ struct rbug_proto_texture_list * rbug_demarshal_texture_list(struct rbug_proto_h

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST)
return NULL;

pos = 0;
@@ -443,7 +443,7 @@ struct rbug_proto_texture_info * rbug_demarshal_texture_info(struct rbug_proto_h

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO)
return NULL;

pos = 0;
@@ -470,7 +470,7 @@ struct rbug_proto_texture_write * rbug_demarshal_texture_write(struct rbug_proto

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_WRITE)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_WRITE)
return NULL;

pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_texture_read * rbug_demarshal_texture_read(struct rbug_proto_h

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ)
return NULL;

pos = 0;
@@ -540,7 +540,7 @@ struct rbug_proto_texture_list_reply * rbug_demarshal_texture_list_reply(struct

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST_REPLY)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST_REPLY)
return NULL;

pos = 0;
@@ -568,7 +568,7 @@ struct rbug_proto_texture_info_reply * rbug_demarshal_texture_info_reply(struct

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO_REPLY)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO_REPLY)
return NULL;

pos = 0;
@@ -606,7 +606,7 @@ struct rbug_proto_texture_read_reply * rbug_demarshal_texture_read_reply(struct

if (!header)
return NULL;
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ_REPLY)
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ_REPLY)
return NULL;

pos = 0;

+ 0
- 1
src/gallium/auxiliary/rtasm/rtasm_execmem.c Vedi File

@@ -58,7 +58,6 @@

#include <unistd.h>
#include <sys/mman.h>
#include "os/os_thread.h"
#include "util/u_mm.h"

#define EXEC_HEAP_SIZE (10*1024*1024)

+ 24
- 15
src/gallium/auxiliary/target-helpers/inline_sw_helper.h Vedi File

@@ -23,25 +23,12 @@
#include "cell/ppu/cell_public.h"
#endif


static INLINE struct pipe_screen *
sw_screen_create(struct sw_winsys *winsys)
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
{
const char *default_driver;
const char *driver;
struct pipe_screen *screen = NULL;

#if defined(GALLIUM_CELL)
default_driver = "cell";
#elif defined(GALLIUM_LLVMPIPE)
default_driver = "llvmpipe";
#elif defined(GALLIUM_SOFTPIPE)
default_driver = "softpipe";
#else
default_driver = "";
#endif

driver = debug_get_option("GALLIUM_DRIVER", default_driver);

#if defined(GALLIUM_CELL)
if (screen == NULL && strcmp(driver, "cell") == 0)
screen = cell_create_screen(winsys);
@@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys)
return screen;
}


/**
 * Create a software pipe_screen on top of \p winsys.
 *
 * The driver name is looked up through the GALLIUM_DRIVER debug option.
 * The default handed to debug_get_option() is the first of "cell",
 * "llvmpipe" and "softpipe" whose GALLIUM_* macro is defined at build
 * time, or the empty string when none is.  The actual screen creation
 * is delegated to sw_screen_create_named().
 */
static INLINE struct pipe_screen *
sw_screen_create(struct sw_winsys *winsys)
{
   /* Compile-time choice of the name used when GALLIUM_DRIVER gives none. */
#if defined(GALLIUM_CELL)
   const char *fallback = "cell";
#elif defined(GALLIUM_LLVMPIPE)
   const char *fallback = "llvmpipe";
#elif defined(GALLIUM_SOFTPIPE)
   const char *fallback = "softpipe";
#else
   const char *fallback = "";
#endif

   return sw_screen_create_named(winsys,
                                 debug_get_option("GALLIUM_DRIVER", fallback));
}


#endif

+ 12
- 6
src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h Vedi File

@@ -13,22 +13,28 @@ static INLINE struct pipe_screen *
sw_screen_wrap(struct pipe_screen *screen)
{
struct sw_winsys *sws;
struct pipe_screen *sw_screen;
struct pipe_screen *sw_screen = NULL;
const char *driver;

sws = wrapper_sw_winsys_warp_pipe_screen(screen);
driver = debug_get_option("GALLIUM_DRIVER", "native");
if (strcmp(driver, "native") == 0)
return screen;

sws = wrapper_sw_winsys_wrap_pipe_screen(screen);
if (!sws)
goto err;

sw_screen = sw_screen_create(sws);
if (sw_screen == screen)
sw_screen = sw_screen_create_named(sws, driver);

if (!sw_screen)
goto err_winsys;

return sw_screen;

err_winsys:
sws->destroy(sws);
return wrapper_sw_winsys_dewrap_pipe_screen(sws);
err:
return screen;
return screen;
}

#endif

+ 28
- 24
src/gallium/auxiliary/tgsi/tgsi_dump.c Vedi File

@@ -90,7 +90,8 @@ static const char *processor_type_names[] =
"GEOM"
};

static const char *file_names[TGSI_FILE_COUNT] =
const char *
tgsi_file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -125,7 +126,8 @@ static const char *semantic_names[] =
"FACE",
"EDGEFLAG",
"PRIM_ID",
"INSTANCEID"
"INSTANCEID",
"STENCIL"
};

static const char *immediate_type_names[] =
@@ -135,7 +137,8 @@ static const char *immediate_type_names[] =
"INT32"
};

static const char *swizzle_names[] =
const char *
tgsi_swizzle_names[] =
{
"x",
"y",
@@ -143,7 +146,8 @@ static const char *swizzle_names[] =
"w"
};

static const char *texture_names[] =
const char *
tgsi_texture_names[] =
{
"UNKNOWN",
"1D",
@@ -201,15 +205,15 @@ _dump_register_src(
struct dump_ctx *ctx,
const struct tgsi_full_src_register *src )
{
ENM(src->Register.File, file_names);
ENM(src->Register.File, tgsi_file_names);
if (src->Register.Dimension) {
if (src->Dimension.Indirect) {
CHR( '[' );
ENM( src->DimIndirect.File, file_names );
ENM( src->DimIndirect.File, tgsi_file_names );
CHR( '[' );
SID( src->DimIndirect.Index );
TXT( "]." );
ENM( src->DimIndirect.SwizzleX, swizzle_names );
ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names );
if (src->Dimension.Index != 0) {
if (src->Dimension.Index > 0)
CHR( '+' );
@@ -224,11 +228,11 @@ _dump_register_src(
}
if (src->Register.Indirect) {
CHR( '[' );
ENM( src->Indirect.File, file_names );
ENM( src->Indirect.File, tgsi_file_names );
CHR( '[' );
SID( src->Indirect.Index );
TXT( "]." );
ENM( src->Indirect.SwizzleX, swizzle_names );
ENM( src->Indirect.SwizzleX, tgsi_swizzle_names );
if (src->Register.Index != 0) {
if (src->Register.Index > 0)
CHR( '+' );
@@ -248,15 +252,15 @@ _dump_register_dst(
struct dump_ctx *ctx,
const struct tgsi_full_dst_register *dst )
{
ENM(dst->Register.File, file_names);
ENM(dst->Register.File, tgsi_file_names);
if (dst->Register.Dimension) {
if (dst->Dimension.Indirect) {
CHR( '[' );
ENM( dst->DimIndirect.File, file_names );
ENM( dst->DimIndirect.File, tgsi_file_names );
CHR( '[' );
SID( dst->DimIndirect.Index );
TXT( "]." );
ENM( dst->DimIndirect.SwizzleX, swizzle_names );
ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names );
if (dst->Dimension.Index != 0) {
if (dst->Dimension.Index > 0)
CHR( '+' );
@@ -271,11 +275,11 @@ _dump_register_dst(
}
if (dst->Register.Indirect) {
CHR( '[' );
ENM( dst->Indirect.File, file_names );
ENM( dst->Indirect.File, tgsi_file_names );
CHR( '[' );
SID( dst->Indirect.Index );
TXT( "]." );
ENM( dst->Indirect.SwizzleX, swizzle_names );
ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names );
if (dst->Register.Index != 0) {
if (dst->Register.Index > 0)
CHR( '+' );
@@ -351,7 +355,7 @@ iter_declaration(

TXT( "DCL " );

ENM(decl->Declaration.File, file_names);
ENM(decl->Declaration.File, tgsi_file_names);

/* all geometry shader inputs are two dimensional */
if (decl->Declaration.File == TGSI_FILE_INPUT &&
@@ -585,10 +589,10 @@ iter_instruction(
inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z ||
inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
ENM( inst->Predicate.SwizzleX, swizzle_names );
ENM( inst->Predicate.SwizzleY, swizzle_names );
ENM( inst->Predicate.SwizzleZ, swizzle_names );
ENM( inst->Predicate.SwizzleW, swizzle_names );
ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names );
ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names );
ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names );
ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names );
}

TXT( ") " );
@@ -641,10 +645,10 @@ iter_instruction(
src->Register.SwizzleZ != TGSI_SWIZZLE_Z ||
src->Register.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
ENM( src->Register.SwizzleX, swizzle_names );
ENM( src->Register.SwizzleY, swizzle_names );
ENM( src->Register.SwizzleZ, swizzle_names );
ENM( src->Register.SwizzleW, swizzle_names );
ENM( src->Register.SwizzleX, tgsi_swizzle_names );
ENM( src->Register.SwizzleY, tgsi_swizzle_names );
ENM( src->Register.SwizzleZ, tgsi_swizzle_names );
ENM( src->Register.SwizzleW, tgsi_swizzle_names );
}

if (src->Register.Absolute)
@@ -655,7 +659,7 @@ iter_instruction(

if (inst->Instruction.Texture) {
TXT( ", " );
ENM( inst->Texture.Texture, texture_names );
ENM( inst->Texture.Texture, tgsi_texture_names );
}

switch (inst->Instruction.Opcode) {

+ 9
- 0
src/gallium/auxiliary/tgsi/tgsi_dump.h Vedi File

@@ -35,6 +35,15 @@
extern "C" {
#endif

extern const char *
tgsi_file_names[TGSI_FILE_COUNT];

extern const char *
tgsi_swizzle_names[];

extern const char *
tgsi_texture_names[];

void
tgsi_dump_str(
const struct tgsi_token *tokens,

+ 4
- 2
src/gallium/auxiliary/tgsi/tgsi_exec.c Vedi File

@@ -605,8 +605,10 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
if ((inst->Src[i].Register.File ==
inst->Dst[0].Register.File) &&
(inst->Src[i].Register.Index ==
inst->Dst[0].Register.Index)) {
((inst->Src[i].Register.Index ==
inst->Dst[0].Register.Index) ||
inst->Src[i].Register.Indirect ||
inst->Dst[0].Register.Indirect)) {
/* loop over dest channels */
uint channelsWritten = 0x0;
FOR_EACH_ENABLED_CHANNEL(*inst, chan) {

+ 4
- 0
src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h Vedi File

@@ -163,6 +163,10 @@ OP12(USGE)
OP12(USHR)
OP12(USLT)
OP12(USNE)
OP01(SWITCH)
OP01(CASE)
OP00(DEFAULT)
OP00(ENDSWITCH)


#undef OP00

+ 6
- 3
src/gallium/auxiliary/tgsi/tgsi_scan.c Vedi File

@@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid;
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap;
info->num_inputs++;
}
@@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens,

/* extra info for special outputs */
if (procType == TGSI_PROCESSOR_FRAGMENT &&
fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
info->writes_z = TRUE;
}
fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
info->writes_z = TRUE;
if (procType == TGSI_PROCESSOR_FRAGMENT &&
fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL)
info->writes_stencil = TRUE;
if (procType == TGSI_PROCESSOR_VERTEX &&
fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) {
info->writes_edgeflag = TRUE;

+ 2
- 0
src/gallium/auxiliary/tgsi/tgsi_scan.h Vedi File

@@ -45,6 +45,7 @@ struct tgsi_shader_info
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
ubyte input_centroid[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
@@ -60,6 +61,7 @@ struct tgsi_shader_info
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */

boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */


+ 39
- 17
src/gallium/auxiliary/tgsi/tgsi_sse2.c Vedi File

@@ -2830,31 +2830,52 @@ static void soa_to_aos( struct x86_function *func,
* Check if the instructions dst register is the same as any src
* register and warn if there's a possible SOA dependency.
*/
static void
static boolean
check_soa_dependencies(const struct tgsi_full_instruction *inst)
{
switch (inst->Instruction.Opcode) {
uint opcode = inst->Instruction.Opcode;

/* XXX: we only handle src/dst aliasing in a few opcodes currently.
* Need to use an additional temporary to hold the result in the
* cases where the code is too opaque to fix.
*/

switch (opcode) {
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
case TGSI_OPCODE_EXP:
case TGSI_OPCODE_LOG:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
case TGSI_OPCODE_DP2A:
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_XPD:
case TGSI_OPCODE_DPH:
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_NRM:
case TGSI_OPCODE_NRM4:
case TGSI_OPCODE_DP2:
/* OK - these opcodes correctly handle SOA dependencies */
break;
return TRUE;
default:
if (tgsi_check_soa_dependencies(inst)) {
uint opcode = inst->Instruction.Opcode;
if (!tgsi_check_soa_dependencies(inst))
return TRUE;

/* XXX: we only handle src/dst aliasing in a few opcodes
* currently. Need to use an additional temporay to hold
* the result in the cases where the code is too opaque to
* fix.
*/
if (opcode != TGSI_OPCODE_MOV) {
debug_printf("Warning: src/dst aliasing in instruction"
" is not handled:\n");
tgsi_dump_instruction(inst, 1);
}
}
debug_printf("Warning: src/dst aliasing in instruction"
" is not handled:\n");
debug_printf("Warning: ");
tgsi_dump_instruction(inst, 1);

return FALSE;
}
}

@@ -2954,7 +2975,8 @@ tgsi_emit_sse2(
tgsi_get_processor_name(proc));
}

check_soa_dependencies(&parse.FullToken.FullInstruction);
if (ok)
ok = check_soa_dependencies(&parse.FullToken.FullInstruction);
break;

case TGSI_TOKEN_TYPE_IMMEDIATE:

+ 11
- 5
src/gallium/auxiliary/tgsi/tgsi_ureg.c Vedi File

@@ -96,7 +96,8 @@ struct ureg_program
unsigned semantic_name;
unsigned semantic_index;
unsigned interp;
unsigned cylindrical_wrap;
unsigned char cylindrical_wrap;
unsigned char centroid;
} fs_input[UREG_MAX_INPUT];
unsigned nr_fs_inputs;

@@ -286,11 +287,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,


struct ureg_src
ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
unsigned cylindrical_wrap)
unsigned cylindrical_wrap,
unsigned centroid)
{
unsigned i;

@@ -306,6 +308,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
ureg->fs_input[i].semantic_index = semantic_index;
ureg->fs_input[i].interp = interp_mode;
ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
ureg->fs_input[i].centroid = centroid;
ureg->nr_fs_inputs++;
} else {
set_bad(ureg);
@@ -1126,7 +1129,8 @@ emit_decl_fs(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interpolate,
unsigned cylindrical_wrap)
unsigned cylindrical_wrap,
unsigned centroid)
{
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);

@@ -1138,6 +1142,7 @@ emit_decl_fs(struct ureg_program *ureg,
out[0].decl.Interpolate = interpolate;
out[0].decl.Semantic = 1;
out[0].decl.CylindricalWrap = cylindrical_wrap;
out[0].decl.Centroid = centroid;

out[1].value = 0;
out[1].decl_range.First = index;
@@ -1287,7 +1292,8 @@ static void emit_decls( struct ureg_program *ureg )
ureg->fs_input[i].semantic_name,
ureg->fs_input[i].semantic_index,
ureg->fs_input[i].interp,
ureg->fs_input[i].cylindrical_wrap);
ureg->fs_input[i].cylindrical_wrap,
ureg->fs_input[i].centroid);
}
} else {
for (i = 0; i < ureg->nr_gs_inputs; i++) {

+ 20
- 4
src/gallium/auxiliary/tgsi/tgsi_ureg.h Vedi File

@@ -158,11 +158,27 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
*/

struct ureg_src
ureg_DECL_fs_input_cyl(struct ureg_program *,
ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
unsigned cylindrical_wrap);
unsigned cylindrical_wrap,
unsigned centroid);

/**
 * Declare a fragment shader input with a cylindrical-wrap setting.
 *
 * Convenience wrapper around ureg_DECL_fs_input_cyl_centroid() that
 * forwards every argument unchanged and passes 0 for the centroid
 * parameter.
 */
static INLINE struct ureg_src
ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
unsigned cylindrical_wrap)
{
return ureg_DECL_fs_input_cyl_centroid(ureg,
semantic_name,
semantic_index,
interp_mode,
cylindrical_wrap,
0);
}

static INLINE struct ureg_src
ureg_DECL_fs_input(struct ureg_program *ureg,
@@ -170,11 +186,11 @@ ureg_DECL_fs_input(struct ureg_program *ureg,
unsigned semantic_index,
unsigned interp_mode)
{
return ureg_DECL_fs_input_cyl(ureg,
return ureg_DECL_fs_input_cyl_centroid(ureg,
semantic_name,
semantic_index,
interp_mode,
0);
0, 0);
}

struct ureg_src

+ 47
- 0
src/gallium/auxiliary/util/u_atomic.h Vedi File

@@ -29,6 +29,8 @@
#define PIPE_ATOMIC_ASM_MSVC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
#define PIPE_ATOMIC_ASM_GCC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64))
#define PIPE_ATOMIC_ASM_GCC_X86_64
#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
@@ -36,6 +38,51 @@
#endif


#if defined(PIPE_ATOMIC_ASM_GCC_X86_64)
#define PIPE_ATOMIC "GCC x86_64 assembly"

#ifdef __cplusplus
extern "C" {
#endif

#define p_atomic_set(_v, _i) (*(_v) = (_i))
#define p_atomic_read(_v) (*(_v))

/*
 * Decrement *v with a lock-prefixed "decl" and report whether the result
 * is zero.  "sete" stores the zero flag left by the decrement into c, so
 * the function returns non-zero exactly when the new value of *v is 0.
 * The "memory" clobber tells the compiler not to cache memory values
 * across the asm statement.
 */
static INLINE boolean
p_atomic_dec_zero(int32_t *v)
{
unsigned char c;

__asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c)
::"memory");

return c != 0;
}

/*
 * Increment *v in place using a lock-prefixed "incl".
 * NOTE(review): unlike p_atomic_dec_zero() this asm has no "memory"
 * clobber; confirm that is intentional.
 */
static INLINE void
p_atomic_inc(int32_t *v)
{
__asm__ __volatile__("lock; incl %0":"+m"(*v));
}

/*
 * Decrement *v in place using a lock-prefixed "decl"; the result is not
 * reported to the caller (use p_atomic_dec_zero() for that).
 */
static INLINE void
p_atomic_dec(int32_t *v)
{
__asm__ __volatile__("lock; decl %0":"+m"(*v));
}

/*
 * Compare-and-swap on *v, implemented with the GCC builtin
 * __sync_val_compare_and_swap(v, old, _new) and returning whatever that
 * builtin returns (per the GCC documentation, the contents of *v before
 * the operation).
 */
static INLINE int32_t
p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
}

#ifdef __cplusplus
}
#endif

#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */


#if defined(PIPE_ATOMIC_ASM_GCC_X86)


+ 32
- 15
src/gallium/auxiliary/util/u_blitter.c Vedi File

@@ -268,7 +268,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
}

for (i = 0; i <= PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);

@@ -964,16 +964,18 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
blitter_restore_CSOs(ctx);
}

/* Clear a region of a depth stencil surface. */
void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *dstsurf)
/* draw a rectangle across a region using a custom dsa stage - for r600g */
void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *zsurf,
struct pipe_surface *cbsurf,
void *dsa_stage, float depth)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;

assert(dstsurf->texture);
if (!dstsurf->texture)
assert(zsurf->texture);
if (!zsurf->texture)
return;

/* check the saved state */
@@ -981,8 +983,8 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
assert(blitter->saved_fb_state.nr_cbufs != ~0);

/* bind CSOs */
pipe->bind_blend_state(pipe, ctx->blend_keep_color);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
pipe->bind_blend_state(pipe, ctx->blend_write_color);
pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);

pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
@@ -990,15 +992,30 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);

/* set a framebuffer state */
fb_state.width = dstsurf->width;
fb_state.height = dstsurf->height;
fb_state.nr_cbufs = 0;
fb_state.cbufs[0] = 0;
fb_state.zsbuf = dstsurf;
fb_state.width = zsurf->width;
fb_state.height = zsurf->height;
fb_state.nr_cbufs = 1;
if (cbsurf) {
fb_state.cbufs[0] = cbsurf;
fb_state.nr_cbufs = 1;
} else {
fb_state.cbufs[0] = NULL;
fb_state.nr_cbufs = 0;
}
fb_state.zsbuf = zsurf;
pipe->set_framebuffer_state(pipe, &fb_state);

blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0,
blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}

/* flush a region of a depth stencil surface for r300g
 *
 * Thin wrapper around util_blitter_custom_depth_stencil(): dstsurf is
 * passed as the depth/stencil surface, no color surface is supplied
 * (NULL), the DSA state is the blitter's own dsa_flush_depth_stencil and
 * the depth argument is 0.0f.
 */
void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *dstsurf)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
util_blitter_custom_depth_stencil(blitter, dstsurf, NULL,
ctx->dsa_flush_depth_stencil, 0.0f);
}

+ 6
- 0
src/gallium/auxiliary/util/u_blitter.h Vedi File

@@ -203,6 +203,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,

void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *dstsurf);

void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *zsurf,
struct pipe_surface *cbsurf,
void *dsa_stage, float depth);

/* The functions below should be used to save currently bound constant state
* objects inside a driver. The objects are automatically restored at the end
* of the util_blitter_{clear, copy_region, fill_region} functions and then

+ 3
- 0
src/gallium/auxiliary/util/u_format.csv Vedi File

@@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___,
PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs
PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs
PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs
PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs
PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs
PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs
PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs
PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs
PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs

# YUV formats
# http://www.fourcc.org/yuv.php#UYVY

+ 53
- 0
src/gallium/auxiliary/util/u_format_zs.c Vedi File

@@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d
}
}


/*
 * Stencil unpack for X24S8_USCALED.  Forwards all arguments unchanged to
 * the Z24_UNORM_S8_USCALED implementation.
 */
void
util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
{
util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
src_row, src_stride,
width, height);
}

/*
 * Stencil pack for X24S8_USCALED.  Forwards all arguments unchanged to
 * the Z24_UNORM_S8_USCALED implementation.
 */
void
util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
{
util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride,
src_row, src_stride,
width, height);
}

/*
 * Stencil unpack for S8X24_USCALED.  Forwards all arguments unchanged to
 * the S8_USCALED_Z24_UNORM implementation.
 */
void
util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
{
util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride,
src_row, src_stride,
width, height);
}

/*
 * Stencil pack for S8X24_USCALED.  Forwards all arguments unchanged to
 * the S8_USCALED_Z24_UNORM implementation.
 */
void
util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
{
util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride,
src_row, src_stride,
width, height);
}

/*
 * Stencil unpack for X32_S8X24_USCALED.  Forwards all arguments unchanged
 * to the Z32_FLOAT_S8X24_USCALED implementation.
 */
void
util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
const uint8_t *src_row, unsigned src_stride,
unsigned width, unsigned height)
{
util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
src_row, src_stride,
width, height);

}

/*
 * Stencil pack for X32_S8X24_USCALED.  Forwards all arguments unchanged
 * to the Z32_FLOAT_S8X24_USCALED implementation.
 */
void
util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
const uint8_t *src_row, unsigned src_stride,
unsigned width, unsigned height)
{
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride,
src_row, src_stride,
width, height);
}

+ 16
- 0
src/gallium/auxiliary/util/u_format_zs.h Vedi File

@@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned
void
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

void
util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

void
util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

void
util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

void
util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

void
util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

/* Stencil pack entry point for X32_S8X24_USCALED; parameter names match
 * the definition and the neighbouring prototypes. */
void
util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
#endif /* U_FORMAT_ZS_H_ */

+ 127
- 0
src/gallium/auxiliary/util/u_index_modify.c Vedi File

@@ -0,0 +1,127 @@
/*
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include "pipe/p_context.h"
#include "util/u_index_modify.h"
#include "util/u_inlines.h"

/**
 * Convert a range of 8-bit indices into a new 16-bit index buffer.
 *
 * A buffer of 2 * count bytes is created with PIPE_BIND_INDEX_BUFFER.
 * Each of the count source indices, starting at element \p start of
 * *elts, has \p index_bias added and is stored as an unsigned short.
 * On return *elts points at the new buffer; nothing in this function
 * releases the buffer it previously pointed at.
 */
void util_shorten_ubyte_elts(struct pipe_context *context,
                             struct pipe_resource **elts,
                             int index_bias,
                             unsigned start,
                             unsigned count)
{
    struct pipe_transfer *src_transfer, *dst_transfer;
    struct pipe_resource *widened;
    const unsigned char *src;
    unsigned short *dst;
    unsigned idx;

    widened = pipe_buffer_create(context->screen,
                                 PIPE_BIND_INDEX_BUFFER,
                                 2 * count);

    src = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
    dst = pipe_buffer_map(context, widened, PIPE_TRANSFER_WRITE, &dst_transfer);

    /* Output always starts at element 0; only the input is offset. */
    for (idx = 0; idx < count; idx++)
        dst[idx] = (unsigned short)(src[start + idx] + index_bias);

    pipe_buffer_unmap(context, *elts, src_transfer);
    pipe_buffer_unmap(context, widened, dst_transfer);

    *elts = widened;
}

/**
 * Copy a range of 16-bit indices into a new index buffer, adding
 * \p index_bias to each one.
 *
 * A buffer of 2 * count bytes is created with PIPE_BIND_INDEX_BUFFER and
 * filled from element \p start of *elts onward.  On return *elts points
 * at the new buffer; nothing in this function releases the buffer it
 * previously pointed at.
 */
void util_rebuild_ushort_elts(struct pipe_context *context,
                              struct pipe_resource **elts,
                              int index_bias,
                              unsigned start, unsigned count)
{
    struct pipe_transfer *src_xfer = NULL;
    struct pipe_transfer *dst_xfer = NULL;
    struct pipe_resource *rebuilt;
    const unsigned short *src;
    unsigned short *dst;
    unsigned idx;

    rebuilt = pipe_buffer_create(context->screen,
                                 PIPE_BIND_INDEX_BUFFER,
                                 2 * count);

    src = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_xfer);
    dst = pipe_buffer_map(context, rebuilt, PIPE_TRANSFER_WRITE, &dst_xfer);

    /* Output always starts at element 0; only the input is offset. */
    for (idx = 0; idx < count; idx++)
        dst[idx] = (unsigned short)(src[start + idx] + index_bias);

    pipe_buffer_unmap(context, *elts, src_xfer);
    pipe_buffer_unmap(context, rebuilt, dst_xfer);

    *elts = rebuilt;
}

/**
 * Copy a range of 32-bit indices into a new index buffer, adding
 * \p index_bias to each one.
 *
 * The new buffer is created with PIPE_BIND_INDEX_BUFFER and sized for
 * count 32-bit indices.  It is filled from element \p start of *elts
 * onward.  On return *elts points at the new buffer; nothing in this
 * function releases the buffer it previously pointed at.
 */
void util_rebuild_uint_elts(struct pipe_context *context,
                            struct pipe_resource **elts,
                            int index_bias,
                            unsigned start, unsigned count)
{
    struct pipe_transfer *in_transfer = NULL;
    struct pipe_transfer *out_transfer = NULL;
    struct pipe_resource *new_elts;
    unsigned int *in_map;
    unsigned int *out_map;
    unsigned i;

    /* Each output index is an unsigned int, so size the buffer by the
     * element type; a 2-bytes-per-index buffer would be overrun by the
     * copy loop below. */
    new_elts = pipe_buffer_create(context->screen,
                                  PIPE_BIND_INDEX_BUFFER,
                                  count * (unsigned)sizeof(*out_map));

    in_map = pipe_buffer_map(context, *elts,
                             PIPE_TRANSFER_READ, &in_transfer);
    out_map = pipe_buffer_map(context, new_elts,
                              PIPE_TRANSFER_WRITE, &out_transfer);

    /* Output always starts at element 0; only the input is offset. */
    in_map += start;
    for (i = 0; i < count; i++) {
        *out_map = (unsigned int)(*in_map + index_bias);
        in_map++;
        out_map++;
    }

    pipe_buffer_unmap(context, *elts, in_transfer);
    pipe_buffer_unmap(context, new_elts, out_transfer);

    *elts = new_elts;
}

src/gallium/winsys/r600/drm/radeon_draw.c → src/gallium/auxiliary/util/u_index_modify.h Vedi File

@@ -1,5 +1,5 @@
/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -18,40 +18,24 @@
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Jerome Glisse
*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "radeon_priv.h"
* USE OR OTHER DEALINGS IN THE SOFTWARE. */

/*
* draw functions
*/
int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon)
{
draw->radeon = radeon;
draw->state = calloc(radeon->max_states, sizeof(void*));
if (draw->state == NULL)
return -ENOMEM;
return 0;
}
#ifndef UTIL_INDEX_MODIFY_H
#define UTIL_INDEX_MODIFY_H

void util_shorten_ubyte_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start,
unsigned count);

void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state)
{
if (state == NULL)
return;
draw->state[state->state_id] = state;
}
void util_rebuild_ushort_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start, unsigned count);

void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state)
{
if (state == NULL)
return;
if (draw->state[state->state_id] == state) {
draw->state[state->state_id] = NULL;
}
}
void util_rebuild_uint_elts(struct pipe_context *context,
struct pipe_resource **elts,
int index_bias,
unsigned start, unsigned count);
#endif

+ 5
- 0
src/gallium/auxiliary/util/u_math.h Vedi File

@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val)
#endif


#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif


#if defined(_MSC_VER)

#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)

+ 0
- 0
src/gallium/auxiliary/util/u_pack_color.h Vedi File


Dato che sono stati cambiati molti file in questo diff, alcuni di essi non verranno mostrati

Loading…
Annulla
Salva