Conflicts: src/gallium/drivers/llvmpipe/lp_setup_coef.c src/gallium/drivers/llvmpipe/lp_setup_coef.h src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c src/gallium/drivers/llvmpipe/lp_setup_point.c src/gallium/drivers/llvmpipe/lp_setup_tri.c src/gallium/drivers/llvmpipe/lp_state_derived.c src/gallium/drivers/llvmpipe/lp_state_fs.h

15 anni fa · 0072acd447
--- a/Makefile
+++ b/Makefile
@@ -329,6 +329,8 @@ GALLIUM_FILES = \
 	$(DIRECTORY)/src/gallium/Makefile.template			\
 	$(DIRECTORY)/src/gallium/SConscript				\
 	$(DIRECTORY)/src/gallium/targets/Makefile.dri			\
 	$(DIRECTORY)/src/gallium/targets/Makefile.xorg			\
 	$(DIRECTORY)/src/gallium/targets/SConscript.dri			\
 	$(DIRECTORY)/src/gallium/*/Makefile				\
 	$(DIRECTORY)/src/gallium/*/SConscript				\
 	$(DIRECTORY)/src/gallium/*/*/Makefile				\
@@ -356,6 +358,7 @@ DRI_FILES = \
 	$(DIRECTORY)/src/mesa/drivers/dri/common/xmlpool/*.[ch]		\
 	$(DIRECTORY)/src/mesa/drivers/dri/common/xmlpool/*.po		\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/*.[chS]			\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/*.cpp			\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/*/*.[chS]			\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/Makefile			\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/*/Makefile			\
--- a/SConstruct
+++ b/SConstruct
@@ -208,7 +208,7 @@ Export('env')

 SConscript(
 	'src/SConscript',
 	variant_dir = env['build'],
 	variant_dir = env['build_dir'],
 	duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html
 )

--- a/common.py
+++ b/common.py
@@ -81,8 +81,8 @@ def AddOptions(opts):
 		from SCons.Variables.EnumVariable import EnumVariable as EnumOption
 	except ImportError:
 		from SCons.Options.EnumOption import EnumOption
 	opts.Add(BoolOption('debug', 'debug build', 'yes'))
 	opts.Add(BoolOption('profile', 'profile build', 'no'))
 	opts.Add(EnumOption('build', 'build type', 'debug',
 	                  allowed_values=('debug', 'checked', 'profile', 'release')))
 	opts.Add(BoolOption('quiet', 'quiet command lines', 'yes'))
 	opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
 											 allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
@@ -91,3 +91,5 @@ def AddOptions(opts):
 	opts.Add('toolchain', 'compiler toolchain', 'default')
 	opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
 	opts.Add(BoolOption('dri', 'build DRI drivers', default_dri))
 	opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
 	opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -58,7 +58,7 @@ EGL_DRIVERS_DIRS = glx

 DRIVER_DIRS = dri
 GALLIUM_WINSYS_DIRS = sw sw/xlib drm/vmware drm/intel drm/i965
 GALLIUM_TARGET_DIRS = egl-swrast
 GALLIUM_TARGET_DIRS = 
 GALLIUM_STATE_TRACKERS_DIRS = egl

 DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon \
--- a/configure.ac
+++ b/configure.ac
@@ -33,6 +33,10 @@ AC_CHECK_PROGS([MAKE], [gmake make])
 AC_PATH_PROG([MKDEP], [makedepend])
 AC_PATH_PROG([SED], [sed])

 if test "x$MKDEP" = "x"; then
    AC_MSG_ERROR([makedepend is required to build Mesa])
 fi

 dnl Our fallback install-sh is a symlink to minstall. Use the existing
 dnl configuration in that case.
 AC_PROG_INSTALL
@@ -692,6 +696,11 @@ AC_SUBST([GLESv2_PC_LIB_PRIV])

 AC_SUBST([HAVE_XF86VIDMODE])

 PKG_CHECK_MODULES([LIBDRM_RADEON],
 		  [libdrm_radeon libdrm >= $LIBDRM_RADEON_REQUIRED],
 		  HAVE_LIBDRM_RADEON=yes,
 		  HAVE_LIBDRM_RADEON=no)

 dnl
 dnl More X11 setup
 dnl
@@ -910,12 +919,7 @@ esac

 case $DRI_DIRS in
 *radeon*|*r200*|*r300*|*r600*)
    PKG_CHECK_MODULES([LIBDRM_RADEON],
 		      [libdrm_radeon libdrm >= $LIBDRM_RADEON_REQUIRED],
 		      HAVE_LIBDRM_RADEON=yes,
 		      HAVE_LIBDRM_RADEON=no)

    if test "$HAVE_LIBDRM_RADEON" = yes; then
    if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
 	RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
    fi
@@ -1363,7 +1367,7 @@ fi
 AC_ARG_WITH([egl-platforms],
    [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
        [comma delimited native platforms libEGL supports, e.g.
        "x11,kms" @<:@default=auto@:>@])],
        "x11,drm" @<:@default=auto@:>@])],
    [with_egl_platforms="$withval"],
    [with_egl_platforms=yes])
 AC_ARG_WITH([egl-displays],
@@ -1376,6 +1380,9 @@ case "$with_egl_platforms" in
 yes)
    if test "x$enable_egl" = xyes && test "x$mesa_driver" != xosmesa; then
        EGL_PLATFORMS="x11"
        if test "$mesa_driver" = dri; then
            EGL_PLATFORMS="$EGL_PLATFORMS drm"
        fi
    fi
    ;;
 *)
@@ -1518,18 +1525,28 @@ elif test "x$enable_gallium_i965" = xauto; then
 fi

 dnl
 dnl Gallium Radeon configuration
 dnl Gallium Radeon r300g configuration
 dnl
 AC_ARG_ENABLE([gallium-radeon],
    [AS_HELP_STRING([--enable-gallium-radeon],
        [build gallium radeon @<:@default=disabled@:>@])],
    [enable_gallium_radeon="$enableval"],
    [enable_gallium_radeon=auto])
 if test "x$enable_gallium_radeon" = xauto; then
    if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
 	gallium_check_st "radeon/drm" "dri-r300"
    else
 	AC_MSG_WARN([libdrm_radeon is missing, not building gallium-radeon (r300)])
    fi
 fi
 if test "x$enable_gallium_radeon" = xyes; then
    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
    gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon"
 elif test "x$enable_gallium_radeon" = xauto; then
    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
    if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
 	gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon"
    else
 	AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-radeon (r300)])
    fi
 fi

 dnl
@@ -1541,8 +1558,12 @@ AC_ARG_ENABLE([gallium-r600],
    [enable_gallium_r600="$enableval"],
    [enable_gallium_r600=auto])
 if test "x$enable_gallium_r600" = xyes; then
    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
    gallium_check_st "r600/drm" "dri-r600"
    if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
 	gallium_check_st "r600/drm" "dri-r600"
    else
 	AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-r600])
    fi
 fi

 dnl
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -25,7 +25,7 @@ Non-normalized Integer texture/framebuffer formats    not started
 Packed depth/stencil formats                          DONE
 Per-buffer blend and masks (GL_EXT_draw_buffers2)     DONE
 GL_EXT_texture_compression_rgtc                       not started
 Red and red/green texture formats                     Ian?
 Red and red/green texture formats                     DONE (swrast, i965)
 Transform feedback (GL_EXT_transform_feedback)        ~50% done
   glBindFragDataLocation, glGetFragDataLocation,
   glBindBufferRange, glBindBufferBase commands
@@ -44,7 +44,7 @@ Instanced drawing (GL_ARB_draw_instanced)             ~50% done
 Buffer copying (GL_ARB_copy_buffer)                   DONE
 Primitive restart (GL_NV_primitive_restart)           not started
 16 vertex texture image units                         not started
 Texture buffer objs (GL_ARB_textur_buffer_object)     not started
 Texture buffer objs (GL_ARB_texture_buffer_object)    not started
 Rectangular textures (GL_ARB_texture_rectangle)       DONE
 Uniform buffer objs (GL_ARB_uniform_buffer_object)    not started
 Signed normalized texture formats                     ~50% done
@@ -69,7 +69,7 @@ GL 3.3:

 GLSL 3.30                                             not started
 GL_ARB_blend_func_extended                            not started
 GL_ARB_explicit_attrib_location                       not started
 GL_ARB_explicit_attrib_location                       DONE (swrast, i915, i965)
 GL_ARB_occlusion_query2                               not started
 GL_ARB_sampler_objects                                not started
 GL_ARB_texture_rgb10_a2ui                             not started
--- a/docs/devinfo.html
+++ b/docs/devinfo.html
@@ -145,7 +145,7 @@ Make sure the values in src/mesa/main/version.h are correct.
 </p>

 <p>
 Update the docs/news.html file and docs/download.html files.
 Update docs/news.html.
 </p>

 <p>
@@ -208,10 +208,11 @@ sftp USERNAME,mesa3d@web.sourceforge.net

 <p>
 Make an announcement on the mailing lists:
 <em>m</em><em>e</em><em>s</em><em>a</em><em>3</em><em>d</em><em>-</em><em>d</em><em>e</em><em>v</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>s</em><em>f</em><em>.</em><em>n</em><em>e</em><em>t</em>,
 <em>m</em><em>e</em><em>s</em><em>a</em><em>3</em><em>d</em><em>-</em><em>u</em><em>s</em><em>e</em><em>r</em><em>s</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>s</em><em>f</em><em>.</em><em>n</em><em>e</em><em>t</em>

 <em>m</em><em>e</em><em>s</em><em>a</em><em>-</em><em>d</em><em>e</em><em>v</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>f</em><em>r</em><em>e</em><em>e</em><em>d</em><em>e</em><em>s</em><em>k</em><em>t</em><em>o</em><em>p</em><em>.</em><em>o</em><em>r</em><em>g</em>,
 <em>m</em><em>e</em><em>s</em><em>a</em><em>-</em><em>u</em><em>s</em><em>e</em><em>r</em><em>s</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>f</em><em>r</em><em>e</em><em>e</em><em>d</em><em>e</em><em>s</em><em>k</em><em>t</em><em>o</em><em>p</em><em>.</em><em>o</em><em>r</em><em>g</em>
 and
 <em>m</em><em>e</em><em>s</em><em>a</em><em>3</em><em>d</em><em>-</em><em>a</em><em>n</em><em>n</em><em>o</em><em>u</em><em>n</em><em>c</em><em>e</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>s</em><em>f</em><em>.</em><em>n</em><em>e</em><em>t</em>
 <em>m</em><em>e</em><em>s</em><em>a</em><em>-</em><em>a</em><em>n</em><em>n</em><em>o</em><em>u</em><em>n</em><em>c</em><em>e</em><em>@</em><em>l</em><em>i</em><em>s</em><em>t</em><em>s</em><em>.</em><em>f</em><em>r</em><em>e</em><em>e</em><em>d</em><em>e</em><em>s</em><em>k</em><em>t</em><em>o</em><em>p</em><em>.</em><em>o</em><em>r</em><em>g</em>
 </p>


--- a/docs/egl.html
+++ b/docs/egl.html
@@ -72,13 +72,13 @@ drivers will be installed to <code>${libdir}/egl</code>.</p>
 <li><code>--with-egl-platforms</code>

 <p>List the platforms (window systems) to support.  Its argument is a comma
 separated string such as <code>--with-egl-platforms=x11,kms</code>.  It decides
 separated string such as <code>--with-egl-platforms=x11,drm</code>.  It decides
 the platforms a driver may support.  The first listed platform is also used by
 the main library to decide the native platform: the platform the EGL native
 types such as <code>EGLNativeDisplayType</code> or
 <code>EGLNativeWindowType</code> defined for.</p>

 <p>The available platforms are <code>x11</code>, <code>kms</code>,
 <p>The available platforms are <code>x11</code>, <code>drm</code>,
 <code>fbdev</code>, and <code>gdi</code>.  The <code>gdi</code> platform can
 only be built with SCons.</p>

--- a/docs/fbdev-dri.html
+++ b/docs/fbdev-dri.html
@@ -9,6 +9,9 @@
 <center><h1>Mesa fbdev/DRI Drivers</h1></center>
 <br>

 <h1><center>NOTE: this information is obsolete and will be removed at
 a future date</center></h1>

 <h1>1. Introduction</h1>

 <p>
@@ -22,7 +25,7 @@ Contributors to this project include Jon Smirl, Keith Whitwell and Dave Airlie.

 <p>
 Applications in the fbdev/DRI environment use
 the <a href="http://www.nabble.com/file/p15480666/MiniGXL.html"> MiniGLX</a> interface to choose pixel
 the MiniGLX interface to choose pixel
 formats, create rendering contexts, etc.  It's a subset of the GLX and
 Xlib interfaces allowing some degree of application portability between
 the X and X-less environments.
@@ -315,8 +318,7 @@ It means that the sample_server process is not running.
 <h1>5.0 Programming Information</h1>

 <p>
 OpenGL/Mesa is interfaced to fbdev via the <a href="http://www.nabble.com/file/p15480666/MiniGLX.html">MiniGLX</a>
 interface.
 OpenGL/Mesa is interfaced to fbdev via the MiniGLX interface.
 MiniGLX is a subset of Xlib and GLX API functions which provides just
 enough functionality to setup OpenGL rendering and respond to simple
 input events.
@@ -332,7 +334,7 @@ This allows some degree of flexibility for software development and testing.
 However, the MiniGLX API is not binary-compatible with full Xlib/GLX.
 Some of the structures are different and some macros/functions work
 differently.
 See the <code>GL/miniglx.h</code> header file for details.
 See the GL/miniglx.h header file for details.
 </p>


--- a/docs/news.html
+++ b/docs/news.html
@@ -11,6 +11,22 @@
 <H1>News</H1>


 <h2>October 4, 2010</h2>

 <p>
 <a href="relnotes-7.9.html">Mesa 7.9</a> (final) is released.  This is a new
 development release.
 </p>


 <h2>September 27, 2010</h2>

 <p>
 <a href="relnotes-7.9.html">Mesa 7.9.0-rc1</a> is released.  This is a
 release candidate for the 7.9 development release.
 </p>


 <h2>June 16, 2010</h2>

 <p>
@@ -1277,7 +1293,6 @@ grateful.
 <p>
 </p><h2>March 18, 1999</h2>
 <p>The new webpages are now online. Enjoy, and let me know if you find any errors.
 For an eye-candy free version you can use <a href="http://www.mesa3d.org/txt/">http://www.mesa3d.org/txt/</a>.</p>
 <p>
 </p><h2>February 16, 1999</h2>
 <p><a href="http://www.sgi.com/">SGI</a> releases its <a href="http://www.sgi.com/software/opensource/glx/">GLX
--- a/docs/relnotes-7.10.html
+++ b/docs/relnotes-7.10.html
@@ -0,0 +1,53 @@
 <HTML>

 <TITLE>Mesa Release Notes</TITLE>

 <head><link rel="stylesheet" type="text/css" href="mesa.css"></head>

 <BODY>

 <body bgcolor="#eeeeee">

 <H1>Mesa 7.10 Release Notes / tbd</H1>

 <p>
 Mesa 7.10 is a new development release.
 People who are concerned with stability and reliability should stick
 with a previous release or wait for Mesa 7.10.1.
 </p>
 <p>
 Mesa 7.10 implements the OpenGL 2.1 API, but the version reported by
 glGetString(GL_VERSION) depends on the particular driver being used.
 Some drivers don't support all the features required in OpenGL 2.1.
 </p>
 <p>
 See the <a href="install.html">Compiling/Installing page</a> for prerequisites
 for DRI hardware acceleration.
 </p>


 <h2>MD5 checksums</h2>
 <pre>
 tbd
 </pre>


 <h2>New features</h2>
 <ul>
 <li>GL_ARB_explicit_attrib_location extension (Intel and software drivers).
 </ul>


 <h2>Bug fixes</h2>
 <ul>
 <li>tbd</li>
 </ul>


 <h2>Changes</h2>
 <ul>
 <li>tbd</li>
 </ul>

 </body>
 </html>
--- a/docs/relnotes-7.8.2.html
+++ b/docs/relnotes-7.8.2.html
@@ -26,7 +26,15 @@ for DRI hardware acceleration.

 <h2>MD5 checksums</h2>
 <pre>
 tbd
 c89b63d253605ed40e8ac370d25a833c  MesaLib-7.8.2.tar.gz
 6be2d343a0089bfd395ce02aaf8adb57  MesaLib-7.8.2.tar.bz2
 a04ad3b06ac5ff3969a003fa7bbf7d5b  MesaLib-7.8.2.zip
 7c213f92efeb471f0331670d5079d4c0  MesaDemos-7.8.2.tar.gz
 757d9e2e06f48b1a52848be9b0307ced  MesaDemos-7.8.2.tar.bz2
 8d0e5cfe68b8ebf90265d350ae2c48b1  MesaDemos-7.8.2.zip
 b74482e3f44f35ed395c4aada4fd8240  MesaGLUT-7.8.2.tar.gz
 a471807b65e49c325808ba4551be93ed  MesaGLUT-7.8.2.tar.bz2
 9f190268c42be582ef66e47365ee61e3  MesaGLUT-7.8.2.zip
 </pre>


@@ -44,10 +52,95 @@ tbd
 <ul>
 <li>Fixed Gallium glDrawPixels(GL_DEPTH_COMPONENT).
 <li>Fixed Gallium Cell driver to buildable, runnable state
 <li>Fixed bad error checking for glFramebufferRenderbuffer(attachment=GL_DEPTH_STENCIL_ATTACHMENT).
 <li>Fixed incorrect Z coordinate handling in "meta" glDraw/CopyPixels.
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=23670">Bug
    #23670</a>.</li>

 <li>Assorted i965 driver fixes.
    Including but not limited to:
  <ul>
    <li>Fix scissoring when width or height is
    0.  <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27643">Bug
    #27643</a>.
    <li>Fix bit allocation for number of color regions for
    ARB_draw_buffers.</li>
    <li>Set the correct provoking vertex for clipped first-mode
    trifans.  <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24470">Bug
    #24470</a>.</li>
    <li>Use <code>R16G16B16A16_FLOAT</code> for 3-component half-float.</li>
    <li>Fix assertion for surface tile offset usage on Ironlake.</li>
    <li>Fix cube map layouts on Ironlake.</li>
    <li>When an RB gets a new region, clear the old from the state
    cache.  <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24119">Bug
    #24119</a>.</li>
    <li>Reject shaders with uninlined function calls instead of hanging.</li>
  </ul>
 </li>

 <li>Assorted i915 driver fixes.  Including but not limited to:
  <ul>
    <li>Fixed texture LOD clamping in i915 driver.
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24846">Bug
    #24846</a>.</li>
    <li>Fix off-by-one for drawing rectangle.
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27408">Bug
    #27408</a>.</li>
  </ul>
 </li>

 <li>Fixed hangs in etracer on 830 and 845
    chipsets.  <a href="https://bugs.freedesktop.org/show_bug.cgi?id=26557">Bug
    #26557</a>.</li>
 <li>Fixed tiling of small textures on all Intel drivers.</li>
 <li>Fixed crash in Savage driver when using <code>_mesa_CopyTexImage2D</code>.
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27652">Bug
    #27652</a>.</li>

 <li>Assorted GLX fixes.  Including but not limited to:
  <ul>
    <li>Fixed <code>__glXInitializeVisualConfigFromTags</code>'s handling of
        unrecognized fbconfig tags.</li>
    <li>Fixed regression with <code>GLX_USE_GL</code>.
    <li>Fixed config chooser logic for 'mask' matching.</li>
    <li>Report swap events correctly in direct rendered case (DRI2)</li>
    <li>Fixed build with dri2proto which doesn't define
        <code>X_DRI2SwapInterval</code>.</li>
    <li>Get <code>GLX_SCREEN</code> first in <code>__glXQueryContextInfo</code>.
        <a href="https://bugs.freedesktop.org/show_bug.cgi?id=14245">Bug
 	#14245</a>.</li>
  </ul>
 </li>

 <li>Assorted GLSL fixes.  Including but not limited to:
  <ul>
    <li>Change variable declared assertion into conditional in GLSL
        compiler.  <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27921">Bug
        #27921</a>.</li>
    <li>Fix instruction indexing
        bugs.  <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27566">Bug
        #27566</a>.</li>
    <li>Updated uniform location / offset encoding to be more like
        other implementations.</li>
    <li>Don't overwrite a driver's shader infolog with generic failure
        message.</li>
  </ul>
 </li>

 <li>Fixed OSMesa build for 16 and 32-bit color channel depth.
 <li>Fixed OSMesa build with hidden symbol visibility. libOSMesa no longer links to libGL.
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=28305">Bug
    #28305</a>.
 <li>Fixed handling of multiple render targets in fixed-function
    texture environment programs.</li>
 <li>Fixed conversion errors in <code>signed_rgba8888[rev]</code> texel
    fetch.</li>
 <li>Don't set srcLevel on <code>GL_TEXTURE_RECTANGLE_ARB</code> targets.</li>
 <li>Various build fixes for OpenBSD.</li>
 <li>Various build fixes for OS X.</li>
 <li>Various build fixes for GCC 3.3.</li>
 </ul>


 <h2>Changes</h2>
 <p>None.</p>
 </body>
 </html>
--- a/docs/relnotes-7.8.3.html
+++ b/docs/relnotes-7.8.3.html
@@ -0,0 +1,89 @@
 <HTML>

 <TITLE>Mesa Release Notes</TITLE>

 <head><link rel="stylesheet" type="text/css" href="mesa.css"></head>

 <BODY>

 <body bgcolor="#eeeeee">

 <H1>Mesa 7.8.3 Release Notes / (date tbd)</H1>

 <p>
 Mesa 7.8.3 is a bug fix release which fixes bugs found since the 7.8.2 release.
 </p>
 <p>
 Mesa 7.8.3 implements the OpenGL 2.1 API, but the version reported by
 glGetString(GL_VERSION) depends on the particular driver being used.
 Some drivers don't support all the features required in OpenGL 2.1.
 </p>
 <p>
 See the <a href="install.html">Compiling/Installing page</a> for prerequisites
 for DRI hardware acceleration.
 </p>


 <h2>MD5 checksums</h2>
 <pre>
 x  MesaLib-7.8.3.tar.gz
 x  MesaLib-7.8.3.tar.bz2
 x  MesaLib-7.8.3.zip
 x  MesaDemos-7.8.3.tar.gz
 x  MesaDemos-7.8.3.tar.bz2
 x  MesaDemos-7.8.3.zip
 x  MesaGLUT-7.8.3.tar.gz
 x  MesaGLUT-7.8.3.tar.bz2
 x  MesaGLUT-7.8.3.zip
 </pre>


 <h2>New features</h2>
 <p>None.</p>


 <h2>Changes</h2>
 <ul>
 <li>The radeon driver should use less memory when searching for a valid mip
 image.</li>
 </ul>


 <h2>Bug fixes</h2>
 <ul>
 <li>Fix unsupported FB with D24S8 (bug 
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=29116">29116</a>)</li>
 <li>Fix ReadPixels crash when reading depth/stencil from an FBO</li>
 <li>Fixed a bug rendering to 16-bit buffers using swrast.</li>
 <li>Fixed a state tracker/TGSI bug that caused crashes when using Windows'
    memory debugging features.</li>
 <li>Fixed an issue rendering to 32-bit channels with swrast (bug
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=29487">29487</a>)</li>
 <li>GLSL: fix indirect <TT>gl_TextureMatrix</TT> addressing (bug
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=28967">28967</a>)</li>
 <li>GLSL: fix for bug
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27216">27216</a></li>
 <li>GLSL: fix zw fragcoord entries in some cases (bug
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=29183">29183</a>)</li>
 <li>Fix texture env generation in some cases (bug
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=28169">28169</a>)</li>
 <li>osmesa: a fix for calling <TT>OSMesaMakeCurrent</TT> twice was applied (bug 
    <a href="https://bugs.freedesktop.org/show_bug.cgi?id=10966">10966</a>)</li>
 <li>A bug was fixed which could cause Mesa to ignore the
    <TT>MESA_EXTENSION_OVERRIDE</TT> environment variable.</li>
 <li>A bug related to specular highlights on backfaces was fixed.</li>
 <li>A radeon-specific issue with <TT>glCopyTex(Sub)Image</TT> was
    corrected.</li>
 <li>radeon/wine: flush command stream in more cases, fixing wine d3d9
    tests.</li>
 <li>r600: fix sin+cos normalization.</li>
 <li>r600: (properly) ignore <TT>GL_COORD_REPLACE</TT> when point sprites are
    disabled.</li>
 <li>radeon: avoid flushing when the context is not current.</li>
 <li>r300c: a bug affecting unaligned BOs was fixed.</li>
 <li>r300c: a hardlock caused by ARB_half_float_vertex being incorrectly advertised on some chipsets was fixed.</li>
 </ul>


 </body>
 </html>
--- a/docs/relnotes-7.9.html
+++ b/docs/relnotes-7.9.html
@@ -8,7 +8,7 @@

 <body bgcolor="#eeeeee">

 <H1>Mesa 7.9 Release Notes / date TBD</H1>
 <H1>Mesa 7.9 Release Notes / October 4, 2010</H1>

 <p>
 Mesa 7.9 is a new development release.
@@ -28,7 +28,12 @@ for DRI hardware acceleration.

 <h2>MD5 checksums</h2>
 <pre>
 tbd
 ed65ab425b25895c7f473d0a5e6e64f8  MesaLib-7.9.tar.gz
 82c740c49d572baa6da2b1a1eee90bca  MesaLib-7.9.tar.bz2
 cd2b6ecec759b0457475e94bbb38fedb  MesaLib-7.9.zip
 7b54af9fb9b1f6a1a65db2520f50848f  MesaGLUT-7.9.tar.gz
 20d07419d1929f833fdb36bced290ad5  MesaGLUT-7.9.tar.bz2
 62a7edecd7c92675cd6029b05217eb0a  MesaGLUT-7.9.zip
 </pre>


@@ -37,16 +42,85 @@ tbd
 <li>New, improved GLSL compiler written by Intel.
    See the <a href="shading.html"> Shading Language</a> page for
    more information.
 <li>GL_EXT_timer_query extension (i965 driver only)
 <li>New, very experimental Gallium driver for R600-R700 Radeons.
 <li>Support for AMD Evergreen-based Radeons (HD 5xxx)
 <li>GL_EXT_timer_query extension (i965 driver and softpipe only)
 <li>GL_EXT_framebuffer_multisample extension (intel drivers, MAX_SAMPLES = 1)
 <li>GL_ARB_texture_swizzle extension (alias of GL_EXT_texture_swizzle)
 <li>GL_ARB_draw_elements_base_vertex, GL_ARB_fragment_program_shadow
    and GL_EXT_draw_buffers2 in Gallium drivers
 <li>GL_ARB_draw_elements_base_vertex, GL_ARB_fragment_program_shadow,
    GL_ARB_window_pos, GL_EXT_gpu_program_parameters,
    GL_ATI_texture_env_combine3, GL_MESA_pack_invert, and GL_OES_EGL_image
    extensions in Gallium drivers
 <li>GL_ARB_depth_clamp and GL_NV_depth_clamp extensions (in nv50 and r600
    Gallium drivers)
 <li>GL_ARB_half_float_vertex extension (in nvfx, r300, r600, softpipe,
    and llvmpipe Gallium drivers)
 <li>GL_EXT_draw_buffers2 (in nv50, r600, softpipe, and llvmpipe Gallium
    drivers)
 <li>GL_EXT_texture_swizzle (in nvfx, r300, r600, softpipe, and llvmpipe
    Gallium drivers)
 <li>GL_ATI_texture_mirror_once (in nvfx, nv50, r300, r600, softpipe, and
    llvmpipe Gallium drivers)
 <li>GL_NV_conditional_render (in r300 Gallium driver)
 <li>Initial "signs of life" support for Sandybridge hardware in i965 DRI
    driver.
 </ul>


 <h2>Bug fixes</h2>
 <p>This list is likely incomplete.</p>
 <ul>
 <li>Massive improvements to the Gallium driver for R300-R500 Radeons; this
    driver is now considered stable for use as a DRI (OpenGL) driver.
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=10908">Bug 10908</a> - GLSL: gl_FogParamaters gl_Fog built-in uniform not functioning</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=13753">Bug 13753</a> - Numerous bugs in GLSL uniform handling</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=16854">Bug 16854</a> - GLSL function call at global scope causes SEGV</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=16856">Bug 16856</a> - GLSL indexing of unsized array results in assertion failure</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=18659">Bug 18659</a> - Crash in shader/slang/slang_codegen.c _slang_gen_function_call_name()</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=19089">Bug 19089</a> - [GLSL] glsl1/shadow2D() cases fail</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=22622">Bug 22622</a> - [GM965 GLSL] noise*() cause GPU lockup</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=23743">Bug 23743</a> - For loop from 0 to 0 not optimized out</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=24553">Bug 24553</a> - shader compilation times explode when using more () pairs</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25664">Bug 25664</a> - [GLSL] re-declaring an empty array fails to compile</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25769">Bug 25769</a> - [GLSL] "float" can be implicitly converted to "int"</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25808">Bug 25808</a> - [GLSL] const variable is modified successfully</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25826">Bug 25826</a> - [GLSL] declaring an unsized array then re-declaring with a size fails</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25827">Bug 25827</a> - [GLSL] vector constructor accepts too many arguments successfully</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25829">Bug 25829</a> - [GLSL] allowing non-void function without returning value</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25830">Bug 25830</a> - [GLSL] allowing non-constant-expression as const declaration initializer</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25877">Bug 25877</a> - [GLSL 1.10] implicit conversion from "int" to "float" should not be allowed</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25878">Bug 25878</a> - [GLSL] sampler is converted to int successfully</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25994">Bug 25994</a> - [GM45][GLSL] 'return' statement in vertex shader unsupported</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25999">Bug 25999</a> - [GLSL] embedded structure constructor fails to compile</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26000">Bug 26000</a> - [GLSL] allowing different parameter qualifier between the function definition and declaration</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26001">Bug 26001</a> - [GLSL 1.10] constructing matrix from matrix succeeds</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26224">Bug 26224</a> - [GLSL] Cannot get location of a uniform struct member</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26990">Bug 26990</a> - [GLSL] variable declaration in "while" fails to compile</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27004">Bug 27004</a> - [GLSL] allowing macro redefinition</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27060">Bug 27060</a> - [965] piglit glsl-fs-raytrace failure due to lack of function calls.</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27216">Bug 27216</a> - Assignment with a function call in an if statement causes an assertion failure</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27261">Bug 27261</a> - GLSL Compiler fails on the following vertex shader</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27265">Bug 27265</a> - GLSL Compiler doesnt link the attached vertex shader</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27388">Bug 27388</a> - [i965] piglit glsl-vs-arrays failure</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27403">Bug 27403</a> - GLSL struct causing "Invalid src register file ..." error</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27914">Bug 27914</a> - GLSL compiler uses MUL+ADD where it could use MAD</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28055">Bug 28055</a> - glsl-texcoord-array fails GLSL compilation</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28374">Bug 28374</a> - SIGSEGV shader/slang/slang_typeinfo.c:534</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28748">Bug 28748</a> - [i965] uninlined function calls support</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28833">Bug 28833</a> - piglit/shaders/glsl-texcoord-array fail</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28834">Bug 28834</a> - Add support for system fpclassify to GL_OES_query_matrix function for OpenBSD / NetBSD</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28837">Bug 28837</a> - varying vec4 index support</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28845">Bug 28845</a> - The GLU tesselator code has some warnings</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28889">Bug 28889</a> - [regression] wine game crash</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28894">Bug 28894</a> - slang build fails if absolute path contains spaces</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28913">Bug 28913</a> - [GLSL] allowing two version statements</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28931">Bug 28931</a> - Floating Point Exception in Warzone2100 Trunk version</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28966">Bug 28966</a> - [r300g] Dynamic branching 3 demo does not run</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28967">Bug 28967</a> - slang/slang_emit.c:350: storage_to_src_reg: Assertion `index &gt;= 0' failed.</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29013">Bug 29013</a> - [r300g] translate_rgb_op: unknown opcode ILLEGAL OPCODE</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29020">Bug 29020</a> - [r300g] Wine d3d9 tests hardlock</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29910">Bug 29910</a> - Mesa advertises bogus GL_ARB_shading_language_120</li>
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=30196">Bug 30196</a> - [GLSL] gl_TextureMatrix{Inverse,Transpose,InverseTranspose} unsupported</li>
 </ul>


--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -14,6 +14,7 @@ The release notes summarize what's new or changed in each Mesa release.

 <UL>
 <LI><A HREF="relnotes-7.9.html">7.9 release notes</A>
 <LI><A HREF="relnotes-7.8.3.html">7.8.3 release notes</A>
 <LI><A HREF="relnotes-7.8.2.html">7.8.2 release notes</A>
 <LI><A HREF="relnotes-7.8.1.html">7.8.1 release notes</A>
 <LI><A HREF="relnotes-7.8.html">7.8 release notes</A>
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -805,7 +805,7 @@ typedef struct __DRIimageExtensionRec __DRIimageExtension;
 struct __DRIimageExtensionRec {
    __DRIextension base;

    __DRIimage *(*createImageFromName)(__DRIcontext *context,
    __DRIimage *(*createImageFromName)(__DRIscreen *screen,
 				       int width, int height, int format,
 				       int name, int pitch,
 				       void *loaderPrivate);
@@ -841,7 +841,7 @@ typedef struct __DRIimageLookupExtensionRec __DRIimageLookupExtension;
 struct __DRIimageLookupExtensionRec {
    __DRIextension base;

    __DRIimage *(*lookupEGLImage)(__DRIcontext *context, void *image,
    __DRIimage *(*lookupEGLImage)(__DRIscreen *screen, void *image,
 				  void *loaderPrivate);
 };

--- a/scons/crossmingw.py
+++ b/scons/crossmingw.py
@@ -54,11 +54,13 @@ prefixes32 = SCons.Util.Split("""
    i586-mingw32msvc-
    i686-mingw32msvc-
    i686-pc-mingw32-
    i686-w64-mingw32-
 """)
 prefixes64 = SCons.Util.Split("""
    amd64-mingw32-
    amd64-mingw32msvc-
    amd64-pc-mingw32-
    x86_64-w64-mingw32-
 """)

 def find(env):
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -49,14 +49,14 @@ def symlink(target, source, env):
    os.symlink(os.path.basename(source), target)

 def install(env, source, subdir):
    target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build'], subdir)
    target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'], subdir)
    env.Install(target_dir, source)

 def install_program(env, source):
    # Programs are handed to install() with the 'bin' subdirectory.
    bin_subdir = 'bin'
    install(env, source, bin_subdir)

 def install_shared_library(env, sources, version = ()):
    install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build'])
    install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'])
    version = tuple(map(str, version))
    if env['SHLIBSUFFIX'] == '.dll':
        dlls = env.FindIxes(sources, 'SHLIBPREFIX', 'SHLIBSUFFIX')
@@ -130,7 +130,6 @@ def generate(env):
    env['msvc'] = env['CC'] == 'cl'

    # shortcuts
    debug = env['debug']
    machine = env['machine']
    platform = env['platform']
    x86 = env['machine'] == 'x86'
@@ -138,20 +137,42 @@ def generate(env):
    gcc = env['gcc']
    msvc = env['msvc']

    # Backwards compatibility with the debug= profile= options
    if env['build'] == 'debug':
        if not env['debug']:
            print 'scons: debug option is deprecated: use instead build=release'
            env['build'] = 'release'
        if env['profile']:
            print 'scons: profile option is deprecated: use instead build=profile'
            env['build'] = 'profile'
    if False:
        # Enforce SConscripts to use the new build variable
        env.popitem('debug')
        env.popitem('profile')
    else:
        # Backwards portability with older sconscripts
        if env['build'] in ('debug', 'checked'):
            env['debug'] = True
            env['profile'] = False
        if env['build'] == 'profile':
            env['debug'] = False
            env['profile'] = True
        if env['build'] == 'release':
            env['debug'] = False
            env['profile'] = False

    # Put build output in a separate dir, which depends on the current
    # configuration. See also http://www.scons.org/wiki/AdvancedBuildExample
    build_topdir = 'build'
    build_subdir = env['platform']
    if env['machine'] != 'generic':
        build_subdir += '-' + env['machine']
    if env['debug']:
        build_subdir += "-debug"
    if env['profile']:
        build_subdir += "-profile"
    if env['build'] != 'release':
        build_subdir += '-' +  env['build']
    build_dir = os.path.join(build_topdir, build_subdir)
    # Place the .sconsign file in the build dir too, to avoid issues with
    # different scons versions building the same source file
    env['build'] = build_dir
    env['build_dir'] = build_dir
    env.SConsignFile(os.path.join(build_dir, '.sconsign'))
    if 'SCONS_CACHE_DIR' in os.environ:
        print 'scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],)
@@ -165,11 +186,11 @@ def generate(env):

    # C preprocessor options
    cppdefines = []
    if debug:
    if env['build'] in ('debug', 'checked'):
        cppdefines += ['DEBUG']
    else:
        cppdefines += ['NDEBUG']
    if env['profile']:
    if env['build'] == 'profile':
        cppdefines += ['PROFILE']
    if platform == 'windows':
        cppdefines += [
@@ -190,7 +211,7 @@ def generate(env):
                '_SCL_SECURE_NO_WARNINGS',
                '_SCL_SECURE_NO_DEPRECATE',
            ]
        if debug:
        if env['build'] in ('debug', 'checked'):
            cppdefines += ['_DEBUG']
    if env['toolchain'] == 'winddk':
        # Mimic WINDDK's builtin flags. See also:
@@ -217,7 +238,7 @@ def generate(env):
            ('__BUILDMACHINE__', 'WinDDK'),
            ('FPO', '0'),
        ]
        if debug:
        if env['build'] in ('debug', 'checked'):
            cppdefines += [('DBG', 1)]
    if platform == 'wince':
        cppdefines += [
@@ -253,15 +274,16 @@ def generate(env):
    ccflags = [] # C & C++
    if gcc:
        ccversion = env['CCVERSION']
        if debug:
            ccflags += ['-O0', '-g3']
        if env['build'] == 'debug':
            ccflags += ['-O0']
        elif ccversion.startswith('4.2.'):
            # gcc 4.2.x optimizer is broken
            print "warning: gcc 4.2.x optimizer is broken -- disabling optimizations"
            ccflags += ['-O0', '-g3']
            ccflags += ['-O0']
        else:
            ccflags += ['-O3', '-g3']
        if env['profile']:
            ccflags += ['-O3']
        ccflags += ['-g3']
        if env['build'] in ('checked', 'profile'):
            # See http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#Which_options_should_I_pass_to_gcc_when_compiling_for_profiling?
            ccflags += [
                '-fno-omit-frame-pointer',
@@ -320,7 +342,7 @@ def generate(env):
        # See also:
        # - http://msdn.microsoft.com/en-us/library/19z1t1wy.aspx
        # - cl /?
        if debug:
        if env['build'] == 'debug':
            ccflags += [
              '/Od', # disable optimizations
              '/Oi', # enable intrinsic functions
@@ -389,7 +411,7 @@ def generate(env):
    if env['platform'] == 'windows' and msvc:
        # Choose the appropriate MSVC CRT
        # http://msdn.microsoft.com/en-us/library/2kzt1wy3.aspx
        if env['debug']:
        if env['build'] in ('debug', 'checked'):
            env.Append(CCFLAGS = ['/MTd'])
            env.Append(SHCCFLAGS = ['/LDd'])
        else:
@@ -421,7 +443,7 @@ def generate(env):
        else:
            env['_LIBFLAGS'] = '-Wl,--start-group ' + env['_LIBFLAGS'] + ' -Wl,--end-group'
    if msvc:
        if not env['debug']:
        if env['build'] != 'debug':
            # enable Link-time Code Generation
            linkflags += ['/LTCG']
            env.Append(ARFLAGS = ['/LTCG'])
@@ -460,7 +482,7 @@ def generate(env):

            '/entry:DrvEnableDriver',
        ]
        if env['debug'] or env['profile']:
        if env['build'] != 'release':
            linkflags += [
                '/MAP', # http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx
            ]
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -124,7 +124,7 @@ def generate(env):
            # Some of the LLVM C headers use the inline keyword without
            # defining it.
            env.Append(CPPDEFINES = [('inline', '__inline')])
            if env['debug']:
            if env['build'] in ('debug', 'checked'):
                # LLVM libraries are static, build with /MT, and they
                # automatically link against LIBCMT. When we're doing a
                # debug build we'll be linking against LIBCMTD, so disable
--- a/scons/wcesdk.py
+++ b/scons/wcesdk.py
@@ -122,7 +122,7 @@ def get_wce600_paths(env):
    host_cpu = os.environ.get('_HOSTCPUTYPE', 'i386')
    target_cpu = os.environ.get('_TGTCPU', 'x86')

    if env['debug']:
    if env['build'] == 'debug':
        build = 'debug'
    else:
        build = 'retail'
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -292,7 +292,7 @@ dri2_process_buffers(struct dri2_egl_surface *dri2_surf,
   struct dri2_egl_display *dri2_dpy =
      dri2_egl_display(dri2_surf->base.Resource.Display);
   xcb_rectangle_t rectangle;
   int i;
   unsigned i;

   dri2_surf->buffer_count = count;
   dri2_surf->have_fake_front = 0;
@@ -339,6 +339,8 @@ dri2_get_buffers(__DRIdrawable * driDrawable,
   xcb_dri2_get_buffers_reply_t *reply;
   xcb_dri2_get_buffers_cookie_t cookie;

   (void) driDrawable;

   cookie = xcb_dri2_get_buffers_unchecked (dri2_dpy->conn,
 					    dri2_surf->drawable,
 					    count, count, attachments);
@@ -360,23 +362,28 @@ dri2_get_buffers(__DRIdrawable * driDrawable,
 /*
  * Front-buffer flush hook.
  *
  * As written this does nothing: the dri2WaitGL() path is compiled out
  * with "#if 0", so both arguments are only cast to void to mark them as
  * intentionally unused.
  */
 static void
 dri2_flush_front_buffer(__DRIdrawable * driDrawable, void *loaderPrivate)
 {
   (void) driDrawable;

   /* FIXME: Does EGL support front buffer rendering at all? */

 #if 0
   /* Disabled: would treat loaderPrivate as the surface and wait on it. */
   struct dri2_egl_surface *dri2_surf = loaderPrivate;

   dri2WaitGL(dri2_surf);
 #else
   /* Active branch: loaderPrivate is unused while the code above is off. */
   (void) loaderPrivate;
 #endif
 }

 static __DRIimage *
 dri2_lookup_egl_image(__DRIcontext *context, void *image, void *data)
 dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data)
 {
   struct dri2_egl_context *dri2_ctx = data;
   _EGLDisplay *disp = dri2_ctx->base.Resource.Display;
   _EGLDisplay *disp = data;
   struct dri2_egl_image *dri2_img;
   _EGLImage *img;

   (void) screen;

   img = _eglLookupImage(image, disp);
   if (img == NULL) {
      _eglError(EGL_BAD_PARAMETER, "dri2_lookup_egl_image");
@@ -407,6 +414,8 @@ dri2_get_buffers_with_format(__DRIdrawable * driDrawable,
   xcb_dri2_get_buffers_with_format_cookie_t cookie;
   xcb_dri2_attach_format_t *format_attachments;

   (void) driDrawable;

   format_attachments = (xcb_dri2_attach_format_t *) attachments;
   cookie = xcb_dri2_get_buffers_with_format_unchecked (dri2_dpy->conn,
 							dri2_surf->drawable,
@@ -440,14 +449,14 @@ struct dri2_extension_match {
 static struct dri2_extension_match dri2_driver_extensions[] = {
   { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
   { __DRI_DRI2, 1, offsetof(struct dri2_egl_display, dri2) },
   { NULL }
   { NULL, 0, 0 }
 };

 static struct dri2_extension_match dri2_core_extensions[] = {
   { __DRI2_FLUSH, 1, offsetof(struct dri2_egl_display, flush) },
   { __DRI_TEX_BUFFER, 2, offsetof(struct dri2_egl_display, tex_buffer) },
   { __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
   { NULL }
   { NULL, 0, 0 }
 };

 static EGLBoolean
@@ -728,7 +737,7 @@ dri2_create_screen(_EGLDisplay *disp)
   dri2_dpy = disp->DriverData;
   dri2_dpy->dri_screen =
      dri2_dpy->dri2->createNewScreen(0, dri2_dpy->fd, dri2_dpy->extensions,
 				      &dri2_dpy->driver_configs, dri2_dpy);
 				      &dri2_dpy->driver_configs, disp);

   if (dri2_dpy->dri_screen == NULL) {
      _eglLog(_EGL_WARNING, "DRI2: failed to create dri screen");
@@ -772,6 +781,8 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp,
 {
   struct dri2_egl_display *dri2_dpy;

   (void) drv;

   dri2_dpy = malloc(sizeof *dri2_dpy);
   if (!dri2_dpy)
      return _eglError(EGL_BAD_ALLOC, "eglInitialize");
@@ -1075,6 +1086,8 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
   const __DRIconfig *dri_config;
   int api;

   (void) drv;

   dri2_ctx = malloc(sizeof *dri2_ctx);
   if (!dri2_ctx) {
      _eglError(EGL_BAD_ALLOC, "eglCreateContext");
@@ -1146,6 +1159,8 @@ dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);

   (void) drv;

   if (_eglIsSurfaceBound(surf))
      return EGL_TRUE;

@@ -1221,6 +1236,8 @@ dri2_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
   xcb_screen_iterator_t s;
   xcb_generic_error_t *error;

   (void) drv;

   dri2_surf = malloc(sizeof *dri2_surf);
   if (!dri2_surf) {
      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
@@ -1369,7 +1386,7 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw,
   xcb_rectangle_t rectangles[16];
   int i;

   if (numRects > ARRAY_SIZE(rectangles))
   if (numRects > (int)ARRAY_SIZE(rectangles))
      return dri2_copy_region(drv, disp, draw, dri2_surf->region);

   /* FIXME: Invert y here? */
@@ -1394,6 +1411,8 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw,
 static _EGLProc
 dri2_get_proc_address(_EGLDriver *drv, const char *procname)
 {
   (void) drv;

   /* FIXME: Do we need to support lookup of EGL symbols too? */

   return (_EGLProc) _glapi_get_proc_address(procname);
@@ -1405,6 +1424,8 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface);

   (void) drv;

   /* FIXME: If EGL allows frontbuffer rendering for window surfaces,
    * we need to copy fake to real here.*/

@@ -1416,6 +1437,9 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
 static EGLBoolean
 dri2_wait_native(_EGLDriver *drv, _EGLDisplay *disp, EGLint engine)
 {
   (void) drv;
   (void) disp;

   if (engine != EGL_CORE_NATIVE_ENGINE)
      return _eglError(EGL_BAD_PARAMETER, "eglWaitNative");
   /* glXWaitX(); */
@@ -1438,6 +1462,8 @@ dri2_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
   xcb_gcontext_t gc;

   (void) drv;

   (*dri2_dpy->flush->flush)(dri2_surf->dri_drawable);

   gc = xcb_generate_id(dri2_dpy->conn);
@@ -1501,6 +1527,11 @@ static EGLBoolean
 dri2_release_tex_image(_EGLDriver *drv,
 		       _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
 {
   (void) drv;
   (void) disp;
   (void) surf;
   (void) buffer;

   return EGL_TRUE;
 }

@@ -1509,7 +1540,6 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
 			     EGLClientBuffer buffer, const EGLint *attr_list)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
   struct dri2_egl_image *dri2_img;
   unsigned int attachments[1];
   xcb_drawable_t drawable;
@@ -1521,6 +1551,8 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
   xcb_generic_error_t *error;
   int stride, format;

   (void) ctx;

   drawable = (xcb_drawable_t) buffer;
   xcb_dri2_create_drawable (dri2_dpy->conn, drawable);
   attachments[0] = XCB_DRI2_ATTACHMENT_BUFFER_FRONT_LEFT;
@@ -1577,7 +1609,7 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,

   stride = buffers[0].pitch / buffers[0].cpp;
   dri2_img->dri_image =
      dri2_dpy->image->createImageFromName(dri2_ctx->dri_context,
      dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
 					   buffers_reply->width,
 					   buffers_reply->height,
 					   format,
@@ -1628,10 +1660,11 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
 				  EGLClientBuffer buffer, const EGLint *attr_list)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
   struct dri2_egl_image *dri2_img;
   EGLint width, height, format, name, stride, pitch, i, err;

   (void) ctx;

   name = (EGLint) buffer;

   err = EGL_SUCCESS;
@@ -1697,7 +1730,7 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
   }

   dri2_img->dri_image =
      dri2_dpy->image->createImageFromName(dri2_ctx->dri_context,
      dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
 					   width,
 					   height,
 					   format,
@@ -1718,6 +1751,8 @@ dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
 		      _EGLContext *ctx, EGLenum target,
 		      EGLClientBuffer buffer, const EGLint *attr_list)
 {
   (void) drv;

   switch (target) {
   case EGL_NATIVE_PIXMAP_KHR:
      return dri2_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
@@ -1737,6 +1772,8 @@ dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_image *dri2_img = dri2_egl_image(image);

   (void) drv;

   dri2_dpy->image->destroyImage(dri2_img->dri_image);
   free(dri2_img);

@@ -1753,6 +1790,8 @@ dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
   unsigned int use, dri_use, valid_mask;
   EGLint err = EGL_SUCCESS;

   (void) drv;

   dri2_img = malloc(sizeof *dri2_img);
   if (!dri2_img) {
      _eglError(EGL_BAD_ALLOC, "dri2_create_image_khr");
@@ -1853,6 +1892,8 @@ dri2_export_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img,
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_image *dri2_img = dri2_egl_image(img);

   (void) drv;

   if (name && !dri2_dpy->image->queryImage(dri2_img->dri_image,
 					    __DRI_IMAGE_ATTRIB_NAME, name)) {
      _eglError(EGL_BAD_ALLOC, "dri2_export_drm_image_mesa");
@@ -1879,6 +1920,8 @@ _eglMain(const char *args)
 {
   struct dri2_egl_driver *dri2_drv;

   (void) args;

   dri2_drv = malloc(sizeof *dri2_drv);
   if (!dri2_drv)
      return NULL;
--- a/src/egl/drivers/glx/egl_glx.c
+++ b/src/egl/drivers/glx/egl_glx.c
@@ -162,7 +162,8 @@ static EGLBoolean
 convert_fbconfig(Display *dpy, GLXFBConfig fbconfig,
                 struct GLX_egl_config *GLX_conf)
 {
   int err = 0, attr, egl_attr, val, i;
   int err = 0, attr, egl_attr, val;
   unsigned i;
   EGLint conformant, config_caveat, surface_type;

   for (i = 0; i < ARRAY_SIZE(fbconfig_attributes); i++) {
@@ -243,7 +244,8 @@ static EGLBoolean
 convert_visual(Display *dpy, XVisualInfo *vinfo,
               struct GLX_egl_config *GLX_conf)
 {
   int err, attr, egl_attr, val, i;
   int err, attr, egl_attr, val;
   unsigned i;
   EGLint conformant, config_caveat, surface_type;

   /* the visual must support OpenGL */
@@ -457,6 +459,8 @@ GLX_eglInitialize(_EGLDriver *drv, _EGLDisplay *disp,
 {
   struct GLX_egl_display *GLX_dpy;

   (void) drv;

   if (disp->Platform != _EGL_PLATFORM_X11)
      return EGL_FALSE;

@@ -541,6 +545,8 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
   struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp);
   struct GLX_egl_context *GLX_ctx_shared = GLX_egl_context(share_list);

   (void) drv;

   if (!GLX_ctx) {
      _eglError(EGL_BAD_ALLOC, "eglCreateContext");
      return NULL;
@@ -604,6 +610,8 @@ GLX_eglMakeCurrent(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
   GLXContext cctx;
   EGLBoolean ret = EGL_FALSE;

   (void) drv;

   /* bind the new context and return the "orphaned" one */
   if (!_eglBindContext(&ctx, &dsurf, &rsurf))
      return EGL_FALSE;
@@ -656,6 +664,8 @@ GLX_eglCreateWindowSurface(_EGLDriver *drv, _EGLDisplay *disp,
   struct GLX_egl_surface *GLX_surf;
   uint width, height;

   (void) drv;

   GLX_surf = CALLOC_STRUCT(GLX_egl_surface);
   if (!GLX_surf) {
      _eglError(EGL_BAD_ALLOC, "eglCreateWindowSurface");
@@ -702,6 +712,8 @@ GLX_eglCreatePixmapSurface(_EGLDriver *drv, _EGLDisplay *disp,
   struct GLX_egl_surface *GLX_surf;
   uint width, height;

   (void) drv;

   GLX_surf = CALLOC_STRUCT(GLX_egl_surface);
   if (!GLX_surf) {
      _eglError(EGL_BAD_ALLOC, "eglCreatePixmapSurface");
@@ -762,6 +774,8 @@ GLX_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp,
   int attribs[5];
   int i;

   (void) drv;

   GLX_surf = CALLOC_STRUCT(GLX_egl_surface);
   if (!GLX_surf) {
      _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface");
@@ -820,6 +834,8 @@ GLX_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp,
 static EGLBoolean
 GLX_eglDestroySurface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
 {
   (void) drv;

   if (!_eglIsSurfaceBound(surf))
      destroy_surface(disp, surf);

@@ -833,6 +849,8 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
   struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp);
   struct GLX_egl_surface *GLX_surf = GLX_egl_surface(draw);

   (void) drv;

   glXSwapBuffers(GLX_dpy->dpy, GLX_surf->glx_drawable);

   return EGL_TRUE;
@@ -844,12 +862,18 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
 /*
  * Resolve an entry point by name through glXGetProcAddress().
  * The driver argument is not used.
  */
 static _EGLProc
 GLX_eglGetProcAddress(_EGLDriver *drv, const char *procname)
 {
   /* glXGetProcAddress() takes the name as a GLubyte string */
   const GLubyte *name = (const GLubyte *) procname;

   (void) drv;

   return (_EGLProc) glXGetProcAddress(name);
 }

 /*
  * Calls glXWaitGL() and reports success.
  * None of the arguments are consulted; the result is always EGL_TRUE.
  */
 static EGLBoolean
 GLX_eglWaitClient(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
 {
   const EGLBoolean result = EGL_TRUE;

   /* all parameters are intentionally unused */
   (void) ctx;
   (void) dpy;
   (void) drv;

   glXWaitGL();

   return result;
 }
@@ -857,6 +881,9 @@ GLX_eglWaitClient(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
 static EGLBoolean
 GLX_eglWaitNative(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine)
 {
   (void) drv;
   (void) dpy;

   if (engine != EGL_CORE_NATIVE_ENGINE)
      return _eglError(EGL_BAD_PARAMETER, "eglWaitNative");
   glXWaitX();
@@ -880,6 +907,8 @@ _eglMain(const char *args)
 {
   struct GLX_egl_driver *GLX_drv = CALLOC_STRUCT(GLX_egl_driver);

   (void) args;

   if (!GLX_drv)
      return NULL;

--- a/src/egl/main/Makefile
+++ b/src/egl/main/Makefile
@@ -57,7 +57,7 @@ EGL_NATIVE_PLATFORM=_EGL_INVALID_PLATFORM
 ifeq ($(firstword $(EGL_PLATFORMS)),x11)
 EGL_NATIVE_PLATFORM=_EGL_PLATFORM_X11
 endif
 ifeq ($(firstword $(EGL_PLATFORMS)),kms)
 ifeq ($(firstword $(EGL_PLATFORMS)),drm)
 EGL_NATIVE_PLATFORM=_EGL_PLATFORM_DRM
 endif
 ifeq ($(firstword $(EGL_PLATFORMS)),fbdev)
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -402,10 +402,15 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list,
   _EGLContext *context;
   EGLContext ret;

   if (config)
      _EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv);
   else
      _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);
   _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);

   if (!config) {
      /* config may be NULL if surfaceless */
      if (!disp->Extensions.KHR_surfaceless_gles1 &&
          !disp->Extensions.KHR_surfaceless_gles2 &&
          !disp->Extensions.KHR_surfaceless_opengl)
         RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT);
   }

   if (!share && share_list != EGL_NO_CONTEXT)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_CONTEXT);
@@ -459,9 +464,19 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,

   if (!context && ctx != EGL_NO_CONTEXT)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_FALSE);
   if ((!draw_surf && draw != EGL_NO_SURFACE) ||
       (!read_surf && read != EGL_NO_SURFACE))
      RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
   if (!draw_surf || !read_surf) {
      /* surfaces may be NULL if surfaceless */
      if (!disp->Extensions.KHR_surfaceless_gles1 &&
          !disp->Extensions.KHR_surfaceless_gles2 &&
          !disp->Extensions.KHR_surfaceless_opengl)
         RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);

      if ((!draw_surf && draw != EGL_NO_SURFACE) ||
          (!read_surf && read != EGL_NO_SURFACE))
         RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
      if (draw_surf || read_surf)
         RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
   }

   ret = drv->API.MakeCurrent(drv, disp, draw_surf, read_surf, context);

@@ -1276,6 +1291,8 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target,
   EGLImageKHR ret;

   _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
   if (!disp->Extensions.KHR_image_base)
      RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR);
   if (!context && ctx != EGL_NO_CONTEXT)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_IMAGE_KHR);

@@ -1296,6 +1313,8 @@ eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image)
   EGLBoolean ret;

   _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv);
   if (!disp->Extensions.KHR_image_base)
      RETURN_EGL_EVAL(disp, EGL_FALSE);
   if (!img)
      RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE);

@@ -1321,6 +1340,8 @@ eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list)
   EGLSyncKHR ret;

   _EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv);
   if (!disp->Extensions.KHR_reusable_sync)
      RETURN_EGL_EVAL(disp, EGL_NO_SYNC_KHR);

   sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list);
   ret = (sync) ? _eglLinkSync(sync, disp) : EGL_NO_SYNC_KHR;
@@ -1338,6 +1359,8 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync)
   EGLBoolean ret;

   _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
   assert(disp->Extensions.KHR_reusable_sync);

   _eglUnlinkSync(s);
   ret = drv->API.DestroySyncKHR(drv, disp, s);

@@ -1354,6 +1377,7 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t
   EGLint ret;

   _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
   assert(disp->Extensions.KHR_reusable_sync);
   ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout);

   RETURN_EGL_EVAL(disp, ret);
@@ -1369,6 +1393,7 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode)
   EGLBoolean ret;

   _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
   assert(disp->Extensions.KHR_reusable_sync);
   ret = drv->API.SignalSyncKHR(drv, disp, s, mode);

   RETURN_EGL_EVAL(disp, ret);
@@ -1384,6 +1409,7 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *v
   EGLBoolean ret;

   _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
   assert(disp->Extensions.KHR_reusable_sync);
   ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value);

   RETURN_EGL_EVAL(disp, ret);
@@ -1407,14 +1433,14 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,

   _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv);

   if (!disp->Extensions.NOK_swap_region)
      RETURN_EGL_EVAL(disp, EGL_FALSE);

   /* surface must be bound to current context in EGL 1.4 */
   if (!ctx || !_eglIsContextLinked(ctx) || surf != ctx->DrawSurface)
      RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);

   if (drv->API.SwapBuffersRegionNOK)
      ret = drv->API.SwapBuffersRegionNOK(drv, disp, surf, numRects, rects);
   else
      ret = drv->API.SwapBuffers(drv, disp, surf);
   ret = drv->API.SwapBuffersRegionNOK(drv, disp, surf, numRects, rects);

   RETURN_EGL_EVAL(disp, ret);
 }
@@ -1433,6 +1459,8 @@ eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list)
   EGLImageKHR ret;

   _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
   if (!disp->Extensions.MESA_drm_image)
      RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR);

   img = drv->API.CreateDRMImageMESA(drv, disp, attr_list);
   ret = (img) ? _eglLinkImage(img, disp) : EGL_NO_IMAGE_KHR;
@@ -1450,6 +1478,8 @@ eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image,
   EGLBoolean ret;

   _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv);
   assert(disp->Extensions.MESA_drm_image);

   if (!img)
      RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE);

--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -24,20 +24,18 @@
 * IDs are from 1 to N respectively.
 */
 void
 _eglInitConfig(_EGLConfig *config, _EGLDisplay *dpy, EGLint id)
 _eglInitConfig(_EGLConfig *conf, _EGLDisplay *dpy, EGLint id)
 {
   memset(config, 0, sizeof(*config));
   memset(conf, 0, sizeof(*conf));

   config->Display = dpy;
   conf->Display = dpy;

   /* some attributes take non-zero default values */
   SET_CONFIG_ATTRIB(config, EGL_CONFIG_ID,               id);
   SET_CONFIG_ATTRIB(config, EGL_CONFIG_CAVEAT,           EGL_NONE);
   SET_CONFIG_ATTRIB(config, EGL_TRANSPARENT_TYPE,        EGL_NONE);
   SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_TYPE,      EGL_NONE);
 #ifdef EGL_VERSION_1_2
   SET_CONFIG_ATTRIB(config, EGL_COLOR_BUFFER_TYPE,       EGL_RGB_BUFFER);
 #endif /* EGL_VERSION_1_2 */
   conf->ConfigID = id;
   conf->ConfigCaveat = EGL_NONE;
   conf->TransparentType = EGL_NONE;
   conf->NativeVisualType = EGL_NONE;
   conf->ColorBufferType = EGL_RGB_BUFFER;
 }


@@ -51,7 +49,7 @@ EGLConfig
 _eglAddConfig(_EGLDisplay *dpy, _EGLConfig *conf)
 {
   /* sanity check */
   assert(GET_CONFIG_ATTRIB(conf, EGL_CONFIG_ID) > 0);
   assert(conf->ConfigID > 0);

   if (!dpy->Configs) {
      dpy->Configs = _eglCreateArray("Config", 16);
@@ -104,6 +102,7 @@ static const struct {
   EGLint default_value;
 } _eglValidationTable[] =
 {
   /* core */
   { EGL_BUFFER_SIZE,               ATTRIB_TYPE_INTEGER,
                                    ATTRIB_CRITERION_ATLEAST,
                                    0 },
@@ -200,22 +199,13 @@ static const struct {
   { EGL_TRANSPARENT_BLUE_VALUE,    ATTRIB_TYPE_INTEGER,
                                    ATTRIB_CRITERION_EXACT,
                                    EGL_DONT_CARE },
   /* these are not real attributes */
   { EGL_MATCH_NATIVE_PIXMAP,       ATTRIB_TYPE_PSEUDO,
                                    ATTRIB_CRITERION_SPECIAL,
                                    EGL_NONE },
   /* there is a gap before EGL_SAMPLES */
   { 0x3030,                        ATTRIB_TYPE_PSEUDO,
                                    ATTRIB_CRITERION_IGNORE,
                                    0 },
   { EGL_NONE,                      ATTRIB_TYPE_PSEUDO,
                                    ATTRIB_CRITERION_IGNORE,
                                    0 },

   /* extensions */
   { EGL_Y_INVERTED_NOK,            ATTRIB_TYPE_BOOLEAN,
                                    ATTRIB_CRITERION_EXACT,
                                    EGL_DONT_CARE },

                                    EGL_DONT_CARE }
 };


@@ -232,18 +222,13 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
 {
   EGLint i, attr, val;
   EGLBoolean valid = EGL_TRUE;
   EGLint red_size = 0, green_size = 0, blue_size = 0, luminance_size = 0;
   EGLint alpha_size = 0, buffer_size = 0;

   /* all attributes should have been listed */
   assert(ARRAY_SIZE(_eglValidationTable) == _EGL_CONFIG_NUM_ATTRIBS);

   /* check attributes by their types */
   for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) {
      EGLint mask;

      attr = _eglValidationTable[i].attr;
      val = GET_CONFIG_ATTRIB(conf, attr);
      val = _eglGetConfigKey(conf, attr);

      switch (_eglValidationTable[i].type) {
      case ATTRIB_TYPE_INTEGER:
@@ -255,30 +240,14 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
            break;
         case EGL_SAMPLE_BUFFERS:
            /* there can be at most 1 sample buffer */
            if (val > 1)
            if (val > 1 || val < 0)
               valid = EGL_FALSE;
            break;
         case EGL_RED_SIZE:
            red_size = val;
            break;
         case EGL_GREEN_SIZE:
            green_size = val;
            break;
         case EGL_BLUE_SIZE:
            blue_size = val;
            break;
         case EGL_LUMINANCE_SIZE:
            luminance_size = val;
            break;
         case EGL_ALPHA_SIZE:
            alpha_size = val;
            break;
         case EGL_BUFFER_SIZE:
            buffer_size = val;
         default:
            if (val < 0)
               valid = EGL_FALSE;
            break;
         }
         if (val < 0)
            valid = EGL_FALSE;
         break;
      case ATTRIB_TYPE_BOOLEAN:
         if (val != EGL_TRUE && val != EGL_FALSE)
@@ -366,17 +335,18 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)

   /* now check for conflicting attribute values */

   switch (GET_CONFIG_ATTRIB(conf, EGL_COLOR_BUFFER_TYPE)) {
   switch (conf->ColorBufferType) {
   case EGL_RGB_BUFFER:
      if (luminance_size)
      if (conf->LuminanceSize)
         valid = EGL_FALSE;
      if (red_size + green_size + blue_size + alpha_size != buffer_size)
      if (conf->RedSize + conf->GreenSize +
            conf->BlueSize + conf->AlphaSize != conf->BufferSize)
         valid = EGL_FALSE;
      break;
   case EGL_LUMINANCE_BUFFER:
      if (red_size || green_size || blue_size)
      if (conf->RedSize || conf->GreenSize || conf->BlueSize)
         valid = EGL_FALSE;
      if (luminance_size + alpha_size != buffer_size)
      if (conf->LuminanceSize + conf->AlphaSize != conf->BufferSize)
         valid = EGL_FALSE;
      break;
   }
@@ -385,23 +355,19 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
      return EGL_FALSE;
   }

   val = GET_CONFIG_ATTRIB(conf, EGL_SAMPLE_BUFFERS);
   if (!val && GET_CONFIG_ATTRIB(conf, EGL_SAMPLES))
   if (!conf->SampleBuffers && conf->Samples)
      valid = EGL_FALSE;
   if (!valid) {
      _eglLog(_EGL_DEBUG, "conflicting samples and sample buffers");
      return EGL_FALSE;
   }

   val = GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE);
   if (!(val & EGL_WINDOW_BIT)) {
      if (GET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_ID) != 0 ||
          GET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE) != EGL_NONE)
   if (!(conf->SurfaceType & EGL_WINDOW_BIT)) {
      if (conf->NativeVisualID != 0 || conf->NativeVisualType != EGL_NONE)
         valid = EGL_FALSE;
   }
   if (!(val & EGL_PBUFFER_BIT)) {
      if (GET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGB) ||
          GET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGBA))
   if (!(conf->SurfaceType & EGL_PBUFFER_BIT)) {
      if (conf->BindToTextureRGB || conf->BindToTextureRGBA)
         valid = EGL_FALSE;
   }
   if (!valid) {
@@ -433,11 +399,11 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria)
         continue;

      attr = _eglValidationTable[i].attr;
      cmp = GET_CONFIG_ATTRIB(criteria, attr);
      cmp = _eglGetConfigKey(criteria, attr);
      if (cmp == EGL_DONT_CARE)
         continue;

      val = GET_CONFIG_ATTRIB(conf, attr);
      val = _eglGetConfigKey(conf, attr);
      switch (_eglValidationTable[i].criterion) {
      case ATTRIB_CRITERION_EXACT:
         if (val != cmp)
@@ -478,16 +444,11 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria)
 static INLINE EGLBoolean
 _eglIsConfigAttribValid(_EGLConfig *conf, EGLint attr)
 {
   if (_eglIndexConfig(conf, attr) < 0)
   if (_eglOffsetOfConfig(attr) < 0)
      return EGL_FALSE;

   /* there are some holes in the range */
   switch (attr) {
   case 0x3030 /* a gap before EGL_SAMPLES */:
   case EGL_NONE:
 #ifdef EGL_VERSION_1_4
   case EGL_MATCH_NATIVE_PIXMAP:
 #endif
      return EGL_FALSE;
   case EGL_Y_INVERTED_NOK:
      return conf->Display->Extensions.NOK_texture_from_pixmap;
@@ -506,15 +467,12 @@ EGLBoolean
 _eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list)
 {
   EGLint attr, val, i;
   EGLint config_id = 0, level = 0;
   EGLBoolean has_native_visual_type = EGL_FALSE;
   EGLBoolean has_transparent_color = EGL_FALSE;

   /* reset to default values */
   for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) {
      attr = _eglValidationTable[i].attr;
      val = _eglValidationTable[i].default_value;
      SET_CONFIG_ATTRIB(conf, attr, val);
      _eglSetConfigKey(conf, attr, val);
   }

   /* parse the list */
@@ -524,59 +482,33 @@ _eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list)

      if (!_eglIsConfigAttribValid(conf, attr))
 	 return EGL_FALSE;
 	      
      SET_CONFIG_ATTRIB(conf, attr, val);

      /* remember some attributes for post-processing */
      switch (attr) {
      case EGL_CONFIG_ID:
         config_id = val;
         break;
      case EGL_LEVEL:
         level = val;
         break;
      case EGL_NATIVE_VISUAL_TYPE:
         has_native_visual_type = EGL_TRUE;
         break;
      case EGL_TRANSPARENT_RED_VALUE:
      case EGL_TRANSPARENT_GREEN_VALUE:
      case EGL_TRANSPARENT_BLUE_VALUE:
         has_transparent_color = EGL_TRUE;
         break;
      default:
         break;
      }
      _eglSetConfigKey(conf, attr, val);
   }

   if (!_eglValidateConfig(conf, EGL_TRUE))
      return EGL_FALSE;

   /* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */
   if (level == EGL_DONT_CARE)
   if (conf->Level == EGL_DONT_CARE)
      return EGL_FALSE;

   /* ignore other attributes when EGL_CONFIG_ID is given */
   if (config_id > 0) {
      _eglResetConfigKeys(conf, EGL_DONT_CARE);
      SET_CONFIG_ATTRIB(conf, EGL_CONFIG_ID, config_id);
   if (conf->ConfigID > 0) {
      for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) {
         attr = _eglValidationTable[i].attr;
         if (attr != EGL_CONFIG_ID)
            _eglSetConfigKey(conf, attr, EGL_DONT_CARE);
      }
   }
   else {
      if (has_native_visual_type) {
         val = GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE);
         if (!(val & EGL_WINDOW_BIT))
            SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE, EGL_DONT_CARE);
      }
      if (!(conf->SurfaceType & EGL_WINDOW_BIT))
         conf->NativeVisualType = EGL_DONT_CARE;

      if (has_transparent_color) {
         val = GET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_TYPE);
         if (val == EGL_NONE) {
            SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_RED_VALUE,
                              EGL_DONT_CARE);
            SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_GREEN_VALUE,
                              EGL_DONT_CARE);
            SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_BLUE_VALUE,
                              EGL_DONT_CARE);
         }
      if (conf->TransparentType == EGL_NONE) {
         conf->TransparentRedValue = EGL_DONT_CARE;
         conf->TransparentGreenValue = EGL_DONT_CARE;
         conf->TransparentBlueValue = EGL_DONT_CARE;
      }
   }

@@ -610,7 +542,6 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
      EGL_ALPHA_MASK_SIZE,
   };
   EGLint val1, val2;
   EGLBoolean rgb_buffer;
   EGLint i;

   if (conf1 == conf2)
@@ -619,44 +550,41 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
   /* the enum values have the desired ordering */
   assert(EGL_NONE < EGL_SLOW_CONFIG);
   assert(EGL_SLOW_CONFIG < EGL_NON_CONFORMANT_CONFIG);
   val1 = GET_CONFIG_ATTRIB(conf1, EGL_CONFIG_CAVEAT);
   val2 = GET_CONFIG_ATTRIB(conf2, EGL_CONFIG_CAVEAT);
   if (val1 != val2)
      return (val1 - val2);
   val1 = conf1->ConfigCaveat - conf2->ConfigCaveat;
   if (val1)
      return val1;

   /* the enum values have the desired ordering */
   assert(EGL_RGB_BUFFER < EGL_LUMINANCE_BUFFER);
   val1 = GET_CONFIG_ATTRIB(conf1, EGL_COLOR_BUFFER_TYPE);
   val2 = GET_CONFIG_ATTRIB(conf2, EGL_COLOR_BUFFER_TYPE);
   if (val1 != val2)
      return (val1 - val2);
   rgb_buffer = (val1 == EGL_RGB_BUFFER);
   val1 = conf1->ColorBufferType - conf2->ColorBufferType;
   if (val1)
      return val1;

   if (criteria) {
      val1 = val2 = 0;
      if (rgb_buffer) {
         if (GET_CONFIG_ATTRIB(criteria, EGL_RED_SIZE) > 0) {
            val1 += GET_CONFIG_ATTRIB(conf1, EGL_RED_SIZE);
            val2 += GET_CONFIG_ATTRIB(conf2, EGL_RED_SIZE);
      if (conf1->ColorBufferType == EGL_RGB_BUFFER) {
         if (criteria->RedSize > 0) {
            val1 += conf1->RedSize;
            val2 += conf2->RedSize;
         }
         if (GET_CONFIG_ATTRIB(criteria, EGL_GREEN_SIZE) > 0) {
            val1 += GET_CONFIG_ATTRIB(conf1, EGL_GREEN_SIZE);
            val2 += GET_CONFIG_ATTRIB(conf2, EGL_GREEN_SIZE);
         if (criteria->GreenSize > 0) {
            val1 += conf1->GreenSize;
            val2 += conf2->GreenSize;
         }
         if (GET_CONFIG_ATTRIB(criteria, EGL_BLUE_SIZE) > 0) {
            val1 += GET_CONFIG_ATTRIB(conf1, EGL_BLUE_SIZE);
            val2 += GET_CONFIG_ATTRIB(conf2, EGL_BLUE_SIZE);
         if (criteria->BlueSize > 0) {
            val1 += conf1->BlueSize;
            val2 += conf2->BlueSize;
         }
      }
      else {
         if (GET_CONFIG_ATTRIB(criteria, EGL_LUMINANCE_SIZE) > 0) {
            val1 += GET_CONFIG_ATTRIB(conf1, EGL_LUMINANCE_SIZE);
            val2 += GET_CONFIG_ATTRIB(conf2, EGL_LUMINANCE_SIZE);
         if (criteria->LuminanceSize > 0) {
            val1 += conf1->LuminanceSize;
            val2 += conf2->LuminanceSize;
         }
      }
      if (GET_CONFIG_ATTRIB(criteria, EGL_ALPHA_SIZE) > 0) {
         val1 += GET_CONFIG_ATTRIB(conf1, EGL_ALPHA_SIZE);
         val2 += GET_CONFIG_ATTRIB(conf2, EGL_ALPHA_SIZE);
      if (criteria->AlphaSize > 0) {
         val1 += conf1->AlphaSize;
         val2 += conf2->AlphaSize;
      }
   }
   else {
@@ -669,24 +597,15 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
      return (val2 - val1);

   for (i = 0; i < ARRAY_SIZE(compare_attribs); i++) {
      val1 = GET_CONFIG_ATTRIB(conf1, compare_attribs[i]);
      val2 = GET_CONFIG_ATTRIB(conf2, compare_attribs[i]);
      val1 = _eglGetConfigKey(conf1, compare_attribs[i]);
      val2 = _eglGetConfigKey(conf2, compare_attribs[i]);
      if (val1 != val2)
         return (val1 - val2);
   }

   /* EGL_NATIVE_VISUAL_TYPE cannot be compared here */

   if (compare_id) {
      val1 = GET_CONFIG_ATTRIB(conf1, EGL_CONFIG_ID);
      val2 = GET_CONFIG_ATTRIB(conf2, EGL_CONFIG_ID);
      assert(val1 != val2);
   }
   else {
      val1 = val2 = 0;
   }

   return (val1 - val2);
   return (compare_id) ? (conf1->ConfigID - conf2->ConfigID) : 0;
 }


@@ -802,7 +721,7 @@ _eglGetConfigAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf,
   if (!value)
      return _eglError(EGL_BAD_PARAMETER, "eglGetConfigAttrib");

   *value = GET_CONFIG_ATTRIB(conf, attribute);
   *value = _eglGetConfigKey(conf, attribute);
   return EGL_TRUE;
 }

--- a/src/egl/main/eglconfig.h
+++ b/src/egl/main/eglconfig.h
@@ -6,26 +6,49 @@
 #include "egltypedefs.h"


 #define _EGL_CONFIG_FIRST_ATTRIB EGL_BUFFER_SIZE
 #define _EGL_CONFIG_LAST_ATTRIB EGL_CONFORMANT
 #define _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS \
   (_EGL_CONFIG_LAST_ATTRIB - _EGL_CONFIG_FIRST_ATTRIB + 1)

 /* Attributes outside the contiguous block:
 *
 *   EGL_Y_INVERTED_NOK
 */
 #define _EGL_CONFIG_FIRST_EXTRA_ATTRIB _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS
 #define _EGL_CONFIG_NUM_EXTRA_ATTRIBS 1

 #define _EGL_CONFIG_NUM_ATTRIBS \
   _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS + _EGL_CONFIG_NUM_EXTRA_ATTRIBS


 /* update _eglValidationTable and _eglOffsetOfConfig before updating this
 * struct */
 struct _egl_config
 {
   _EGLDisplay *Display;
   EGLint Storage[_EGL_CONFIG_NUM_ATTRIBS];

   /* core */
   EGLint BufferSize;
   EGLint AlphaSize;
   EGLint BlueSize;
   EGLint GreenSize;
   EGLint RedSize;
   EGLint DepthSize;
   EGLint StencilSize;
   EGLint ConfigCaveat;
   EGLint ConfigID;
   EGLint Level;
   EGLint MaxPbufferHeight;
   EGLint MaxPbufferPixels;
   EGLint MaxPbufferWidth;
   EGLint NativeRenderable;
   EGLint NativeVisualID;
   EGLint NativeVisualType;
   EGLint Samples;
   EGLint SampleBuffers;
   EGLint SurfaceType;
   EGLint TransparentType;
   EGLint TransparentBlueValue;
   EGLint TransparentGreenValue;
   EGLint TransparentRedValue;
   EGLint BindToTextureRGB;
   EGLint BindToTextureRGBA;
   EGLint MinSwapInterval;
   EGLint MaxSwapInterval;
   EGLint LuminanceSize;
   EGLint AlphaMaskSize;
   EGLint ColorBufferType;
   EGLint RenderableType;
   EGLint MatchNativePixmap;
   EGLint Conformant;

   /* extensions */
   EGLint YInvertedNOK;
 };


@@ -37,38 +60,56 @@ struct _egl_config


 /**
 * Given a key, return an index into the storage of the config.
 * Return -1 if the key is invalid.
 * Map an EGL attribute enum to the offset of the member in _EGLConfig.
 */
 static INLINE EGLint
 _eglIndexConfig(const _EGLConfig *conf, EGLint key)
 _eglOffsetOfConfig(EGLint attr)
 {
   (void) conf;
   if (key >= _EGL_CONFIG_FIRST_ATTRIB &&
       key < _EGL_CONFIG_FIRST_ATTRIB + _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS)
      return key - _EGL_CONFIG_FIRST_ATTRIB;
   
   switch (key) {
   case EGL_Y_INVERTED_NOK:
      return _EGL_CONFIG_FIRST_EXTRA_ATTRIB;
   switch (attr) {
 #define ATTRIB_MAP(attr, memb) case attr: return offsetof(_EGLConfig, memb)
   /* core */
   ATTRIB_MAP(EGL_BUFFER_SIZE,               BufferSize);
   ATTRIB_MAP(EGL_ALPHA_SIZE,                AlphaSize);
   ATTRIB_MAP(EGL_BLUE_SIZE,                 BlueSize);
   ATTRIB_MAP(EGL_GREEN_SIZE,                GreenSize);
   ATTRIB_MAP(EGL_RED_SIZE,                  RedSize);
   ATTRIB_MAP(EGL_DEPTH_SIZE,                DepthSize);
   ATTRIB_MAP(EGL_STENCIL_SIZE,              StencilSize);
   ATTRIB_MAP(EGL_CONFIG_CAVEAT,             ConfigCaveat);
   ATTRIB_MAP(EGL_CONFIG_ID,                 ConfigID);
   ATTRIB_MAP(EGL_LEVEL,                     Level);
   ATTRIB_MAP(EGL_MAX_PBUFFER_HEIGHT,        MaxPbufferHeight);
   ATTRIB_MAP(EGL_MAX_PBUFFER_PIXELS,        MaxPbufferPixels);
   ATTRIB_MAP(EGL_MAX_PBUFFER_WIDTH,         MaxPbufferWidth);
   ATTRIB_MAP(EGL_NATIVE_RENDERABLE,         NativeRenderable);
   ATTRIB_MAP(EGL_NATIVE_VISUAL_ID,          NativeVisualID);
   ATTRIB_MAP(EGL_NATIVE_VISUAL_TYPE,        NativeVisualType);
   ATTRIB_MAP(EGL_SAMPLES,                   Samples);
   ATTRIB_MAP(EGL_SAMPLE_BUFFERS,            SampleBuffers);
   ATTRIB_MAP(EGL_SURFACE_TYPE,              SurfaceType);
   ATTRIB_MAP(EGL_TRANSPARENT_TYPE,          TransparentType);
   ATTRIB_MAP(EGL_TRANSPARENT_BLUE_VALUE,    TransparentBlueValue);
   ATTRIB_MAP(EGL_TRANSPARENT_GREEN_VALUE,   TransparentGreenValue);
   ATTRIB_MAP(EGL_TRANSPARENT_RED_VALUE,     TransparentRedValue);
   ATTRIB_MAP(EGL_BIND_TO_TEXTURE_RGB,       BindToTextureRGB);
   ATTRIB_MAP(EGL_BIND_TO_TEXTURE_RGBA,      BindToTextureRGBA);
   ATTRIB_MAP(EGL_MIN_SWAP_INTERVAL,         MinSwapInterval);
   ATTRIB_MAP(EGL_MAX_SWAP_INTERVAL,         MaxSwapInterval);
   ATTRIB_MAP(EGL_LUMINANCE_SIZE,            LuminanceSize);
   ATTRIB_MAP(EGL_ALPHA_MASK_SIZE,           AlphaMaskSize);
   ATTRIB_MAP(EGL_COLOR_BUFFER_TYPE,         ColorBufferType);
   ATTRIB_MAP(EGL_RENDERABLE_TYPE,           RenderableType);
   ATTRIB_MAP(EGL_MATCH_NATIVE_PIXMAP,       MatchNativePixmap);
   ATTRIB_MAP(EGL_CONFORMANT,                Conformant);
   /* extensions */
   ATTRIB_MAP(EGL_Y_INVERTED_NOK,            YInvertedNOK);
 #undef ATTRIB_MAP
   default:
      return -1;
   }
 }


 /**
 * Reset all keys in the config to a given value.
 */
 static INLINE void
 _eglResetConfigKeys(_EGLConfig *conf, EGLint val)
 {
   EGLint i;
   for (i = 0; i < _EGL_CONFIG_NUM_ATTRIBS; i++)
      conf->Storage[i] = val;
 }


 /**
 * Update a config for a given key.
 *
@@ -79,9 +120,9 @@ _eglResetConfigKeys(_EGLConfig *conf, EGLint val)
 static INLINE void
 _eglSetConfigKey(_EGLConfig *conf, EGLint key, EGLint val)
 {
   EGLint idx = _eglIndexConfig(conf, key);
   assert(idx >= 0);
   conf->Storage[idx] = val;
   EGLint offset = _eglOffsetOfConfig(key);
   assert(offset >= 0);
   *((EGLint *) ((char *) conf + offset)) = val;
 }


@@ -91,9 +132,9 @@ _eglSetConfigKey(_EGLConfig *conf, EGLint key, EGLint val)
 static INLINE EGLint
 _eglGetConfigKey(const _EGLConfig *conf, EGLint key)
 {
   EGLint idx = _eglIndexConfig(conf, key);
   assert(idx >= 0);
   return conf->Storage[idx];
   EGLint offset = _eglOffsetOfConfig(key);
   assert(offset >= 0);
   return *((EGLint *) ((char *) conf + offset));
 }


--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -113,13 +113,12 @@ _eglInitContext(_EGLContext *ctx, _EGLDisplay *dpy, _EGLConfig *conf,

   err = _eglParseContextAttribList(ctx, attrib_list);
   if (err == EGL_SUCCESS && ctx->Config) {
      EGLint renderable_type, api_bit;
      EGLint api_bit;

      renderable_type = GET_CONFIG_ATTRIB(ctx->Config, EGL_RENDERABLE_TYPE);
      api_bit = _eglGetContextAPIBit(ctx);
      if (!(renderable_type & api_bit)) {
      if (!(ctx->Config->RenderableType & api_bit)) {
         _eglLog(_EGL_DEBUG, "context api is 0x%x while config supports 0x%x",
               api_bit, renderable_type);
               api_bit, ctx->Config->RenderableType);
         err = EGL_BAD_CONFIG;
      }
   }
@@ -183,7 +182,9 @@ _eglQueryContext(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *c,

   switch (attribute) {
   case EGL_CONFIG_ID:
      *value = GET_CONFIG_ATTRIB(c->Config, EGL_CONFIG_ID);
      if (!c->Config)
         return _eglError(EGL_BAD_ATTRIBUTE, "eglQueryContext");
      *value = c->Config->ConfigID;
      break;
   case EGL_CONTEXT_CLIENT_VERSION:
      *value = c->ClientVersion;
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -27,7 +27,7 @@ _eglGetNativePlatformFromEnv(void)
   } egl_platforms[_EGL_NUM_PLATFORMS] = {
      { _EGL_PLATFORM_WINDOWS, "gdi" },
      { _EGL_PLATFORM_X11, "x11" },
      { _EGL_PLATFORM_DRM, "kms" },
      { _EGL_PLATFORM_DRM, "drm" },
      { _EGL_PLATFORM_FBDEV, "fbdev" }
   };
   _EGLPlatformType plat = _EGL_INVALID_PLATFORM;
--- a/src/egl/main/egldriver.c
+++ b/src/egl/main/egldriver.c
@@ -18,7 +18,6 @@
 #include "eglmisc.h"
 #include "eglmode.h"
 #include "eglscreen.h"
 #include "eglstring.h"
 #include "eglsurface.h"
 #include "eglimage.h"
 #include "eglsync.h"
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -17,12 +17,12 @@
 static void
 _eglClampSwapInterval(_EGLSurface *surf, EGLint interval)
 {
   EGLint bound = GET_CONFIG_ATTRIB(surf->Config, EGL_MAX_SWAP_INTERVAL);
   EGLint bound = surf->Config->MaxSwapInterval;
   if (interval >= bound) {
      interval = bound;
   }
   else {
      bound = GET_CONFIG_ATTRIB(surf->Config, EGL_MIN_SWAP_INTERVAL);
      bound = surf->Config->MinSwapInterval;
      if (interval < bound)
         interval = bound;
   }
@@ -263,7 +263,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
      return EGL_FALSE;
   }

   if ((GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE) & type) == 0) {
   if ((conf->SurfaceType & type) == 0) {
      /* The config can't be used to create a surface of this type */
      _eglError(EGL_BAD_CONFIG, func);
      return EGL_FALSE;
@@ -333,7 +333,7 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
      *value = surface->Height;
      break;
   case EGL_CONFIG_ID:
      *value = GET_CONFIG_ATTRIB(surface->Config, EGL_CONFIG_ID);
      *value = surface->Config->ConfigID;
      break;
   case EGL_LARGEST_PBUFFER:
      *value = surface->LargestPbuffer;
@@ -445,7 +445,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,

   switch (attribute) {
   case EGL_MIPMAP_LEVEL:
      confval = GET_CONFIG_ATTRIB(surface->Config, EGL_RENDERABLE_TYPE);
      confval = surface->Config->RenderableType;
      if (!(confval & (EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT))) {
         err = EGL_BAD_PARAMETER;
         break;
@@ -457,7 +457,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
      case EGL_MULTISAMPLE_RESOLVE_DEFAULT:
         break;
      case EGL_MULTISAMPLE_RESOLVE_BOX:
         confval = GET_CONFIG_ATTRIB(surface->Config, EGL_SURFACE_TYPE);
         confval = surface->Config->SurfaceType;
         if (!(confval & EGL_MULTISAMPLE_RESOLVE_BOX_BIT))
            err = EGL_BAD_MATCH;
         break;
@@ -474,7 +474,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
      case EGL_BUFFER_DESTROYED:
         break;
      case EGL_BUFFER_PRESERVED:
         confval = GET_CONFIG_ATTRIB(surface->Config, EGL_SURFACE_TYPE);
         confval = surface->Config->SurfaceType;
         if (!(confval & EGL_SWAP_BEHAVIOR_PRESERVED_BIT))
            err = EGL_BAD_MATCH;
         break;
--- a/src/gallium/Makefile.template
+++ b/src/gallium/Makefile.template
@@ -40,7 +40,7 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURC
 	touch depend
 	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(GENERATED_SOURCES) 2> /dev/null

 $(PROGS): % : %.o
 $(PROGS): % : %.o $(PROGS_DEPS)
 	$(LD) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group  $(LIBS) -Wl,--end-group

 # Emacs tags
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -8,6 +8,7 @@ C_SOURCES = \
 	cso_cache/cso_context.c \
 	cso_cache/cso_hash.c \
 	draw/draw_context.c \
 	draw/draw_fs.c \
 	draw/draw_gs.c \
 	draw/draw_pipe.c \
 	draw/draw_pipe_aaline.c \
@@ -121,6 +122,7 @@ C_SOURCES = \
 	util/u_handle_table.c \
 	util/u_hash.c \
 	util/u_hash_table.c \
 	util/u_index_modify.c \
 	util/u_keymap.c \
 	util/u_linear.c \
 	util/u_linkage.c \
@@ -174,6 +176,7 @@ GALLIVM_SOURCES = \
        gallivm/lp_bld_struct.c \
        gallivm/lp_bld_swizzle.c \
        gallivm/lp_bld_tgsi_aos.c \
        gallivm/lp_bld_tgsi_info.c \
        gallivm/lp_bld_tgsi_soa.c \
        gallivm/lp_bld_type.c \
        draw/draw_llvm.c \
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -54,6 +54,7 @@ source = [
    'cso_cache/cso_context.c',
    'cso_cache/cso_hash.c',
    'draw/draw_context.c',
    'draw/draw_fs.c',
    'draw/draw_gs.c',
    'draw/draw_pipe.c',
    'draw/draw_pipe_aaline.c',
@@ -170,6 +171,7 @@ source = [
    'util/u_handle_table.c',
    'util/u_hash.c',
    'util/u_hash_table.c',
    'util/u_index_modify.c',
    'util/u_keymap.c',
    'util/u_linear.c',
    'util/u_linkage.c',
@@ -225,6 +227,7 @@ if env['llvm']:
    'gallivm/lp_bld_struct.c',
    'gallivm/lp_bld_swizzle.c',
    'gallivm/lp_bld_tgsi_aos.c',
    'gallivm/lp_bld_tgsi_info.c',
    'gallivm/lp_bld_tgsi_soa.c',
    'gallivm/lp_bld_type.c',
    'draw/draw_llvm.c',
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -413,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
 }



 /**
 * Reserve one more extra vertex/geometry shader output and return the
 * slot assigned to it.
 *
 * The semantic name/index pair is recorded in the next free entry of
 * draw->extra_shader_outputs.  The slot is the value reported by
 * draw_current_shader_outputs() plus the number of extras that were
 * already allocated before this call.
 *
 * Optional draw module stages such as wide_point use this when they
 * need additional generic/texcoord attributes.
 *
 * Note that the capacity of the table is only checked with assert().
 */
 int
 draw_alloc_extra_vertex_attrib(struct draw_context *draw,
                                uint semantic_name, uint semantic_index)
 {
    /* extras are numbered after the outputs counted by the helper */
    const int base_slot = draw_current_shader_outputs(draw);
    const int idx = draw->extra_shader_outputs.num;

    assert(idx < Elements(draw->extra_shader_outputs.semantic_name));

    draw->extra_shader_outputs.num++;
    draw->extra_shader_outputs.slot[idx] = base_slot + idx;
    draw->extra_shader_outputs.semantic_index[idx] = semantic_index;
    draw->extra_shader_outputs.semantic_name[idx] = semantic_name;

    return draw->extra_shader_outputs.slot[idx];
 }


 /**
 * Remove all extra vertex attributes that were allocated with
 * draw_alloc_extra_vertex_attrib().
 *
 * Only the count is reset to zero; the recorded semantic names, indices
 * and slots are left in place and are overwritten by later allocations.
 */
 void
 draw_remove_extra_vertex_attribs(struct draw_context *draw)
 {
    draw->extra_shader_outputs.num = 0;
 }


 /**
 * Ask the draw module for the location/slot of the given vertex attribute in
 * a post-transformed vertex.
@@ -446,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw,
         return i;
   }

   /* XXX there may be more than one extra vertex attrib.
    * For example, simulated gl_FragCoord and gl_PointCoord.
    */
   if (draw->extra_shader_outputs.semantic_name == semantic_name &&
       draw->extra_shader_outputs.semantic_index == semantic_index) {
      return draw->extra_shader_outputs.slot;
   /* Search the extra vertex attributes */
   for (i = 0; i < draw->extra_shader_outputs.num; i++) {
      if (draw->extra_shader_outputs.semantic_name[i] == semantic_name &&
          draw->extra_shader_outputs.semantic_index[i] == semantic_index) {
         return draw->extra_shader_outputs.slot[i];
      }
   }

   return 0;
@@ -470,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw,
 uint
 draw_num_shader_outputs(const struct draw_context *draw)
 {
   uint count = draw->vs.vertex_shader->info.num_outputs;
   uint count;

   /* If a geometry shader is present, its outputs go to the
    * driver, else the vertex shader's outputs.
    */
   if (draw->gs.geometry_shader)
      count = draw->gs.geometry_shader->info.num_outputs;
   else
      count = draw->vs.vertex_shader->info.num_outputs;

   count += draw->extra_shader_outputs.num;

   if (draw->extra_shader_outputs.slot > 0)
      count++;
   return count;
 }

@@ -671,6 +709,11 @@ draw_set_samplers(struct draw_context *draw,
      draw->samplers[i] = NULL;

   draw->num_samplers = num;

 #ifdef HAVE_LLVM
   if (draw->llvm)
      draw_llvm_set_sampler_state(draw);
 #endif
 }

 void
@@ -678,9 +721,9 @@ draw_set_mapped_texture(struct draw_context *draw,
                        unsigned sampler_idx,
                        uint32_t width, uint32_t height, uint32_t depth,
                        uint32_t last_level,
                        uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
                        uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
                        const void *data[DRAW_MAX_TEXTURE_LEVELS])
                        uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
                        uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
                        const void *data[PIPE_MAX_TEXTURE_LEVELS])
 {
 #ifdef HAVE_LLVM
   if(draw->llvm)
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -46,9 +46,9 @@ struct draw_context;
 struct draw_stage;
 struct draw_vertex_shader;
 struct draw_geometry_shader;
 struct draw_fragment_shader;
 struct tgsi_sampler;

 #define DRAW_MAX_TEXTURE_LEVELS 13  /* 4K x 4K for now */

 struct draw_context *draw_create( struct pipe_context *pipe );

@@ -119,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw,
                        unsigned sampler_idx,
                        uint32_t width, uint32_t height, uint32_t depth,
                        uint32_t last_level,
                        uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
                        uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
                        const void *data[DRAW_MAX_TEXTURE_LEVELS]);
                        uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
                        uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
                        const void *data[PIPE_MAX_TEXTURE_LEVELS]);


 /*
@@ -137,6 +137,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
                               struct draw_vertex_shader *dvs);


 /*
 * Fragment shader functions
 */
 struct draw_fragment_shader *
 draw_create_fragment_shader(struct draw_context *draw,
                            const struct pipe_shader_state *shader);
 void draw_bind_fragment_shader(struct draw_context *draw,
                               struct draw_fragment_shader *dvs);
 void draw_delete_fragment_shader(struct draw_context *draw,
                                 struct draw_fragment_shader *dvs);

 /*
 * Geometry shader functions
 */
--- a/src/gallium/auxiliary/draw/draw_fs.c
+++ b/src/gallium/auxiliary/draw/draw_fs.c
@@ -0,0 +1,73 @@
 /**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

 #include "pipe/p_shader_tokens.h"

 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"

 #include "tgsi/tgsi_parse.h"

 #include "draw_fs.h"
 #include "draw_private.h"
 #include "draw_context.h"


 struct draw_fragment_shader *
 draw_create_fragment_shader(struct draw_context *draw,
                            const struct pipe_shader_state *shader)
 {
   struct draw_fragment_shader *dfs;

   dfs = CALLOC_STRUCT(draw_fragment_shader);
   if (dfs) {
      dfs->base = *shader;
      tgsi_scan_shader(shader->tokens, &dfs->info);
   }

   return dfs;
 }


 /**
 * Make dfs the fragment shader currently bound in the draw module.
 *
 * A flush with DRAW_FLUSH_STATE_CHANGE is issued before the pointer is
 * replaced (presumably so that work queued so far is processed with the
 * previously bound shader -- confirm against draw_do_flush()).
 * dfs is stored as given; it is not checked against NULL here.
 */
 void
 draw_bind_fragment_shader(struct draw_context *draw,
                           struct draw_fragment_shader *dfs)
 {
    draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);

    draw->fs.fragment_shader = dfs;
 }


 /**
 * Free a fragment shader object created by draw_create_fragment_shader().
 *
 * Only the object itself is released with FREE(); nothing it points to
 * is freed.  The draw argument is unused.
 *
 * NOTE(review): no check is made that dfs is not the shader currently
 * bound in draw->fs.fragment_shader -- presumably the caller unbinds it
 * first; verify.
 */
 void
 draw_delete_fragment_shader(struct draw_context *draw,
                             struct draw_fragment_shader *dfs)
 {
    FREE(dfs);
 }

--- a/src/gallium/auxiliary/draw/draw_fs.h
+++ b/src/gallium/auxiliary/draw/draw_fs.h
@@ -0,0 +1,42 @@
 /**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

 #ifndef DRAW_FS_H
 #define DRAW_FS_H


 #include "tgsi/tgsi_scan.h"


 /**
 * Draw module's record of a fragment shader: the shader state it was
 * created from plus the information gathered by scanning its tokens.
 */
 struct draw_fragment_shader
 {
    /* by-value copy of the pipe_shader_state given at creation */
    struct pipe_shader_state base;
    /* filled in by tgsi_scan_shader() in draw_create_fragment_shader() */
    struct tgsi_shader_info info;
 };


 #endif /* DRAW_FS_H */
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -44,6 +44,7 @@
 #include "tgsi/tgsi_dump.h"

 #include "util/u_cpu_detect.h"
 #include "util/u_math.h"
 #include "util/u_pointer.h"
 #include "util/u_string.h"

@@ -71,12 +72,17 @@ init_globals(struct draw_llvm *llvm)
      elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
      elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
      elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
      elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
      elem_types[DRAW_JIT_TEXTURE_DATA] =
         LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
                       DRAW_MAX_TEXTURE_LEVELS);
                       PIPE_MAX_TEXTURE_LEVELS);
      elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
      elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
      elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
      elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 
         LLVMArrayType(LLVMFloatType(), 4);

      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);

@@ -101,6 +107,18 @@ init_globals(struct draw_llvm *llvm)
      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
                             llvm->target, texture_type,
                             DRAW_JIT_TEXTURE_DATA);
      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
                             llvm->target, texture_type,
                             DRAW_JIT_TEXTURE_MIN_LOD);
      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
                             llvm->target, texture_type,
                             DRAW_JIT_TEXTURE_MAX_LOD);
      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
                             llvm->target, texture_type,
                             DRAW_JIT_TEXTURE_LOD_BIAS);
      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
                             llvm->target, texture_type,
                             DRAW_JIT_TEXTURE_BORDER_COLOR);
      LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
                           llvm->target, texture_type);

@@ -1048,9 +1066,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
                             unsigned sampler_idx,
                             uint32_t width, uint32_t height, uint32_t depth,
                             uint32_t last_level,
                             uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
                             uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
                             const void *data[DRAW_MAX_TEXTURE_LEVELS])
                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
 {
   unsigned j;
   struct draw_jit_texture *jit_tex;
@@ -1072,6 +1090,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
   }
 }


 /**
 * Copy the LOD range, LOD bias and border color of every bound sampler
 * into the JIT texture entry with the same index.  Sampler slots that
 * hold a NULL pointer leave their JIT texture entry untouched.
 */
 void
 draw_llvm_set_sampler_state(struct draw_context *draw)
 {
   unsigned unit;

   for (unit = 0; unit < draw->num_samplers; unit++) {
      struct draw_jit_texture *dst;

      /* nothing bound at this index */
      if (!draw->samplers[unit])
         continue;

      dst = &draw->llvm->jit_context.textures[unit];

      dst->min_lod = draw->samplers[unit]->min_lod;
      dst->max_lod = draw->samplers[unit]->max_lod;
      dst->lod_bias = draw->samplers[unit]->lod_bias;
      COPY_4V(dst->border_color, draw->samplers[unit]->border_color);
   }
 }


 void
 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
 {
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -41,7 +41,6 @@
 #include <llvm-c/Target.h>
 #include <llvm-c/ExecutionEngine.h>

 #define DRAW_MAX_TEXTURE_LEVELS 13  /* 4K x 4K for now */

 struct draw_llvm;
 struct llvm_vertex_shader;
@@ -52,9 +51,13 @@ struct draw_jit_texture
   uint32_t height;
   uint32_t depth;
   uint32_t last_level;
   uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
   uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
   const void *data[DRAW_MAX_TEXTURE_LEVELS];
   uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
   uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
   const void *data[PIPE_MAX_TEXTURE_LEVELS];
   float min_lod;
   float max_lod;
   float lod_bias;
   float border_color[4];
 };

 enum {
@@ -65,6 +68,10 @@ enum {
   DRAW_JIT_TEXTURE_ROW_STRIDE,
   DRAW_JIT_TEXTURE_IMG_STRIDE,
   DRAW_JIT_TEXTURE_DATA,
   DRAW_JIT_TEXTURE_MIN_LOD,
   DRAW_JIT_TEXTURE_MAX_LOD,
   DRAW_JIT_TEXTURE_LOD_BIAS,
   DRAW_JIT_TEXTURE_BORDER_COLOR,
   DRAW_JIT_TEXTURE_NUM_FIELDS  /* number of fields above */
 };

@@ -274,13 +281,16 @@ struct lp_build_sampler_soa *
 draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
                             LLVMValueRef context_ptr);

 void
 draw_llvm_set_sampler_state(struct draw_context *draw);

 void
 draw_llvm_set_mapped_texture(struct draw_context *draw,
                             unsigned sampler_idx,
                             uint32_t width, uint32_t height, uint32_t depth,
                             uint32_t last_level,
                             uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
                             uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
                             const void *data[DRAW_MAX_TEXTURE_LEVELS]);
                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
                             const void *data[PIPE_MAX_TEXTURE_LEVELS]);

 #endif
--- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -146,6 +146,10 @@ DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE)
 DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE)
 DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE)
 DRAW_LLVM_TEXTURE_MEMBER(data_ptr,   DRAW_JIT_TEXTURE_DATA, FALSE)
 DRAW_LLVM_TEXTURE_MEMBER(min_lod,    DRAW_JIT_TEXTURE_MIN_LOD, TRUE)
 DRAW_LLVM_TEXTURE_MEMBER(max_lod,    DRAW_JIT_TEXTURE_MAX_LOD, TRUE)
 DRAW_LLVM_TEXTURE_MEMBER(lod_bias,   DRAW_JIT_TEXTURE_LOD_BIAS, TRUE)
 DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE)


 static void
@@ -207,6 +211,10 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
   sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride;
   sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride;
   sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr;
   sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod;
   sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod;
   sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias;
   sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color;
   sampler->dynamic_state.static_state = static_state;
   sampler->dynamic_state.context_ptr = context_ptr;

--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -688,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
   aaline->tex_slot = draw_current_shader_outputs(draw);
   aaline->pos_slot = draw_current_shader_position_output(draw);;

   /* advertise the extra post-transformed vertex attribute */
   draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
   draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
   draw->extra_shader_outputs.slot = aaline->tex_slot;
   /* allocate the extra post-transformed vertex attribute */
   (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
                                         aaline->fs->generic_attrib);

   /* how many samplers? */
   /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -744,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)

   draw->suspend_flushing = FALSE;

   draw->extra_shader_outputs.slot = 0;
   draw_remove_extra_vertex_attribs(draw);
 }


--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)

   aapoint->pos_slot = draw_current_shader_position_output(draw);

   draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
   draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
   draw->extra_shader_outputs.slot = aapoint->tex_slot;
   /* allocate the extra post-transformed vertex attribute */
   (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
                                         aapoint->fs->generic_attrib);

   /* find psize slot in post-transform vertex */
   aapoint->psize_slot = -1;
@@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)

   draw->suspend_flushing = FALSE;

   draw->extra_shader_outputs.slot = 0;
   draw_remove_extra_vertex_attribs(draw);
 }


--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -263,6 +263,8 @@ do_clip_tri( struct draw_stage *stage,
      clipmask &= ~(1<<plane_idx);

      assert(n < MAX_CLIPPED_VERTICES);
      if (n >= MAX_CLIPPED_VERTICES)
         return;
      inlist[n] = inlist[0]; /* prevent rotation of vertices */

      for (i = 1; i <= n; i++) {
@@ -272,16 +274,22 @@ do_clip_tri( struct draw_stage *stage,

 	 if (!IS_NEGATIVE(dp_prev)) {
            assert(outcount < MAX_CLIPPED_VERTICES);
            if (outcount >= MAX_CLIPPED_VERTICES)
               return;
 	    outlist[outcount++] = vert_prev;
 	 }

 	 if (DIFFERENT_SIGNS(dp, dp_prev)) {
 	    struct vertex_header *new_vert;

            assert(tmpnr < MAX_CLIPPED_VERTICES+1);
            assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
            if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
               return;
            new_vert = clipper->stage.tmp[tmpnr++];

            assert(outcount < MAX_CLIPPED_VERTICES);
            if (outcount >= MAX_CLIPPED_VERTICES)
               return;
 	    outlist[outcount++] = new_vert;

 	    if (IS_NEGATIVE(dp)) {
@@ -321,27 +329,32 @@ do_clip_tri( struct draw_stage *stage,

   /* If flat-shading, copy provoking vertex color to polygon vertex[0]
    */
   if (clipper->flat) {
      if (stage->draw->rasterizer->flatshade_first) {
         if (inlist[0] != header->v[0]) {
            assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
            inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
            copy_colors(stage, inlist[0], header->v[0]);
   if (n >= 3) {
      if (clipper->flat) {
         if (stage->draw->rasterizer->flatshade_first) {
            if (inlist[0] != header->v[0]) {
               assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
               if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
                  return;
               inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
               copy_colors(stage, inlist[0], header->v[0]);
            }
         }
      }
      else {
         if (inlist[0] != header->v[2]) {
            assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
            inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
            copy_colors(stage, inlist[0], header->v[2]);
         else {
            if (inlist[0] != header->v[2]) {
               assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
               if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
                  return;
               inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
               copy_colors(stage, inlist[0], header->v[2]);
            }
         }
      }
   }

   /* Emit the polygon as triangles to the setup stage:
    */
   if (n >= 3)
      
      /* Emit the polygon as triangles to the setup stage:
       */
      emit_poly( stage, inlist, n, header );
   }
 }


--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -172,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
   wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
                 && !rast->line_smooth);

   /* drawing large points? */
   /* drawing large/sprite points (but not AA points)? */
   if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
      wide_points = TRUE;
   else if (rast->point_smooth && draw->pipeline.aapoint)
@@ -207,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
      precalc_flat = TRUE;
   }

   if (wide_points || rast->sprite_coord_enable) {
   if (wide_points) {
      draw->pipeline.wide_point->next = next;
      next = draw->pipeline.wide_point;
   }
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -57,26 +57,24 @@
 #include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw_fs.h"
 #include "draw_vs.h"
 #include "draw_pipe.h"


 struct widepoint_stage {
   struct draw_stage stage;
   struct draw_stage stage;  /**< base class */

   float half_point_size;

   float xbias;
   float ybias;

   uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
   uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS];
   uint num_texcoords;
   uint texcoord_mode;
   /** for automatic texcoord generation/replacement */
   uint num_texcoord_gen;
   uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS];

   int psize_slot;

   int point_coord_fs_input;  /**< input for pointcoord */
 };


@@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage )
 static void set_texcoords(const struct widepoint_stage *wide,
                          struct vertex_header *v, const float tc[4])
 {
   const struct draw_context *draw = wide->stage.draw;
   const struct pipe_rasterizer_state *rast = draw->rasterizer;
   const uint texcoord_mode = rast->sprite_coord_mode;
   uint i;
   for (i = 0; i < wide->num_texcoords; i++) {
      if (wide->texcoord_enable[i]) {
         uint j = wide->texcoord_slot[i];
         v->data[j][0] = tc[0];
         if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
            v->data[j][1] = 1.0f - tc[1];
         else
            v->data[j][1] = tc[1];
         v->data[j][2] = tc[2];
         v->data[j][3] = tc[3];
      }
   }

   if (wide->point_coord_fs_input >= 0) {
      /* put gl_PointCoord into the extra vertex slot */
      uint slot = wide->stage.draw->extra_shader_outputs.slot;
   for (i = 0; i < wide->num_texcoord_gen; i++) {
      const uint slot = wide->texcoord_gen_slot[i];
      v->data[slot][0] = tc[0];
      if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
      if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
         v->data[slot][1] = 1.0f - tc[1];
      else
         v->data[slot][1] = tc[1];
      v->data[slot][2] = 0.0F;
      v->data[slot][3] = 1.0F;
      v->data[slot][2] = tc[2];
      v->data[slot][3] = tc[3];
   }
 }

@@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage,
 }


 /**
 * Placeholder lookup of the fragment shader's point-coord input.
 *
 * As written this ignores 'draw' and always returns 0; the scan
 * described in the body comment is not implemented yet.
 */
 static int
 find_pntc_input_attrib(struct draw_context *draw)
 {
   /* Scan the fragment program's input decls to find the pointcoord
    * attribute.  The xy components will store the point coord.
    */
   return 0; /* XXX fix this */
 }


 static void widepoint_first_point( struct draw_stage *stage, 
 			      struct prim_header *header )
 static void
 widepoint_first_point(struct draw_stage *stage, 
                      struct prim_header *header)
 {
   struct widepoint_stage *wide = widepoint_stage(stage);
   struct draw_context *draw = stage->draw;
@@ -244,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage,
      stage->point = draw_pipe_passthrough_point;
   }

   draw_remove_extra_vertex_attribs(draw);

   if (rast->point_quad_rasterization) {
      /* find vertex shader texcoord outputs */
      const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
      uint i, j = 0;
      wide->texcoord_mode = rast->sprite_coord_mode;
      for (i = 0; i < vs->info.num_outputs; i++) {
         if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
            wide->texcoord_slot[j] = i;
            wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1;
            j++;
      const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
      uint i;

      wide->num_texcoord_gen = 0;

      /* Loop over fragment shader inputs looking for generic inputs
       * for which bit 'k' in sprite_coord_enable is set.
       */
      for (i = 0; i < fs->info.num_inputs; i++) {
         if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
            const int generic_index = fs->info.input_semantic_index[i];
            /* Note that sprite_coord enable is a bitfield of
             * PIPE_MAX_SHADER_OUTPUTS bits.
             */
            if (generic_index < PIPE_MAX_SHADER_OUTPUTS &&
                (rast->sprite_coord_enable & (1 << generic_index))) {
               /* OK, this generic attribute needs to be replaced with a
                * texcoord (see above).
                */
               int slot = draw_find_shader_output(draw,
                                                  TGSI_SEMANTIC_GENERIC,
                                                  generic_index);

               if (slot > 0) {
                  /* there's already a post-vertex shader attribute
                   * for this fragment shader input attribute.
                   */
               }
               else {
                  /* need to allocate a new post-vertex shader attribute */
                  slot = draw_alloc_extra_vertex_attrib(draw,
                                                        TGSI_SEMANTIC_GENERIC,
                                                        generic_index);
               }

               /* add this slot to the texcoord-gen list */
               wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
            }
         }
      }
      wide->num_texcoords = j;

      /* find fragment shader PointCoord input */
      wide->point_coord_fs_input = find_pntc_input_attrib(draw);

      /* setup extra vp output (point coord implemented as a texcoord) */
      draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
      draw->extra_shader_outputs.semantic_index = 0;
      draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
   }
   else {
      wide->point_coord_fs_input = -1;
      draw->extra_shader_outputs.slot = 0;
   }

   wide->psize_slot = -1;
@@ -295,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )

   stage->point = widepoint_first_point;
   stage->next->flush( stage->next, flags );
   stage->draw->extra_shader_outputs.slot = 0;

   draw_remove_extra_vertex_attribs(draw);

   /* restore original rasterizer state */
   if (draw->rast_handle) {
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -250,6 +250,11 @@ struct draw_context
      struct tgsi_sampler **samplers;
   } gs;

   /** Fragment shader state */
   struct {
      struct draw_fragment_shader *fragment_shader;
   } fs;

   /** Stream output (vertex feedback) state */
   struct {
      struct pipe_stream_output_state state;
@@ -266,9 +271,10 @@ struct draw_context
   /* If a prim stage introduces new vertex attributes, they'll be stored here
    */
   struct {
      uint semantic_name;
      uint semantic_index;
      int slot;
      uint num;
      uint semantic_name[10];
      uint semantic_index[10];
      uint slot[10];
   } extra_shader_outputs;

   unsigned reduced_prim;
@@ -362,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw );
 uint draw_current_shader_outputs(const struct draw_context *draw);
 uint draw_current_shader_position_output(const struct draw_context *draw);

 int draw_alloc_extra_vertex_attrib(struct draw_context *draw,
                                   uint semantic_name, uint semantic_index);
 void draw_remove_extra_vertex_attribs(struct draw_context *draw);


 /*******************************************************************************
 * Vertex processing (was passthrough) code:
 */
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -614,17 +614,15 @@ lp_build_div(struct lp_build_context *bld,


 /**
 * Linear interpolation.
 *
 * This also works for integer values with a few caveats.
 * Linear interpolation -- without any checks.
 *
 * @sa http://www.stereopsis.com/doubleblend.html
 */
 LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
              LLVMValueRef x,
              LLVMValueRef v0,
              LLVMValueRef v1)
 static INLINE LLVMValueRef
 lp_build_lerp_simple(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef v0,
                     LLVMValueRef v1)
 {
   LLVMValueRef delta;
   LLVMValueRef res;
@@ -639,12 +637,80 @@ lp_build_lerp(struct lp_build_context *bld,

   res = lp_build_add(bld, v0, res);

   if(bld->type.fixed)
   if (bld->type.fixed) {
      /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
       * but it will be wrong for other uses. Basically we need a more
       * powerful lp_type, capable of further distinguishing the values
       * interpretation from the value storage. */
      res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
   }

   return res;
 }


 /**
 * Linear interpolation between v0 and v1 using weight x.
 *
 * Non-normalized types are handed directly to lp_build_lerp_simple().
 *
 * Normalized (unsigned) types are first unpacked into two vectors of
 * double-width elements so the intermediate product fits, the weight is
 * rescaled (e.g. from [0, 255] to [0, 256]), each half is interpolated
 * with lp_build_lerp_simple(), and the two halves are packed back into
 * the original type.
 */
 LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
              LLVMValueRef x,
              LLVMValueRef v0,
              LLVMValueRef v1)
 {
   const struct lp_type type = bld->type;
   struct lp_type wide_type;
   struct lp_build_context wide_bld;
   LLVMValueRef x_lo, x_hi;
   LLVMValueRef v0_lo, v0_hi;
   LLVMValueRef v1_lo, v1_hi;
   LLVMValueRef lerp_lo, lerp_hi;
   LLVMValueRef shift;
   LLVMValueRef top_bit;

   assert(lp_check_value(type, x));
   assert(lp_check_value(type, v0));
   assert(lp_check_value(type, v1));

   if (!type.norm) {
      return lp_build_lerp_simple(bld, x, v0, v1);
   }

   assert(type.length >= 2);
   assert(!type.sign);

   /* Twice the element width, half the element count: wide enough to
    * hold the intermediate result of the multiplication.
    */
   memset(&wide_type, 0, sizeof wide_type);
   wide_type.fixed  = TRUE;
   wide_type.width  = type.width*2;
   wide_type.length = type.length/2;

   lp_build_context_init(&wide_bld, bld->builder, wide_type);

   lp_build_unpack2(bld->builder, type, wide_type, x,  &x_lo,  &x_hi);
   lp_build_unpack2(bld->builder, type, wide_type, v0, &v0_lo, &v0_hi);
   lp_build_unpack2(bld->builder, type, wide_type, v1, &v1_lo, &v1_hi);

   /* Scale x from [0, 255] to [0, 256] by adding its top bit back in. */
   shift = lp_build_const_int_vec(wide_type, type.width - 1);

   top_bit = LLVMBuildAShr(bld->builder, x_lo, shift, "");
   x_lo = lp_build_add(&wide_bld, x_lo, top_bit);

   top_bit = LLVMBuildAShr(bld->builder, x_hi, shift, "");
   x_hi = lp_build_add(&wide_bld, x_hi, top_bit);

   /* Interpolate the low and high halves separately, then recombine. */
   lerp_lo = lp_build_lerp_simple(&wide_bld, x_lo, v0_lo, v1_lo);
   lerp_hi = lp_build_lerp_simple(&wide_bld, x_hi, v0_hi, v1_hi);

   return lp_build_pack2(bld->builder, wide_type, type, lerp_lo, lerp_hi);
 }
@@ -923,35 +989,122 @@ lp_build_round_sse41(struct lp_build_context *bld,
                     enum lp_build_round_sse41_mode mode)
 {
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef i32t = LLVMInt32Type();
   const char *intrinsic;
   LLVMValueRef res;

   assert(type.floating);
   assert(type.width*type.length == 128);

   assert(lp_check_value(type, a));
   assert(util_cpu_caps.has_sse4_1);

   switch(type.width) {
   case 32:
      intrinsic = "llvm.x86.sse41.round.ps";
      break;
   case 64:
      intrinsic = "llvm.x86.sse41.round.pd";
      break;
   default:
      assert(0);
      return bld->undef;
   if (type.length == 1) {
      LLVMTypeRef vec_type;
      LLVMValueRef undef;
      LLVMValueRef args[3];
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);

      switch(type.width) {
      case 32:
         intrinsic = "llvm.x86.sse41.round.ss";
         break;
      case 64:
         intrinsic = "llvm.x86.sse41.round.sd";
         break;
      default:
         assert(0);
         return bld->undef;
      }

      vec_type = LLVMVectorType(bld->elem_type, 4);

      undef = LLVMGetUndef(vec_type);

      args[0] = undef;
      args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
      args[2] = LLVMConstInt(i32t, mode, 0);

      res = lp_build_intrinsic(bld->builder, intrinsic,
                               vec_type, args, Elements(args));

      res = LLVMBuildExtractElement(bld->builder, res, index0, "");
   }
   else {
      assert(type.width*type.length == 128);

      switch(type.width) {
      case 32:
         intrinsic = "llvm.x86.sse41.round.ps";
         break;
      case 64:
         intrinsic = "llvm.x86.sse41.round.pd";
         break;
      default:
         assert(0);
         return bld->undef;
      }

      res = lp_build_intrinsic_binary(bld->builder, intrinsic,
                                      bld->vec_type, a,
                                      LLVMConstInt(i32t, mode, 0));
   }

   return res;
 }


 /**
 * Convert a 32-bit float value to integer with the SSE/SSE2 conversion
 * intrinsics.
 *
 * A scalar (length 1) is placed in element 0 of an otherwise undefined
 * 4-element vector and converted with llvm.x86.sse.cvtss2si; otherwise
 * the value must be 128 bits wide and llvm.x86.sse2.cvtps2dq is used.
 * The result has the integer type matching bld->type.
 */
 static INLINE LLVMValueRef
 lp_build_iround_nearest_sse2(struct lp_build_context *bld,
                             LLVMValueRef a)
 {
   const struct lp_type type = bld->type;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMTypeRef ret_type = lp_build_int_vec_type(type);
   const char *intrinsic;
   LLVMValueRef res;

   assert(type.floating);
   /* using the double precision conversions is a bit more complicated */
   assert(type.width == 32);

   assert(lp_check_value(type, a));
   assert(util_cpu_caps.has_sse2);

   /* This is relying on MXCSR rounding mode, which should always be nearest. */
   if (type.length == 1) {
      LLVMTypeRef vec_type;
      LLVMValueRef undef;
      LLVMValueRef arg;
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);

      /* the scalar intrinsic takes a 4-element vector argument */
      vec_type = LLVMVectorType(bld->elem_type, 4);

      intrinsic = "llvm.x86.sse.cvtss2si";

      undef = LLVMGetUndef(vec_type);

      /* only element 0 carries the input; the rest stays undefined */
      arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");

      res = lp_build_intrinsic_unary(bld->builder, intrinsic,
                                     ret_type, arg);
   }
   else {
      assert(type.width*type.length == 128);

      intrinsic = "llvm.x86.sse2.cvtps2dq";

      res = lp_build_intrinsic_unary(bld->builder, intrinsic,
                                     ret_type, a);
   }

   /* NOTE(review): the statement below uses 'vec_type' and 'mode', neither
    * of which is declared at this scope in this function; it looks like a
    * leftover of the previous lp_build_round_sse41() return -- confirm
    * against the original patch.
    */
   return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
                                    LLVMConstInt(LLVMInt32Type(), mode, 0));
   return res;
 }


 /**
 * Return the integer part of a float (vector) value.  The returned value is
 * a float (vector).
 * Ex: trunc(-1.5) = 1.0
 * Return the integer part of a float (vector) value (== round toward zero).
 * The returned value is a float (vector).
 * Ex: trunc(-1.5) = -1.0
 */
 LLVMValueRef
 lp_build_trunc(struct lp_build_context *bld,
@@ -962,8 +1115,10 @@ lp_build_trunc(struct lp_build_context *bld,
   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
   if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
@@ -990,8 +1145,10 @@ lp_build_round(struct lp_build_context *bld,
   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
   if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMValueRef res;
@@ -1016,8 +1173,10 @@ lp_build_floor(struct lp_build_context *bld,
   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
   if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMValueRef res;
@@ -1042,8 +1201,10 @@ lp_build_ceil(struct lp_build_context *bld,
   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
   if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMValueRef res;
@@ -1068,9 +1229,9 @@ lp_build_fract(struct lp_build_context *bld,


 /**
 * Return the integer part of a float (vector) value.  The returned value is
 * an integer (vector).
 * Ex: itrunc(-1.5) = 1
 * Return the integer part of a float (vector) value (== round toward zero).
 * The returned value is an integer (vector).
 * Ex: itrunc(-1.5) = -1
 */
 LLVMValueRef
 lp_build_itrunc(struct lp_build_context *bld,
@@ -1097,31 +1258,40 @@ lp_build_iround(struct lp_build_context *bld,
                LLVMValueRef a)
 {
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMValueRef res;

   assert(type.floating);

   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
   if (util_cpu_caps.has_sse2 &&
       ((type.width == 32) && (type.length == 1 || type.length == 4))) {
      return lp_build_iround_nearest_sse2(bld, a);
   }
   else if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef half;

      /* get sign bit */
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");

      /* sign * 0.5 */
      half = lp_build_const_vec(type, 0.5);
      half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
      half = LLVMBuildOr(bld->builder, sign, half, "");
      half = LLVMBuildBitCast(bld->builder, half, vec_type, "");

      if (type.sign) {
         LLVMTypeRef vec_type = bld->vec_type;
         LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
         LLVMValueRef sign;

         /* get sign bit */
         sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
         sign = LLVMBuildAnd(bld->builder, sign, mask, "");

         /* sign * 0.5 */
         half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
         half = LLVMBuildOr(bld->builder, sign, half, "");
         half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
      }

      res = LLVMBuildFAdd(bld->builder, a, half, "");
   }
@@ -1142,37 +1312,42 @@ lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
 {
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
   if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      /* Take the sign bit and add it to 1 constant */
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef offset;

      /* sign = a < 0 ? ~0 : 0 */
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");

      /* offset = -0.99999(9)f */
      offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
      offset = LLVMConstBitCast(offset, int_vec_type);

      /* offset = a < 0 ? offset : 0.0f */
      offset = LLVMBuildAnd(bld->builder, offset, sign, "");
      offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");

      res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
      res = a;

      if (type.sign) {
         /* Take the sign bit and add it to 1 constant */
         LLVMTypeRef vec_type = bld->vec_type;
         unsigned mantissa = lp_mantissa(type);
         LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
         LLVMValueRef sign;
         LLVMValueRef offset;

         /* sign = a < 0 ? ~0 : 0 */
         sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
         sign = LLVMBuildAnd(bld->builder, sign, mask, "");
         sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");

         /* offset = -0.99999(9)f */
         offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
         offset = LLVMConstBitCast(offset, int_vec_type);

         /* offset = a < 0 ? offset : 0.0f */
         offset = LLVMBuildAnd(bld->builder, offset, sign, "");
         offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");

         res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res");
      }
   }

   /* round to nearest (toward zero) */
@@ -1192,35 +1367,39 @@ lp_build_iceil(struct lp_build_context *bld,
               LLVMValueRef a)
 {
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
   if (util_cpu_caps.has_sse4_1 &&
       (type.length == 1 || type.width*type.length == 128)) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMTypeRef vec_type = bld->vec_type;
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef offset;

      /* sign = a < 0 ? 0 : ~0 */
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
      sign = LLVMBuildNot(bld->builder, sign, "iceil.not");

      /* offset = 0.99999(9)f */
      offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
      offset = LLVMConstBitCast(offset, int_vec_type);

      /* offset = a < 0 ? 0.0 : offset */
      offset = LLVMBuildAnd(bld->builder, offset, sign, "");
      offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
      if (type.sign) {
         LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
         LLVMValueRef sign;

         /* sign = a < 0 ? 0 : ~0 */
         sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
         sign = LLVMBuildAnd(bld->builder, sign, mask, "");
         sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
         sign = LLVMBuildNot(bld->builder, sign, "iceil.not");

         /* offset = a < 0 ? 0.0 : offset */
         offset = LLVMConstBitCast(offset, int_vec_type);
         offset = LLVMBuildAnd(bld->builder, offset, sign, "");
         offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
      }

      res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
   }
@@ -1232,6 +1411,46 @@ lp_build_iceil(struct lp_build_context *bld,
 }


 /**
 * Compute ifloor(a) and fract(a) in a single call.
 *
 * Prefer this over calling the two functions separately: it picks whichever
 * strategy (floor() first vs. ifloor() first) lets the second result be
 * derived from the first with the least redundant work.
 *
 * \param a          floating point input value
 * \param out_ipart  receives the integer part, floor(a), as an integer value
 * \param out_fpart  receives the fractional part, a - floor(a)
 */
 void
 lp_build_ifloor_fract(struct lp_build_context *bld,
                       LLVMValueRef a,
                       LLVMValueRef *out_ipart,
                       LLVMValueRef *out_fpart)
 {
   const struct lp_type type = bld->type;
   LLVMValueRef floored;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (!util_cpu_caps.has_sse4_1 ||
       (type.length != 1 && type.width*type.length != 128)) {
      /*
       * No SSE4.1 rounding available for this type: start from ifloor()
       * and convert the integer result back to floating point.
       */

      *out_ipart = lp_build_ifloor(bld, a);
      floored = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart");
      *out_fpart = LLVMBuildFSub(bld->builder, a, floored, "fpart");
   }
   else {
      /*
       * SSE4.1 rounding is usable: start from floor() and convert the
       * floating point result to integer afterwards.
       */

      floored = lp_build_floor(bld, a);
      *out_fpart = LLVMBuildFSub(bld->builder, a, floored, "fpart");
      *out_ipart = LLVMBuildFPToSI(bld->builder, floored, bld->int_vec_type, "ipart");
   }
 }


 LLVMValueRef
 lp_build_sqrt(struct lp_build_context *bld,
              LLVMValueRef a)
@@ -2040,6 +2259,71 @@ lp_build_exp2(struct lp_build_context *bld,
 }


 /**
 * Extract the exponent of a IEEE-754 floating point value.
 *
 * The value is reinterpreted as an integer, shifted right past the mantissa,
 * masked to isolate the exponent field from the sign bit, and the format's
 * exponent bias is subtracted. The exponent field width and bias are derived
 * from the type (total width minus sign bit minus mantissa bits), so this is
 * not limited to 32-bit floats.
 *
 * Optionally apply an integer bias.
 *
 * Result is an integer value with
 *
 *   ifloor(log2(x)) + bias
 *
 * Zero, denormals, infinities and NaNs are not special-cased: their raw
 * exponent field is processed like any other.
 *
 * \param x     floating point value of type bld->type
 * \param bias  integer added to the extracted exponent
 */
 LLVMValueRef
 lp_build_extract_exponent(struct lp_build_context *bld,
                           LLVMValueRef x,
                           int bias)
 {
   const struct lp_type type = bld->type;
   unsigned mantissa = lp_mantissa(type);
   unsigned exponent_bits;
   long long exponent_mask;
   long long exponent_bias;
   LLVMValueRef res;

   assert(type.floating);

   assert(lp_check_value(bld->type, x));

   /* IEEE-754 layout: 1 sign bit | exponent field | mantissa */
   exponent_bits = type.width - 1 - mantissa;
   exponent_mask = (1LL << exponent_bits) - 1;        /* 255 for 32-bit floats */
   exponent_bias = (1LL << (exponent_bits - 1)) - 1;  /* 127 for 32-bit floats */

   x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");

   /* Drop the mantissa, strip the sign bit, then remove the format bias */
   res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
   res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, exponent_mask), "");
   res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, exponent_bias - bias), "");

   return res;
 }


 /**
 * Extract the mantissa of a floating point value.
 *
 * The mantissa bits of x are kept, while its sign and exponent bits are
 * replaced with those of the constant 1.0 (bld->one).
 *
 * Result is a floating point value with
 *
 *   x / 2**floor(log2(x))
 */
 LLVMValueRef
 lp_build_extract_mantissa(struct lp_build_context *bld,
                           LLVMValueRef x)
 {
   const struct lp_type type = bld->type;
   unsigned mantissa_bits = lp_mantissa(type);
   LLVMValueRef mantissa_mask;
   LLVMValueRef one_bits;
   LLVMValueRef bits;

   assert(lp_check_value(bld->type, x));

   assert(type.floating);

   /* Integer constants: all mantissa bits set, and the bit pattern of 1.0 */
   mantissa_mask = lp_build_const_int_vec(type, (1ULL << mantissa_bits) - 1);
   one_bits = LLVMConstBitCast(bld->one, bld->int_vec_type);

   bits = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");

   /* Keep only the mantissa of x and graft it onto the exponent of 1.0 */
   bits = LLVMBuildAnd(bld->builder, bits, mantissa_mask, "");
   bits = LLVMBuildOr(bld->builder, bits, one_bits, "");

   return LLVMBuildBitCast(bld->builder, bits, bld->vec_type, "");
 }



 /**
 * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
 * These coefficients can be generated with
@@ -2159,3 +2443,62 @@ lp_build_log2(struct lp_build_context *bld,
   lp_build_log2_approx(bld, x, NULL, NULL, &res);
   return res;
 }


 /**
 * Fast, low-accuracy log2.
 *
 *    log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x))
 *
 * This is a piece-wise linear approximation: the exponent (biased by -1)
 * and the mantissa are extracted separately and summed. Results are exact
 * when x is a power of two.
 *
 * See http://www.flipcode.com/archives/Fast_log_Function.shtml
 */
 LLVMValueRef
 lp_build_fast_log2(struct lp_build_context *bld,
                    LLVMValueRef x)
 {
   LLVMValueRef exponent;
   LLVMValueRef significand;

   assert(lp_check_value(bld->type, x));

   assert(bld->type.floating);

   /* exponent = (float)(floor(log2(x)) - 1) */
   exponent = LLVMBuildSIToFP(bld->builder,
                              lp_build_extract_exponent(bld, x, -1),
                              bld->vec_type, "");

   /* significand = x / 2**floor(log2(x)) */
   significand = lp_build_extract_mantissa(bld, x);

   return LLVMBuildFAdd(bld->builder, exponent, significand, "");
 }


 /**
 * Fast implementation of iround(log2(x)).
 *
 * The input is multiplied by sqrt(2), which adds 0.5 to its log2, and the
 * exponent of the product is then extracted. This is meant to be exact
 * rather than an approximation.
 */
 LLVMValueRef
 lp_build_ilog2(struct lp_build_context *bld,
                LLVMValueRef x)
 {
   LLVMValueRef half_step;
   LLVMValueRef scaled;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, x));

   /* multiplying by 2^(0.5) shifts log2(x) up by 0.5 */
   half_step = lp_build_const_vec(bld->type, M_SQRT2);
   scaled = LLVMBuildFMul(bld->builder, x, half_step, "");

   /* exponent of the scaled value = floor(log2(x) + 0.5) */
   return lp_build_extract_exponent(bld, scaled, 0);
 }
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -171,6 +171,12 @@ LLVMValueRef
 lp_build_itrunc(struct lp_build_context *bld,
                LLVMValueRef a);

 void
 lp_build_ifloor_fract(struct lp_build_context *bld,
                      LLVMValueRef a,
                      LLVMValueRef *out_ipart,
                      LLVMValueRef *out_fpart);

 LLVMValueRef
 lp_build_sqrt(struct lp_build_context *bld,
              LLVMValueRef a);
@@ -208,10 +214,27 @@ LLVMValueRef
 lp_build_exp2(struct lp_build_context *bld,
              LLVMValueRef a);

 LLVMValueRef
 lp_build_extract_exponent(struct lp_build_context *bld,
                          LLVMValueRef x,
                          int bias);

 LLVMValueRef
 lp_build_extract_mantissa(struct lp_build_context *bld,
                          LLVMValueRef x);

 LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
              LLVMValueRef a);

 LLVMValueRef
 lp_build_fast_log2(struct lp_build_context *bld,
                   LLVMValueRef a);

 LLVMValueRef
 lp_build_ilog2(struct lp_build_context *bld,
               LLVMValueRef x);

 void
 lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -63,6 +63,7 @@

 #include "util/u_debug.h"
 #include "util/u_math.h"
 #include "util/u_cpu_detect.h"

 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
@@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);
   assert(dst_width <= src_type.width);
   src_type.sign = FALSE;

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);
   if (dst_width <= mantissa) {
      /*
       * Apply magic coefficients that will make the desired result to appear
       * in the lowest significant bits of the mantissa, with correct rounding.
       *
       * This only works if the destination width fits in the mantissa.
       */

   /* This magic coefficients will make the desired result to appear in the
    * lowest significant bits of the mantissa.
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));
      unsigned long long ubound;
      unsigned long long mask;
      double scale;
      double bias;

   res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
   res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");
      ubound = (1ULL << dst_width);
      mask = ubound - 1;
      scale = (double)mask/ubound;
      bias = (double)(1ULL << (mantissa - dst_width));

   if(dst_width > n) {
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), "");
      res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
      res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
      res = LLVMBuildBitCast(builder, res, int_vec_type, "");
      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
   }
   else if (dst_width == (mantissa + 1)) {
      /*
       * The destination width matches exactly what can be represented in
       * floating point (i.e., mantissa + 1 bits). So do a straight
       * multiplication followed by casting. No further rounding is necessary.
       */

      double scale;

      /* TODO: Fill in the empty lower bits for additional precision? */
      /* YES: this fixes progs/trivial/tri-z-eq.c.
       * Otherwise vertex Z=1.0 values get converted to something like
       * 0xfffffb00 and the test for equality with 0xffffffff fails.
      scale = (double)((1ULL << dst_width) - 1);

      res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
   }
   else {
      /*
       * The destination exceeds what can be represented in the floating point.
       * So multiply by the largest power two we get away with, and when
       * subtract the most significant bit to rescale to normalized values.
       *
       * The largest power of two factor we can get away is
       * (1 << (src_type.width - 1)), because we need to use signed . In theory it
       * should be (1 << (src_type.width - 2)), but IEEE 754 rules states
       * INT_MIN should be returned in FPToSI, which is the correct result for
       * values near 1.0!
       *
       * This means we get (src_type.width - 1) correct bits for values near 0.0,
       * and (mantissa + 1) correct bits for values near 1.0. Equally or more
       * important, we also get exact results for 0.0 and 1.0.
       */
 #if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
 #elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
         shift -= n;
         n *= 2;

      unsigned n = MIN2(src_type.width - 1, dst_width);

      double scale = (double)(1ULL << n);
      unsigned lshift = dst_width - n;
      unsigned rshift = n;
      LLVMValueRef lshifted;
      LLVMValueRef rshifted;

      res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

      /*
       * Align the most significant bit to its final place.
       *
       * This will cause 1.0 to overflow to 0, but the later adjustment will
       * get it right.
       */
      if (lshift) {
         lshifted = LLVMBuildShl(builder, res,
                                 lp_build_const_int_vec(src_type, lshift), "");
      } else {
         lshifted = res;
      }
 #endif

      /*
       * Align the most significant bit to the right.
       */
      rshifted =  LLVMBuildAShr(builder, res,
                                lp_build_const_int_vec(src_type, rshift), "");

      /*
       * Subtract the MSB to the LSB, therefore re-scaling from
       * (1 << dst_width) to ((1 << dst_width) - 1).
       */

      res = LLVMBuildSub(builder, lshifted, rshifted, "");
   }
   else
      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");

   return res;
 }
@@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,

   assert(dst_type.floating);

   /* Special-case int8->float, though most cases could be handled
    * this way:
    */
   if (src_width == 8) {
      scale = 1.0/255.0;
      res = LLVMBuildSIToFP(builder, src, vec_type, "");
      res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
      return res;
   }

   mantissa = lp_mantissa(dst_type);

   n = MIN2(mantissa, src_width);
@@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder,
   }
   num_tmps = num_srcs;


   /* Special case 4x4f --> 1x16ub 
    */
   if (src_type.floating == 1 &&
       src_type.fixed    == 0 &&
       src_type.sign     == 1 &&
       src_type.norm     == 0 &&
       src_type.width    == 32 &&
       src_type.length   == 4 &&

       dst_type.floating == 0 &&
       dst_type.fixed    == 0 &&
       dst_type.sign     == 0 &&
       dst_type.norm     == 1 &&
       dst_type.width    == 8 &&
       dst_type.length   == 16 &&

       util_cpu_caps.has_sse2)
   {
      int i;

      for (i = 0; i < num_dsts; i++, src += 4) {
         struct lp_type int16_type = dst_type;
         struct lp_type int32_type = dst_type;
         LLVMValueRef lo, hi;
         LLVMValueRef src_int0;
         LLVMValueRef src_int1;
         LLVMValueRef src_int2;
         LLVMValueRef src_int3;
         LLVMTypeRef int16_vec_type;
         LLVMTypeRef int32_vec_type;
         LLVMTypeRef src_vec_type;
         LLVMTypeRef dst_vec_type;
         LLVMValueRef const_255f;
         LLVMValueRef a, b, c, d;

         int16_type.width *= 2;
         int16_type.length /= 2;
         int16_type.sign = 1;

         int32_type.width *= 4;
         int32_type.length /= 4;
         int32_type.sign = 1;

         src_vec_type   = lp_build_vec_type(src_type);
         dst_vec_type   = lp_build_vec_type(dst_type);
         int16_vec_type = lp_build_vec_type(int16_type);
         int32_vec_type = lp_build_vec_type(int32_type);

         const_255f = lp_build_const_vec(src_type, 255.0f);

         a = LLVMBuildFMul(builder, src[0], const_255f, "");
         b = LLVMBuildFMul(builder, src[1], const_255f, "");
         c = LLVMBuildFMul(builder, src[2], const_255f, "");
         d = LLVMBuildFMul(builder, src[3], const_255f, "");

         {
            struct lp_build_context bld;

            bld.builder = builder;
            bld.type = src_type;
            bld.vec_type = src_vec_type;
            bld.int_elem_type = lp_build_elem_type(int32_type);
            bld.int_vec_type = int32_vec_type;
            bld.undef = lp_build_undef(src_type);
            bld.zero = lp_build_zero(src_type);
            bld.one = lp_build_one(src_type);

            src_int0 = lp_build_iround(&bld, a);
            src_int1 = lp_build_iround(&bld, b);
            src_int2 = lp_build_iround(&bld, c);
            src_int3 = lp_build_iround(&bld, d);
         }
         /* relying on clamping behavior of sse2 intrinsics here */
         lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1);
         hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3);
         dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi);
      }
      return; 
   }

   /*
    * Clamp if necessary
    */
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -57,6 +57,8 @@ lp_disassemble(const void* func)
 #ifdef HAVE_UDIS86
   ud_t ud_obj;
   uint64_t max_jmp_pc;
   uint inst_no;
   boolean emit_addrs = TRUE, emit_line_nos = FALSE;

   ud_init(&ud_obj);

@@ -76,13 +78,18 @@ lp_disassemble(const void* func)

   while (ud_disassemble(&ud_obj)) {

      if (emit_addrs) {
 #ifdef PIPE_ARCH_X86
      debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
         debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
 #endif
 #ifdef PIPE_ARCH_X86_64
      debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
         debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
 #endif

      }
      else if (emit_line_nos) {
         debug_printf("%6d:\t", inst_no);
         inst_no++;
      }
 #if 0
      debug_printf("%-16s ", ud_insn_hex(&ud_obj));
 #endif
@@ -115,8 +122,10 @@ lp_disassemble(const void* func)
         }
      }

      if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
           ud_obj.mnemonic == UD_Iinvalid)
      if (ud_obj.mnemonic == UD_Iinvalid ||
          (ud_insn_off(&ud_obj) >= max_jmp_pc &&
           (ud_obj.mnemonic == UD_Iret ||
            ud_obj.mnemonic == UD_Ijmp)))
         break;
   }

--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -36,11 +36,12 @@
 #include "util/u_string.h"


 #define GALLIVM_DEBUG_TGSI      0x1
 #define GALLIVM_DEBUG_IR        0x2
 #define GALLIVM_DEBUG_ASM       0x4
 #define GALLIVM_DEBUG_NO_OPT    0x8
 #define GALLIVM_DEBUG_PERF      0x10
 #define GALLIVM_DEBUG_TGSI          (1 << 0)
 #define GALLIVM_DEBUG_IR            (1 << 1)
 #define GALLIVM_DEBUG_ASM           (1 << 2)
 #define GALLIVM_DEBUG_NO_OPT        (1 << 3)
 #define GALLIVM_DEBUG_PERF          (1 << 4)
 #define GALLIVM_DEBUG_NO_BRILINEAR  (1 << 5)


 #ifdef DEBUG
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -38,273 +38,15 @@
 #include "lp_bld_flow.h"


 #define LP_BUILD_FLOW_MAX_VARIABLES 64
 #define LP_BUILD_FLOW_MAX_DEPTH 32

 /**
 * Enumeration of all possible flow constructs.
 */
 enum lp_build_flow_construct_kind {
   LP_BUILD_FLOW_SCOPE,
   LP_BUILD_FLOW_SKIP,
   LP_BUILD_FLOW_IF
 };


 /**
 * Variable declaration scope.
 */
 struct lp_build_flow_scope
 {
   /** Number of variables declared in this scope */
   unsigned num_variables;
 };


 /**
 * Early exit. Useful to skip to the end of a function or block when
 * the execution mask becomes zero or when there is an error condition.
 */
 struct lp_build_flow_skip
 {
   /** Block to skip to */
   LLVMBasicBlockRef block;

   /** Number of variables declared at the beginning */
   unsigned num_variables;

   LLVMValueRef *phi;  /**< array [num_variables] */
 };


 /**
 * if/else/endif.
 */
 struct lp_build_flow_if
 {
   unsigned num_variables;

   LLVMValueRef *phi;  /**< array [num_variables] */

   LLVMValueRef condition;
   LLVMBasicBlockRef entry_block, true_block, false_block, merge_block;
 };


 /**
 * Union of all possible flow constructs' data
 */
 union lp_build_flow_construct_data
 {
   struct lp_build_flow_scope scope;
   struct lp_build_flow_skip skip;
   struct lp_build_flow_if ifthen;
 };


 /**
 * Element of the flow construct stack.
 */
 struct lp_build_flow_construct
 {
   enum lp_build_flow_construct_kind kind;
   union lp_build_flow_construct_data data;
 };


 /**
 * All necessary data to generate LLVM control flow constructs.
 * Insert a new block, right where builder is pointing to.
 *
 * Besides keeping track of the control flow construct themselves we also
 * need to keep track of variables in order to generate SSA Phi values.
 */
 struct lp_build_flow_context
 {
   LLVMBuilderRef builder;

   /**
    * Control flow stack.
    */
   struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
   unsigned num_constructs;

   /**
    * Variable stack
    */
   LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
   unsigned num_variables;
 };


 /**
 * Allocate a flow context with CALLOC_STRUCT and bind it to the given
 * builder.
 *
 * \return the new context, or NULL if the allocation failed
 */
 struct lp_build_flow_context *
 lp_build_flow_create(LLVMBuilderRef builder)
 {
   struct lp_build_flow_context *ctx = CALLOC_STRUCT(lp_build_flow_context);

   if (ctx)
      ctx->builder = builder;

   return ctx;
 }


 /**
 * Free a flow context.
 *
 * Both the construct stack and the variable stack are expected to be empty
 * at this point; this is only checked through assertions.
 */
 void
 lp_build_flow_destroy(struct lp_build_flow_context *flow)
 {
   /* every construct pushed must have been popped */
   assert(flow->num_constructs == 0);
   /* every declared variable must have gone out of scope */
   assert(flow->num_variables == 0);
   FREE(flow);
 }


 /**
 * Begin/push a new flow control construct, such as a loop, skip block
 * or variable scope.
 */
 static union lp_build_flow_construct_data *
 lp_build_flow_push(struct lp_build_flow_context *flow,
                   enum lp_build_flow_construct_kind kind)
 {
   assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
   if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
      return NULL;

   flow->constructs[flow->num_constructs].kind = kind;
   return &flow->constructs[flow->num_constructs++].data;
 }


 /**
 * Return the current/top flow control construct on the stack.
 * \param kind  the expected type of the top-most construct
 */
 static union lp_build_flow_construct_data *
 lp_build_flow_peek(struct lp_build_flow_context *flow,
                   enum lp_build_flow_construct_kind kind)
 {
   assert(flow->num_constructs);
   if(!flow->num_constructs)
      return NULL;

   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
   if(flow->constructs[flow->num_constructs - 1].kind != kind)
      return NULL;

   return &flow->constructs[flow->num_constructs - 1].data;
 }


 /**
 * End/pop the current/top flow control construct on the stack.
 * \param kind  the expected type of the top-most construct
 */
 static union lp_build_flow_construct_data *
 lp_build_flow_pop(struct lp_build_flow_context *flow,
                  enum lp_build_flow_construct_kind kind)
 {
   assert(flow->num_constructs);
   if(!flow->num_constructs)
      return NULL;

   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
   if(flow->constructs[flow->num_constructs - 1].kind != kind)
      return NULL;

   return &flow->constructs[--flow->num_constructs].data;
 }


 /**
 * Begin a variable scope.
 * This is useful important not only for aesthetic reasons, but also for
 * performance reasons, as frequently run blocks should be laid out next to
 * each other and fall-throughs maximized.
 *
 * See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp.
 *
 */
 void
 lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
 {
   struct lp_build_flow_scope *scope;

   scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope;
   if(!scope)
      return;

   scope->num_variables = 0;
 }


 /**
 * Declare a variable.
 *
 * A variable is a named entity which can have different LLVMValueRef's at
 * different points of the program. This is relevant for control flow because
 * when there are multiple branches to a same location we need to replace
 * the variable's value with a Phi function as explained in
 * http://en.wikipedia.org/wiki/Static_single_assignment_form .
 *
 * We keep track of variables by keeping around a pointer to where their
 * current value is stored.
 *
 * There are a few cautions to observe:
 *
 * - Variable's value must not be NULL. If there is no initial value then
 *   LLVMGetUndef() should be used.
 *
 * - Variable's value must be kept up-to-date. If the variable is going to be
 *   modified by a function then a pointer should be passed so that its value
 *   is accurate. Failure to do this will cause some of the variables'
 *   transient values to be lost, leading to wrong results.
 *
 * - A program should be written from top to bottom, by always appending
 *   instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
 *   modifying existing statements will most likely lead to wrong results.
 *
 * The declaration is silently dropped (after asserting in debug builds) when
 * the top-most construct is not a scope, when *variable is NULL, or when
 * LP_BUILD_FLOW_MAX_VARIABLES variables are already being tracked.
 *
 * \param variable  pointer to the location holding the variable's value
 */
 void
 lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
                             LLVMValueRef *variable)
 {
   union lp_build_flow_construct_data *data;

   /*
    * Test the pointer returned by the peek itself rather than the address
    * of a member formed through a possibly-NULL pointer.
    */
   data = lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE);
   if(!data)
      return;

   assert(*variable);
   if(!*variable)
      return;

   assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
   if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
      return;

   /* track the variable globally and account for it in the current scope */
   flow->variables[flow->num_variables++] = variable;
   ++data->scope.num_variables;
 }


 void
 lp_build_flow_scope_end(struct lp_build_flow_context *flow)
 {
   struct lp_build_flow_scope *scope;

   scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope;
   if(!scope)
      return;

   assert(flow->num_variables >= scope->num_variables);
   if(flow->num_variables < scope->num_variables) {
      flow->num_variables = 0;
      return;
   }

   flow->num_variables -= scope->num_variables;
 }


 /**
 * Note: this function has no dependencies on the flow code and could
 * be used elsewhere.
 */
@@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
 }


 /**
 * Convenience wrapper: create a new, unnamed basic block through
 * lp_build_insert_new_block() using the flow context's builder.
 */
 static LLVMBasicBlockRef
 lp_build_flow_insert_block(struct lp_build_flow_context *flow)
 {
   return lp_build_insert_new_block(flow->builder, "");
 }


 /**
 * Begin a "skip" block.  Inside this block we can test a condition and
 * skip to the end of the block if the condition is false.
 */
 void
 lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
 lp_build_flow_skip_begin(struct lp_build_skip_context *skip,
                         LLVMBuilderRef builder)
 {
   struct lp_build_flow_skip *skip;
   LLVMBuilderRef builder;
   unsigned i;

   skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
   if(!skip)
      return;
   skip->builder = builder;

   /* create new basic block */
   skip->block = lp_build_flow_insert_block(flow);

   skip->num_variables = flow->num_variables;
   if(!skip->num_variables) {
      skip->phi = NULL;
      return;
   }

   /* Allocate a Phi node for each variable in this skip scope */
   skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
   if(!skip->phi) {
      skip->num_variables = 0;
      return;
   }

   builder = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(builder, skip->block);

   /* create a Phi node for each variable */
   for(i = 0; i < skip->num_variables; ++i)
      skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");

   LLVMDisposeBuilder(builder);
   skip->block = lp_build_insert_new_block(skip->builder, "skip");
 }


@@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
 * skip block if the condition is true.
 */
 void
 lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
 lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip,
                              LLVMValueRef cond)
 {
   struct lp_build_flow_skip *skip;
   LLVMBasicBlockRef current_block;
   LLVMBasicBlockRef new_block;
   unsigned i;

   skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
   if(!skip)
      return;

   current_block = LLVMGetInsertBlock(flow->builder);

   new_block = lp_build_flow_insert_block(flow);

   /* for each variable, update the Phi node with a (variable, block) pair */
   for(i = 0; i < skip->num_variables; ++i) {
      assert(*flow->variables[i]);
      assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
      LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
   }
   new_block = lp_build_insert_new_block(skip->builder, "");

   /* if cond is true, goto skip->block, else goto new_block */
   LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
   LLVMBuildCondBr(skip->builder, cond, skip->block, new_block);

   LLVMPositionBuilderAtEnd(flow->builder, new_block);
   LLVMPositionBuilderAtEnd(skip->builder, new_block);
 }


 void
 lp_build_flow_skip_end(struct lp_build_flow_context *flow)
 lp_build_flow_skip_end(struct lp_build_skip_context *skip)
 {
   struct lp_build_flow_skip *skip;
   LLVMBasicBlockRef current_block;
   unsigned i;

   skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
   if(!skip)
      return;

   current_block = LLVMGetInsertBlock(flow->builder);

   /* add (variable, block) tuples to the phi nodes */
   for(i = 0; i < skip->num_variables; ++i) {
      assert(*flow->variables[i]);
      assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
      LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
      *flow->variables[i] = skip->phi[i];
   }

   /* goto block */
   LLVMBuildBr(flow->builder, skip->block);
   LLVMPositionBuilderAtEnd(flow->builder, skip->block);

   FREE(skip->phi);
   LLVMBuildBr(skip->builder, skip->block);
   LLVMPositionBuilderAtEnd(skip->builder, skip->block);
 }


 /**
 * Check if the mask predicate is zero.  If so, jump to the end of the block.
 */
 static void
 void
 lp_build_mask_check(struct lp_build_mask_context *mask)
 {
   LLVMBuilderRef builder = mask->flow->builder;
   LLVMBuilderRef builder = mask->skip.builder;
   LLVMValueRef value;
   LLVMValueRef cond;

   value = lp_build_mask_value(mask);

   /* cond = (mask == 0) */
   cond = LLVMBuildICmp(builder,
                        LLVMIntEQ,
                        LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
                        LLVMBuildBitCast(builder, value, mask->reg_type, ""),
                        LLVMConstNull(mask->reg_type),
                        "");

   /* if cond, goto end of block */
   lp_build_flow_skip_cond_break(mask->flow, cond);
   lp_build_flow_skip_cond_break(&mask->skip, cond);
 }


@@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask)
 */
 void
 lp_build_mask_begin(struct lp_build_mask_context *mask,
                    struct lp_build_flow_context *flow,
                    LLVMBuilderRef builder,
                    struct lp_type type,
                    LLVMValueRef value)
 {
   memset(mask, 0, sizeof *mask);

   mask->flow = flow;
   mask->reg_type = LLVMIntType(type.width * type.length);
   mask->value = value;
   mask->var = lp_build_alloca(builder,
                               lp_build_int_vec_type(type),
                               "execution_mask");

   lp_build_flow_scope_begin(flow);
   lp_build_flow_scope_declare(flow, &mask->value);
   lp_build_flow_skip_begin(flow);
   LLVMBuildStore(builder, value, mask->var);

   lp_build_mask_check(mask);
   lp_build_flow_skip_begin(&mask->skip, builder);
 }


 /**
 * Emit a load of the mask variable (mask->var) with the skip context's
 * builder and return the loaded value.
 */
 LLVMValueRef
 lp_build_mask_value(struct lp_build_mask_context *mask)
 {
   LLVMValueRef current;

   current = LLVMBuildLoad(mask->skip.builder, mask->var, "");

   return current;
 }


@@ -504,9 +185,10 @@ void
 lp_build_mask_update(struct lp_build_mask_context *mask,
                     LLVMValueRef value)
 {
   mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");

   lp_build_mask_check(mask);
   value = LLVMBuildAnd(mask->skip.builder,
                        lp_build_mask_value(mask),
                        value, "");
   LLVMBuildStore(mask->skip.builder, value, mask->var);
 }


@@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
 LLVMValueRef
 lp_build_mask_end(struct lp_build_mask_context *mask)
 {
   lp_build_flow_skip_end(mask->flow);
   lp_build_flow_scope_end(mask->flow);
   return mask->value;
   lp_build_flow_skip_end(&mask->skip);
   return lp_build_mask_value(mask);
 }


@@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder,
                    LLVMValueRef start,
                    struct lp_build_loop_state *state)
 {
   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
   LLVMValueRef function = LLVMGetBasicBlockParent(block);
   state->block = lp_build_insert_new_block(builder, "loop_begin");

   state->block = LLVMAppendBasicBlock(function, "loop");
   state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter");

   LLVMBuildStore(builder, start, state->counter_var);

   LLVMBuildBr(builder, state->block);

   LLVMPositionBuilderAtEnd(builder, state->block);

   state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");

   LLVMAddIncoming(state->counter, &start, &block, 1);

   state->counter = LLVMBuildLoad(builder, state->counter_var, "");
 }


 void
 lp_build_loop_end(LLVMBuilderRef builder,
                  LLVMValueRef end,
                  LLVMValueRef step,
                  struct lp_build_loop_state *state)
 {
   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
   LLVMValueRef function = LLVMGetBasicBlockParent(block);
   LLVMValueRef next;
   LLVMValueRef cond;
   LLVMBasicBlockRef after_block;

   if (!step)
      step = LLVMConstInt(LLVMTypeOf(end), 1, 0);

   next = LLVMBuildAdd(builder, state->counter, step, "");

   cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");

   after_block = LLVMAppendBasicBlock(function, "");

   LLVMBuildCondBr(builder, cond, after_block, state->block);

   LLVMAddIncoming(state->counter, &next, &block, 1);

   LLVMPositionBuilderAtEnd(builder, after_block);
 }

 void
 lp_build_loop_end_cond(LLVMBuilderRef builder,
                       LLVMValueRef end,
                       LLVMValueRef step,
                       int llvm_cond,
                       LLVMIntPredicate llvm_cond,
                       struct lp_build_loop_state *state)
 {
   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
   LLVMValueRef function = LLVMGetBasicBlockParent(block);
   LLVMValueRef next;
   LLVMValueRef cond;
   LLVMBasicBlockRef after_block;
@@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,

   next = LLVMBuildAdd(builder, state->counter, step, "");

   LLVMBuildStore(builder, next, state->counter_var);

   cond = LLVMBuildICmp(builder, llvm_cond, next, end, "");

   after_block = LLVMAppendBasicBlock(function, "");
   after_block = lp_build_insert_new_block(builder, "loop_end");

   LLVMBuildCondBr(builder, cond, after_block, state->block);

   LLVMAddIncoming(state->counter, &next, &block, 1);

   LLVMPositionBuilderAtEnd(builder, after_block);

   state->counter = LLVMBuildLoad(builder, state->counter_var, "");
 }


 /**
 * Close a loop described by \p state, using LLVMIntNE as the comparison
 * between the incremented counter and \p end.
 *
 * This is a convenience wrapper: all arguments are forwarded unchanged to
 * lp_build_loop_end_cond().
 *
 * NOTE(review): lp_build_loop_end_cond() branches to the block after the
 * loop when the comparison is true, so with LLVMIntNE the loop would be left
 * as soon as next != end -- confirm that this is the intended sense.
 */
 void
 lp_build_loop_end(LLVMBuilderRef builder,
                   LLVMValueRef end,
                   LLVMValueRef step,
                   struct lp_build_loop_state *state)
 {
    lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state);
 }


@@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,

  Is built with:

     LLVMValueRef x = LLVMGetUndef();  // or something else
     // x needs an alloca variable
     x = lp_build_alloca(builder, type, "x");

     flow = lp_build_flow_create(builder);

        lp_build_flow_scope_begin(flow);
     lp_build_if(ctx, builder, cond);
        LLVMBuildStore(LLVMBuildAdd(1, 2), x);
     lp_build_else(ctx);
        LLVMBuildStore(LLVMBuildAdd(2, 3), x);
     lp_build_endif(ctx);

           // x needs a phi node
           lp_build_flow_scope_declare(flow, &x);

           lp_build_if(ctx, flow, builder, cond);
              x = LLVMAdd(1, 2);
           lp_build_else(ctx);
              x = LLVMAdd(2, 3);
           lp_build_endif(ctx);

        lp_build_flow_scope_end(flow);

     lp_build_flow_destroy(flow);
 */


@@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
 * Begin an if/else/endif construct.
 */
 void
 lp_build_if(struct lp_build_if_state *ctx,
            struct lp_build_flow_context *flow,
 lp_build_if(struct lp_build_if_state *ifthen,
            LLVMBuilderRef builder,
            LLVMValueRef condition)
 {
   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
   struct lp_build_flow_if *ifthen;
   unsigned i;

   memset(ctx, 0, sizeof(*ctx));
   ctx->builder = builder;
   ctx->flow = flow;

   /* push/create new scope */
   ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen;
   assert(ifthen);

   ifthen->num_variables = flow->num_variables;
   memset(ifthen, 0, sizeof *ifthen);
   ifthen->builder = builder;
   ifthen->condition = condition;
   ifthen->entry_block = block;

   /* create a Phi node for each variable in this flow scope */
   ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi));
   if (!ifthen->phi) {
      ifthen->num_variables = 0;
      return;
   }

   /* create endif/merge basic block for the phi functions */
   ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block");
   LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);

   /* create a phi node for each variable */
   for (i = 0; i < flow->num_variables; i++) {
      ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");

      /* add the initial value of the var from the entry block */
      if (!LLVMIsUndef(*flow->variables[i]))
         LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
                         &ifthen->entry_block, 1);
   }

   /* create/insert true_block before merge_block */
   ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block");
@@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx,
 * Begin else-part of a conditional
 */
 void
 lp_build_else(struct lp_build_if_state *ctx)
 lp_build_else(struct lp_build_if_state *ifthen)
 {
   struct lp_build_flow_context *flow = ctx->flow;
   struct lp_build_flow_if *ifthen;
   unsigned i;

   ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen;
   assert(ifthen);

   /* for each variable, update the Phi node with a (variable, block) pair */
   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
   for (i = 0; i < flow->num_variables; i++) {
      assert(*flow->variables[i]);
      LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);
   }
   /* Append an unconditional Br(anch) instruction on the true_block */
   LLVMBuildBr(ifthen->builder, ifthen->merge_block);

   /* create/insert false_block before the merge block */
   ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block");

   /* successive code goes into the else block */
   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
   LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block);
 }


@@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx)
 * End a conditional.
 */
 void
 lp_build_endif(struct lp_build_if_state *ctx)
 lp_build_endif(struct lp_build_if_state *ifthen)
 {
   struct lp_build_flow_context *flow = ctx->flow;
   struct lp_build_flow_if *ifthen;
   LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
   unsigned i;

   ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
   assert(ifthen);

   /* Insert branch to the merge block from current block */
   LLVMBuildBr(ctx->builder, ifthen->merge_block);
   LLVMBuildBr(ifthen->builder, ifthen->merge_block);

   if (ifthen->false_block) {
      LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
      /* for each variable, update the Phi node with a (variable, block) pair */
      for (i = 0; i < flow->num_variables; i++) {
         assert(*flow->variables[i]);
         LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
         /* replace the variable ref with the phi function */
         *flow->variables[i] = ifthen->phi[i];
      }
   }
   else {
      /* no else clause */
      LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
      for (i = 0; i < flow->num_variables; i++) {
         assert(*flow->variables[i]);
         LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);

         /* replace the variable ref with the phi function */
         *flow->variables[i] = ifthen->phi[i];
      }
   }

   FREE(ifthen->phi);

   /***
    *** Now patch in the various branch instructions.
    ***/
   /*
    * Now patch in the various branch instructions.
    */

   /* Insert the conditional branch instruction at the end of entry_block */
   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block);
   LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block);
   if (ifthen->false_block) {
      /* we have an else clause */
      LLVMBuildCondBr(ctx->builder, ifthen->condition,
      LLVMBuildCondBr(ifthen->builder, ifthen->condition,
                      ifthen->true_block, ifthen->false_block);
   }
   else {
      /* no else clause */
      LLVMBuildCondBr(ctx->builder, ifthen->condition,
      LLVMBuildCondBr(ifthen->builder, ifthen->condition,
                      ifthen->true_block, ifthen->merge_block);
   }

   /* Insert branch from end of true_block to merge_block */
   if (ifthen->false_block) {
      /* Append an unconditional Br(anch) instruction on the true_block */
      LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
      LLVMBuildBr(ctx->builder, ifthen->merge_block);
   }
   else {
      /* No else clause.
       * Note that we've already inserted the branch at the end of
       * true_block.  See the very first LLVMBuildBr() call in this function.
       */
   }

   /* Resume building code at end of the ifthen->merge_block */
   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
   LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block);
 }


@@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder,
   }

   res = LLVMBuildAlloca(first_builder, type, name);
   LLVMBuildStore(builder, LLVMConstNull(type), res);

   LLVMDisposeBuilder(first_builder);

--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -41,52 +41,49 @@
 struct lp_type;


 struct lp_build_flow_context;


 struct lp_build_flow_context *
 lp_build_flow_create(LLVMBuilderRef builder);

 void
 lp_build_flow_destroy(struct lp_build_flow_context *flow);

 void
 lp_build_flow_scope_begin(struct lp_build_flow_context *flow);

 void
 lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
                            LLVMValueRef *variable);
 /**
 * Early exit. Useful to skip to the end of a function or block when
 * the execution mask becomes zero or when there is an error condition.
 */
 struct lp_build_skip_context
 {
   LLVMBuilderRef builder;

 void
 lp_build_flow_scope_end(struct lp_build_flow_context *flow);
   /** Block to skip to */
   LLVMBasicBlockRef block;
 };

 void
 lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
 lp_build_flow_skip_begin(struct lp_build_skip_context *ctx,
                         LLVMBuilderRef builder);

 void
 lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
 lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx,
                              LLVMValueRef cond);

 void
 lp_build_flow_skip_end(struct lp_build_flow_context *flow);
 lp_build_flow_skip_end(struct lp_build_skip_context *ctx);


 struct lp_build_mask_context
 {
   struct lp_build_flow_context *flow;
   struct lp_build_skip_context skip;

   LLVMTypeRef reg_type;

   LLVMValueRef value;
   LLVMValueRef var;
 };


 void
 lp_build_mask_begin(struct lp_build_mask_context *mask,
                    struct lp_build_flow_context *flow,
                    LLVMBuilderRef builder,
                    struct lp_type type,
                    LLVMValueRef value);

 LLVMValueRef
 lp_build_mask_value(struct lp_build_mask_context *mask);

 /**
 * Bitwise AND the mask with the given value, if a previous mask was set.
 */
@@ -94,6 +91,9 @@ void
 lp_build_mask_update(struct lp_build_mask_context *mask,
                     LLVMValueRef value);

 void
 lp_build_mask_check(struct lp_build_mask_context *mask);

 LLVMValueRef
 lp_build_mask_end(struct lp_build_mask_context *mask);

@@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask);
 /* State shared between lp_build_loop_begin() and the loop-end helpers. */
 struct lp_build_loop_state
 {
  /* Basic block of the loop body; the end-of-loop branch targets it. */
  LLVMBasicBlockRef block;
  /* Alloca'd variable that holds the loop counter between iterations. */
  LLVMValueRef counter_var;
  /* Counter value most recently loaded from counter_var. */
  LLVMValueRef counter;
 };

@@ -128,22 +129,28 @@ void
 lp_build_loop_end_cond(LLVMBuilderRef builder,
                       LLVMValueRef end,
                       LLVMValueRef step,
                       int cond, /* LLVM condition */
                       LLVMIntPredicate cond,
                       struct lp_build_loop_state *state);




 /**
 * if/else/endif.
 */
 struct lp_build_if_state
 {
   LLVMBuilderRef builder;
   struct lp_build_flow_context *flow;
   LLVMValueRef condition;
   LLVMBasicBlockRef entry_block;
   LLVMBasicBlockRef true_block;
   LLVMBasicBlockRef false_block;
   LLVMBasicBlockRef merge_block;
 };


 void
 lp_build_if(struct lp_build_if_state *ctx,
            struct lp_build_flow_context *flow,
            LLVMBuilderRef builder,
            LLVMValueRef condition);

--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -35,6 +35,7 @@


 #include "util/u_format.h"
 #include "util/u_cpu_detect.h"

 #include "lp_bld_arit.h"
 #include "lp_bld_type.h"
@@ -42,7 +43,7 @@
 #include "lp_bld_conv.h"
 #include "lp_bld_gather.h"
 #include "lp_bld_format.h"

 #include "lp_bld_logic.h"

 /**
 * Extract Y, U, V channels from packed UYVY.
@@ -59,7 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
                LLVMValueRef *v)
 {
   struct lp_type type;
   LLVMValueRef shift, mask;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
@@ -69,14 +70,37 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
   assert(lp_check_value(type, i));

   /*
    * y = (uyvy >> 16*i) & 0xff
    * y = (uyvy >> (16*i + 8)) & 0xff
    * u = (uyvy        ) & 0xff
    * v = (uyvy >> 16  ) & 0xff
    */

   shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
   *y = LLVMBuildLShr(builder, packed, shift, "");
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp, tmp2;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, builder, type);

      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
      tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
      sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
      *y = lp_build_select(&bld32, sel, tmp, tmp2);
   } else
 #endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = packed;
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

@@ -103,7 +127,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
                LLVMValueRef *v)
 {
   struct lp_type type;
   LLVMValueRef shift, mask;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
@@ -118,8 +142,30 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
    * v = (yuyv >> 24  ) & 0xff
    */

   shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   *y = LLVMBuildLShr(builder, packed, shift, "");
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, builder, type);

      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
      sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
       *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
 #endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");

--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
   { "asm",    GALLIVM_DEBUG_ASM, NULL },
   { "nopt",   GALLIVM_DEBUG_NO_OPT, NULL },
   { "perf",   GALLIVM_DEBUG_PERF, NULL },
   { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL },
   DEBUG_NAMED_VALUE_END
 };

--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -47,4 +47,10 @@ lp_build_init(void);
 extern void
 lp_func_delete_body(LLVMValueRef func);


 extern LLVMValueRef
 lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
                       const char *Name);


 #endif /* !LP_BLD_INIT_H */
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder,
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

   /* TODO: optimize the constant case */
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   /* XXX: It is not clear if we should use the ordered or unordered operators */
   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                      __FUNCTION__, type.length, type.width);
   }
 #endif

 #if HAVE_LLVM < 0x0207
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder,
 #endif
 #endif /* HAVE_LLVM < 0x0207 */

   /* XXX: It is not clear if we should use the ordered or unordered operators */

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
@@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld,
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      if (type.width == 64) {
      if (type.floating &&
          type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleType(), 2);
      } else if (type.width == 32) {
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatType(), 4);
      } else {
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF)
   llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
   func->deleteBody();
 }


 /**
 * C-callable helper that builds a load from \p PointerVal through the C++
 * IRBuilder behind \p B, and returns the wrapped result.
 *
 * NOTE(review): the literal `true` passed as CreateLoad's second argument is
 * presumably the isVolatile flag, as the function name suggests -- confirm
 * against the CreateLoad overloads of the LLVM version in use.
 */
 extern "C"
 LLVMValueRef
 lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
                        const char *Name)
 {
    return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name));
 }

--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -29,6 +29,8 @@

 #include "util/u_debug.h"
 #include "util/u_memory.h"
 #include "util/u_string.h"
 #include "lp_bld_const.h"
 #include "lp_bld_printf.h"


@@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...)
   return LLVMBuildCall(builder, func_printf, params, argcount + 1, "");
 }



 /**
 * Emit a printf call that prints elements 0..3 of a vector with "%f".
 *
 * \param msg  text placed at the start of the output line; it is pasted into
 *             the printf format string as-is
 * \param vec  vector value whose first four elements are extracted
 * \return the value returned by lp_build_printf()
 */
 LLVMValueRef
 lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec)
 {
    LLVMValueRef elems[4];
    char fmt[1000];
    int chan;

    /* pull the four channels out of the vector, in x, y, z, w order */
    for (chan = 0; chan < 4; chan++) {
       elems[chan] = LLVMBuildExtractElement(builder, vec,
                                             lp_build_const_int32(chan), "");
    }

    util_snprintf(fmt, sizeof(fmt), "%s %%f %%f %%f %%f\n", msg);

    return lp_build_printf(builder, fmt,
                           elems[0], elems[1], elems[2], elems[3]);
 }
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
@@ -35,5 +35,9 @@
 LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len);
 LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...);

 LLVMValueRef
 lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec);


 #endif

--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -81,11 +81,15 @@ LLVMValueRef
 lp_build_scalar_ddx(struct lp_build_context *bld,
                    LLVMValueRef a)
 {
   LLVMValueRef idx_left  = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
   LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
   LLVMValueRef a_left  = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
   LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
   return lp_build_sub(bld, a_right, a_left);
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef idx_left  = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
   LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0);
   LLVMValueRef a_left  = LLVMBuildExtractElement(bld->builder, a, idx_left, "left");
   LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right");
   if (bld->type.floating)
      return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx");
   else
      return LLVMBuildSub(bld->builder, a_right, a_left, "ddx");
 }


@@ -93,9 +97,13 @@ LLVMValueRef
 lp_build_scalar_ddy(struct lp_build_context *bld,
                    LLVMValueRef a)
 {
   LLVMValueRef idx_top    = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
   LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
   LLVMValueRef a_top    = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
   LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
   return lp_build_sub(bld, a_bottom, a_top);
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef idx_top    = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
   LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0);
   LLVMValueRef a_top    = LLVMBuildExtractElement(bld->builder, a, idx_top, "top");
   LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom");
   if (bld->type.floating)
      return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy");
   else
      return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy");
 }
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -39,12 +39,52 @@
 #include "lp_bld_arit.h"
 #include "lp_bld_const.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_printf.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_sample.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_type.h"


 /*
 * Bri-linear factor. Should be greater than one.
 */
 #define BRILINEAR_FACTOR 2


 /**
 * Tell whether sampling with the given wrap mode may fetch the texture
 * border color.
 *
 * - REPEAT, CLAMP_TO_EDGE and their mirrored variants: never.
 * - CLAMP_TO_BORDER and MIRROR_CLAMP_TO_BORDER: always.
 * - CLAMP and MIRROR_CLAMP: yes, unless both the min and mag image filters
 *   are NEAREST.
 *
 * An unknown mode asserts and reports FALSE.
 * XXX maybe move this into gallium util code.
 */
 boolean
 lp_sampler_wrap_mode_uses_border_color(unsigned mode,
                                        unsigned min_img_filter,
                                        unsigned mag_img_filter)
 {
    const boolean nearest_only =
       min_img_filter == PIPE_TEX_FILTER_NEAREST &&
       mag_img_filter == PIPE_TEX_FILTER_NEAREST;

    switch (mode) {
    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
       return TRUE;

    case PIPE_TEX_WRAP_CLAMP:
    case PIPE_TEX_WRAP_MIRROR_CLAMP:
       /* the border only contributes when some filtering is involved */
       return nearest_only ? FALSE : TRUE;

    case PIPE_TEX_WRAP_REPEAT:
    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
       return FALSE;

    default:
       assert(0 && "unexpected wrap mode");
       return FALSE;
    }
 }


 /**
 * Initialize lp_sampler_static_state object with the gallium sampler
 * and texture state.
@@ -93,31 +133,40 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
   state->wrap_r            = sampler->wrap_r;
   state->min_img_filter    = sampler->min_img_filter;
   state->mag_img_filter    = sampler->mag_img_filter;
   if (view->last_level) {

   if (view->last_level && sampler->max_lod > 0.0f) {
      state->min_mip_filter = sampler->min_mip_filter;
   } else {
      state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   }

   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      if (sampler->lod_bias != 0.0f) {
         state->lod_bias_non_zero = 1;
      }

      /* If min_lod == max_lod we can greatly simplify mipmap selection.
       * This is a case that occurs during automatic mipmap generation.
       */
      if (sampler->min_lod == sampler->max_lod) {
         state->min_max_lod_equal = 1;
      } else {
         if (sampler->min_lod > 0.0f) {
            state->apply_min_lod = 1;
         }

         if (sampler->max_lod < (float)view->last_level) {
            state->apply_max_lod = 1;
         }
      }
   }

   state->compare_mode      = sampler->compare_mode;
   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
      state->compare_func   = sampler->compare_func;
   }

   state->normalized_coords = sampler->normalized_coords;
   state->lod_bias          = sampler->lod_bias;
   if (!view->last_level &&
       sampler->min_img_filter == sampler->mag_img_filter) {
      state->min_lod        = 0.0f;
      state->max_lod        = 0.0f;
   } else {
      state->min_lod        = MAX2(sampler->min_lod, 0.0f);
      state->max_lod        = sampler->max_lod;
   }
   state->border_color[0]   = sampler->border_color[0];
   state->border_color[1]   = sampler->border_color[1];
   state->border_color[2]   = sampler->border_color[2];
   state->border_color[3]   = sampler->border_color[3];

   /*
    * FIXME: Handle the remainder of pipe_sampler_view.
@@ -125,6 +174,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 }


 /**
 * Generate code to compute coordinate gradient (rho).
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 * \return scalar rho: the largest, over the first bld->dims coordinates, of
 *         max(|ddx|, |ddy|) multiplied by the matching element of
 *         bld->int_size
 *
 * XXX: The resulting rho is scalar, so we ignore all but the first element of
 * derivatives that are passed by the shader.
 */
 static LLVMValueRef
 lp_build_rho(struct lp_build_sample_context *bld,
              const LLVMValueRef ddx[4],
              const LLVMValueRef ddy[4])
 {
    struct lp_build_context *float_size_bld = &bld->float_size_bld;
    struct lp_build_context *float_bld = &bld->float_bld;
    const unsigned dims = bld->dims;
    LLVMTypeRef i32t = LLVMInt32Type();
    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
    LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
    LLVMValueRef rho_x, rho_y;
    LLVMValueRef rho_vec;
    LLVMValueRef float_size;
    LLVMValueRef rho;

    if (dims <= 1) {
       /* 1D: the s derivatives are used directly */
       rho_x = ddx[0];
       rho_y = ddy[0];
    }
    else {
       /* pack the per-coordinate derivatives into one vector per axis */
       rho_x = float_size_bld->undef;
       rho_y = float_size_bld->undef;

       rho_x = LLVMBuildInsertElement(bld->builder, rho_x, ddx[0], index0, "");
       rho_y = LLVMBuildInsertElement(bld->builder, rho_y, ddy[0], index0, "");

       rho_x = LLVMBuildInsertElement(bld->builder, rho_x, ddx[1], index1, "");
       rho_y = LLVMBuildInsertElement(bld->builder, rho_y, ddy[1], index1, "");

       if (dims >= 3) {
          rho_x = LLVMBuildInsertElement(bld->builder, rho_x, ddx[2], index2, "");
          rho_y = LLVMBuildInsertElement(bld->builder, rho_y, ddy[2], index2, "");
       }
    }

    rho_x = lp_build_abs(float_size_bld, rho_x);
    rho_y = lp_build_abs(float_size_bld, rho_y);

    rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);

    float_size = lp_build_int_to_float(float_size_bld, bld->int_size);

    rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

    if (dims <= 1) {
       rho = rho_vec;
    }
    else {
       /* reduce the per-coordinate values to a single scalar maximum */
       LLVMValueRef rho_s, rho_t;

       rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
       rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");

       rho = lp_build_max(float_bld, rho_s, rho_t);

       if (dims >= 3) {
          /* the r component lives in element 2, where drdx/drdy were inserted */
          LLVMValueRef rho_r;

          rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index2, "");
          rho = lp_build_max(float_bld, rho, rho_r);
       }
    }

    return rho;
 }


 /*
 * Bri-linear lod computation
 *
 * Use a piece-wise linear approximation of log2 such that:
 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
 *   with the steepness specified in 'factor'
 * - exact result for 0.5, 1.5, etc.
 *
 *
 *   1.0 -              /----*
 *                     /
 *                    /
 *                   /
 *   0.5 -          *
 *                 /
 *                /
 *               /
 *   0.0 - *----/
 *
 *         |                 |
 *        2^0               2^1
 *
 * This is a technique also commonly used in hardware:
 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 *
 * TODO: For correctness, this should only be applied when texture is known to
 * have regular mipmaps, i.e., mipmaps derived from the base level.
 *
 * TODO: This could be done in fixed point, where applicable.
 */
 static void
 lp_build_brilinear_lod(struct lp_build_context *bld,
                        LLVMValueRef lod,
                        double factor,
                        LLVMValueRef *out_lod_ipart,
                        LLVMValueRef *out_lod_fpart)
 {
    const double pre_offset = (factor - 0.5)/factor - 0.5;
    const double post_offset = 1 - factor;
    LLVMValueRef pre_offset_vec;
    LLVMValueRef factor_vec;
    LLVMValueRef post_offset_vec;
    LLVMValueRef fract;

    if (0) {
       lp_build_printf(bld->builder, "lod = %f\n", lod);
    }

    /* bias the lod before splitting it into integer and fractional parts */
    pre_offset_vec = lp_build_const_vec(bld->type, pre_offset);
    lod = lp_build_add(bld, lod, pre_offset_vec);

    lp_build_ifloor_fract(bld, lod, out_lod_ipart, &fract);

    /* fract = fract * factor + post_offset */
    factor_vec = lp_build_const_vec(bld->type, factor);
    fract = lp_build_mul(bld, fract, factor_vec);

    post_offset_vec = lp_build_const_vec(bld->type, post_offset);
    fract = lp_build_add(bld, fract, post_offset_vec);

    /*
     * The fractional part is deliberately left unclamped:
     * - the expression above never yields a value greater than one;
     * - mip filtering is only performed when the fractional part is positive.
     */

    *out_lod_fpart = fract;

    if (0) {
       lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart);
       lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart);
    }
 }


 /*
 * Combined log2 and brilinear lod computation.
 *
 * The result is identical to calling lp_build_fast_log2() followed by
 * lp_build_brilinear_lod() above, but by combining the two we can compute the
 * integer and fractional parts independently.
 */
 static void
 lp_build_brilinear_rho(struct lp_build_context *bld,
                        LLVMValueRef rho,
                        double factor,
                        LLVMValueRef *out_lod_ipart,
                        LLVMValueRef *out_lod_fpart)
 {
    const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
    const double post_offset = 1 - 2*factor;
    LLVMValueRef scaled_rho;
    LLVMValueRef ipart;
    LLVMValueRef fpart;

    assert(bld->type.floating);

    assert(lp_check_value(bld->type, rho));

    /*
     * Scaling by the pre factor makes the intersections with the exact powers
     * of two happen precisely where we want them to be, so the integer part
     * needs no adjustment afterwards.
     */
    scaled_rho = lp_build_mul(bld, rho,
                              lp_build_const_vec(bld->type, pre_factor));

    /* ipart = ifloor(log2(rho)) */
    ipart = lp_build_extract_exponent(bld, scaled_rho, 0);

    /* fpart = rho / 2**ipart */
    fpart = lp_build_extract_mantissa(bld, scaled_rho);

    /* fpart = fpart * factor + post_offset */
    fpart = lp_build_mul(bld, fpart,
                         lp_build_const_vec(bld->type, factor));

    fpart = lp_build_add(bld, fpart,
                         lp_build_const_vec(bld->type, post_offset));

    /*
     * As in lp_build_brilinear_lod, the fractional part is left unclamped:
     * - the expression above never yields a value greater than one;
     * - mip filtering is only performed when the fractional part is positive.
     */

    *out_lod_ipart = ipart;
    *out_lod_fpart = fpart;
 }


 /**
 * Generate code to compute texture level of detail (lambda).
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
@@ -138,83 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 * XXX: The resulting lod is scalar, so ignore all but the first element of
 * derivatives, lod_bias, etc that are passed by the shader.
 */
 LLVMValueRef
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
                      const LLVMValueRef ddy[4],
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      LLVMValueRef width,
                      LLVMValueRef height,
                      LLVMValueRef depth)
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)

 {
   if (bld->static_state->min_lod == bld->static_state->max_lod) {
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMValueRef lod;

   *out_lod_ipart = bld->int_bld.zero;
   *out_lod_fpart = bld->float_bld.zero;

   if (bld->static_state->min_max_lod_equal) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
      LLVMValueRef min_lod =
         bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);

      lod = min_lod;
   }
   else {
      struct lp_build_context *float_bld = &bld->float_bld;
      LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
                                                    bld->static_state->lod_bias);
      LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
                                           bld->static_state->min_lod);
      LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
                                           bld->static_state->max_lod);
      LLVMValueRef sampler_lod_bias =
         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
      LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      LLVMValueRef lod;

      if (explicit_lod) {
         lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
                                       index0, "");
      }
      else {
         const int dims = texture_dims(bld->static_state->target);
         LLVMValueRef dsdx, dsdy;
         LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
         LLVMValueRef rho;

         dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
         dsdx = lp_build_abs(float_bld, dsdx);
         dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
         dsdy = lp_build_abs(float_bld, dsdy);
         if (dims > 1) {
            dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
            dtdx = lp_build_abs(float_bld, dtdx);
            dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
            dtdy = lp_build_abs(float_bld, dtdy);
            if (dims > 2) {
               drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
               drdx = lp_build_abs(float_bld, drdx);
               drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
               drdy = lp_build_abs(float_bld, drdy);
            }
         }
         rho = lp_build_rho(bld, ddx, ddy);

         /* Compute rho = max of all partial derivatives scaled by texture size.
          * XXX this could be vectorized somewhat
         /*
          * Compute lod = log2(rho)
          */
         rho = LLVMBuildFMul(bld->builder,
                            lp_build_max(float_bld, dsdx, dsdy),
                            lp_build_int_to_float(float_bld, width), "");
         if (dims > 1) {
            LLVMValueRef max;
            max = LLVMBuildFMul(bld->builder,
                               lp_build_max(float_bld, dtdx, dtdy),
                               lp_build_int_to_float(float_bld, height), "");
            rho = lp_build_max(float_bld, rho, max);
            if (dims > 2) {
               max = LLVMBuildFMul(bld->builder,
                                  lp_build_max(float_bld, drdx, drdy),
                                  lp_build_int_to_float(float_bld, depth), "");
               rho = lp_build_max(float_bld, rho, max);

         if (!lod_bias &&
             !bld->static_state->lod_bias_non_zero &&
             !bld->static_state->apply_max_lod &&
             !bld->static_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions by keeping the integer and fractional lod
             * computations separate from the start.
             */

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               *out_lod_ipart = lp_build_ilog2(float_bld, rho);
               *out_lod_fpart = bld->float_bld.zero;
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
               lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               return;
            }
         }

         /* compute lod = log2(rho) */
         lod = lp_build_log2(float_bld, rho);
         if (0) {
            lod = lp_build_log2(float_bld, rho);
         }
         else {
            lod = lp_build_fast_log2(float_bld, rho);
         }

         /* add shader lod bias */
         if (lod_bias) {
@@ -225,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
      }

      /* add sampler lod bias */
      lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
      if (bld->static_state->lod_bias_non_zero)
         lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");


      /* clamp lod */
      lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
      if (bld->static_state->apply_max_lod) {
         LLVMValueRef max_lod =
            bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);

      return lod;
         lod = lp_build_min(float_bld, lod, max_lod);
      }
      if (bld->static_state->apply_min_lod) {
         LLVMValueRef min_lod =
            bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);

         lod = lp_build_max(float_bld, lod, min_lod);
      }
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
         lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }
   else {
      *out_lod_ipart = lp_build_iround(float_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");

   return;
 }


@@ -245,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out)
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMValueRef last_level, level;

@@ -258,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                                               bld->builder, unit);

   /* convert float lod to integer */
   level = lp_build_iround(float_bld, lod);
   level = lod_ipart;

   /* clamp level to legal range of levels */
   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -273,43 +563,77 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out,
                           LLVMValueRef *weight_out)
                           LLVMValueRef *level1_out)
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMValueRef last_level, level;
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMValueRef last_level;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   *level0_out = lod_ipart;
   *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);

   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                               bld->builder, unit);

   /* convert float lod to integer */
   level = lp_build_ifloor(float_bld, lod);

   /* compute level 0 and clamp to legal range of levels */
   *level0_out = lp_build_clamp(int_bld, level,
                                int_bld->zero,
                                last_level);
   /* compute level 1 and clamp to legal range of levels */
   level = lp_build_add(int_bld, level, int_bld->one);
   *level1_out = lp_build_clamp(int_bld, level,
                                int_bld->zero,
                                last_level);

   *weight_out = lp_build_fract(float_bld, lod);
   /*
    * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
    * minimum number of comparisons, and zeroing lod_fpart in the extreme
    * ends in the process.
    */

   /* lod_ipart < 0 */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             lod_ipart, int_bld->zero,
                             "clamp_lod_to_zero");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 int_bld->zero, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 int_bld->zero, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      float_bld->zero, *lod_fpart_inout, "");

   /* lod_ipart >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      float_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
   lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
   lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
 }


 /**
 * Return pointer to a single mipmap level.
 * \param data_array  array of pointers to mipmap levels
 * \param level  integer mipmap level
 */
 LLVMValueRef
 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
                          LLVMValueRef data_array, LLVMValueRef level)
                          LLVMValueRef level)
 {
   LLVMValueRef indexes[2], data_ptr;
   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
   indexes[1] = level;
   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
   data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, "");
   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
   return data_ptr;
 }
@@ -317,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,

 LLVMValueRef
 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
                                LLVMValueRef data_array, int level)
                                int level)
 {
   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
   return lp_build_get_mipmap_level(bld, data_array, lvl);
   return lp_build_get_mipmap_level(bld, lvl);
 }


@@ -329,13 +653,24 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 * Return max(1, base_size >> level);
 */
 static LLVMValueRef
 lp_build_minify(struct lp_build_sample_context *bld,
 lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level)
 {
   LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
   return size;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   }
   else {
      LLVMValueRef size =
         LLVMBuildLShr(bld->builder, base_size, level, "minify");
      assert(bld->type.sign);
      size = lp_build_max(bld, size, bld->one);
      return size;
   }
 }


@@ -364,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 */
 void
 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            unsigned dims,
                            LLVMValueRef width_vec,
                            LLVMValueRef height_vec,
                            LLVMValueRef depth_vec,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef row_stride_array,
                            LLVMValueRef img_stride_array,
                            LLVMValueRef *width0_vec,
                            LLVMValueRef *width1_vec,
                            LLVMValueRef *height0_vec,
                            LLVMValueRef *height1_vec,
                            LLVMValueRef *depth0_vec,
                            LLVMValueRef *depth1_vec,
                            LLVMValueRef *row_stride0_vec,
                            LLVMValueRef *row_stride1_vec,
                            LLVMValueRef *img_stride0_vec,
                            LLVMValueRef *img_stride1_vec)
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
 {
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   LLVMValueRef ilevel0_vec, ilevel1_vec;
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
      ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
   ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);

   /*
    * Compute width, height, depth at mipmap level 'ilevel0'
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
   *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);

   if (dims >= 2) {
      *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
      *row_stride0_vec = lp_build_get_level_stride_vec(bld,
                                                       row_stride_array,
                                                       ilevel0);
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_array,
                                                      ilevel);
      if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
         *img_stride0_vec = lp_build_get_level_stride_vec(bld,
                                                          img_stride_array,
                                                          ilevel0);
         if (dims == 3) {
            *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
         }
         *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                         bld->img_stride_array,
                                                         ilevel);
      }
   }
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* compute width, height, depth for second mipmap level at 'ilevel1' */
      *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
      if (dims >= 2) {
         *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
         *row_stride1_vec = lp_build_get_level_stride_vec(bld,
                                                          row_stride_array,
                                                          ilevel1);
         if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
            *img_stride1_vec = lp_build_get_level_stride_vec(bld,
                                                             img_stride_array,
                                                             ilevel1);
            if (dims == 3) {
               *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
            }
         }
 }


 /**
 * Extract the individual texture dimensions from a size vector and
 * broadcast each of them.
 *
 * Element 0 is always extracted into out_width.  Element 1 is extracted into
 * out_height only when bld->dims >= 2, and element 2 into out_depth only when
 * bld->dims == 3; outputs for unused dimensions are not written.
 *
 * @param size_type   type of the texture size vector (either
 *                    bld->int_size_type or bld->float_size_type)
 * @param coord_type  type of the coordinate vectors the sizes are broadcast
 *                    to (either bld->int_coord_type or bld->coord_type)
 * @param size        vector with the texture size (width, height, depth)
 */
 void
 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                              struct lp_type size_type,
                              struct lp_type coord_type,
                              LLVMValueRef size,
                              LLVMValueRef *out_width,
                              LLVMValueRef *out_height,
                              LLVMValueRef *out_depth)
 {
   const unsigned num_dims = bld->dims;
   LLVMTypeRef int32_type = LLVMInt32Type();

   /* width lives in element 0 and is needed for every texture target */
   *out_width = lp_build_extract_broadcast(bld->builder,
                                           size_type, coord_type, size,
                                           LLVMConstInt(int32_type, 0, 0));
   if (num_dims < 2) {
      return;
   }

   /* height lives in element 1 */
   *out_height = lp_build_extract_broadcast(bld->builder,
                                            size_type, coord_type, size,
                                            LLVMConstInt(int32_type, 1, 0));
   if (num_dims != 3) {
      return;
   }

   /* depth lives in element 2 */
   *out_depth = lp_build_extract_broadcast(bld->builder,
                                           size_type, coord_type, size,
                                           LLVMConstInt(int32_type, 2, 0));
 }


 /**
 * Scale normalized texture coordinates by the texture size.
 *
 * s is always multiplied by the width; t is multiplied by the height when
 * bld->dims >= 2 and r by the depth when bld->dims >= 3.  Coordinates of
 * unused dimensions are not touched.
 *
 * @param flt_size  texture size vector (width, height, depth), extracted
 *                  here using bld->float_size_type
 * @param s         in/out: s coordinate
 * @param t         in/out: t coordinate
 * @param r         in/out: r coordinate
 */
 void
 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
                              LLVMValueRef flt_size,
                              LLVMValueRef *s,
                              LLVMValueRef *t,
                              LLVMValueRef *r)
 {
   const unsigned num_dims = bld->dims;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef size_w;
   LLVMValueRef size_h;
   LLVMValueRef size_d;

   /* split the size vector into per-dimension vectors of coord type */
   lp_build_extract_image_sizes(bld,
                                bld->float_size_type,
                                bld->coord_type,
                                flt_size,
                                &size_w,
                                &size_h,
                                &size_d);

   /* s = s * width */
   *s = lp_build_mul(coord_bld, *s, size_w);
   if (num_dims < 2) {
      return;
   }

   /* t = t * height */
   *t = lp_build_mul(coord_bld, *t, size_h);
   if (num_dims < 3) {
      return;
   }

   /* r = r * depth */
   *r = lp_build_mul(coord_bld, *r, size_d);
 }


 /** Helper used by lp_build_cube_lookup() */
 static LLVMValueRef
@@ -547,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
   rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");

   {
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef face_s_var;
      LLVMValueRef face_t_var;
      LLVMValueRef face_var;

      flow_ctx = lp_build_flow_create(bld->builder);
      lp_build_flow_scope_begin(flow_ctx);

      *face_s = bld->coord_bld.undef;
      *face_t = bld->coord_bld.undef;
      *face = bld->int_bld.undef;

      lp_build_name(*face_s, "face_s");
      lp_build_name(*face_t, "face_t");
      lp_build_name(*face, "face");
      face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var");
      face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var");
      face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var");

      lp_build_flow_scope_declare(flow_ctx, face_s);
      lp_build_flow_scope_declare(flow_ctx, face_t);
      lp_build_flow_scope_declare(flow_ctx, face);

      lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
      lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz);
      {
         /* +/- X face */
         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
@@ -575,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
         *face = lp_build_cube_face(bld, rx,
                                    PIPE_TEX_FACE_POS_X,
                                    PIPE_TEX_FACE_NEG_X);
         LLVMBuildStore(bld->builder, *face_s, face_s_var);
         LLVMBuildStore(bld->builder, *face_t, face_t_var);
         LLVMBuildStore(bld->builder, *face, face_var);
      }
      lp_build_else(&if_ctx);
      {
         struct lp_build_flow_context *flow_ctx2;
         struct lp_build_if_state if_ctx2;

         LLVMValueRef face_s2 = bld->coord_bld.undef;
         LLVMValueRef face_t2 = bld->coord_bld.undef;
         LLVMValueRef face2 = bld->int_bld.undef;

         flow_ctx2 = lp_build_flow_create(bld->builder);
         lp_build_flow_scope_begin(flow_ctx2);
         lp_build_flow_scope_declare(flow_ctx2, &face_s2);
         lp_build_flow_scope_declare(flow_ctx2, &face_t2);
         lp_build_flow_scope_declare(flow_ctx2, &face2);

         ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");

         lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
         lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz);
         {
            /* +/- Y face */
            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
            face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
            face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
            face2 = lp_build_cube_face(bld, ry,
            *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
            *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
            *face = lp_build_cube_face(bld, ry,
                                       PIPE_TEX_FACE_POS_Y,
                                       PIPE_TEX_FACE_NEG_Y);
            LLVMBuildStore(bld->builder, *face_s, face_s_var);
            LLVMBuildStore(bld->builder, *face_t, face_t_var);
            LLVMBuildStore(bld->builder, *face, face_var);
         }
         lp_build_else(&if_ctx2);
         {
            /* +/- Z face */
            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
            face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
            face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
            face2 = lp_build_cube_face(bld, rz,
            *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
            *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
            *face = lp_build_cube_face(bld, rz,
                                       PIPE_TEX_FACE_POS_Z,
                                       PIPE_TEX_FACE_NEG_Z);
            LLVMBuildStore(bld->builder, *face_s, face_s_var);
            LLVMBuildStore(bld->builder, *face_t, face_t_var);
            LLVMBuildStore(bld->builder, *face, face_var);
         }
         lp_build_endif(&if_ctx2);
         lp_build_flow_scope_end(flow_ctx2);
         lp_build_flow_destroy(flow_ctx2);
         *face_s = face_s2;
         *face_t = face_t2;
         *face = face2;
      }

      lp_build_endif(&if_ctx);
      lp_build_flow_scope_end(flow_ctx);
      lp_build_flow_destroy(flow_ctx);

      *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s");
      *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t");
      *face   = LLVMBuildLoad(bld->builder, face_var, "face");
   }
 }

@@ -659,11 +1022,21 @@ lp_build_sample_partial_offset(struct lp_build_context *bld,
       * Pixel blocks have power of two dimensions. LLVM should convert the
       * rem/div to bit arithmetic.
       * TODO: Verify this.
       * It does indeed BUT it does transform it to scalar (and back) when doing so
       * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
       * The generated code looks seriously unfunny and is quite expensive.
       */

 #if 0
      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
      subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
      coord    = LLVMBuildUDiv(bld->builder, coord, block_width, "");
 #else
      unsigned logbase2 = util_unsigned_logbase2(block_length);
      LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2);
      LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1);
      subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, "");
      coord = LLVMBuildLShr(bld->builder, coord, block_shift, "");
 #endif
   }

   offset = lp_build_mul(bld, coord, stride);
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -82,12 +82,10 @@ struct lp_sampler_static_state
   unsigned compare_mode:1;
   unsigned compare_func:3;
   unsigned normalized_coords:1;
   float lod_bias, min_lod, max_lod;
   float border_color[4];

   /* Aero hacks */
   unsigned force_nearest_s:1;
   unsigned force_nearest_t:1;
   unsigned min_max_lod_equal:1;  /**< min_lod == max_lod ? */
   unsigned lod_bias_non_zero:1;
   unsigned apply_min_lod:1;  /**< min_lod > 0 ? */
   unsigned apply_max_lod:1;  /**< max_lod < last_level ? */
 };


@@ -104,45 +102,67 @@ struct lp_sampler_static_state
 struct lp_sampler_dynamic_state
 {

   /** Obtain the base texture width. */
   /** Obtain the base texture width (returns int32) */
   LLVMValueRef
   (*width)( const struct lp_sampler_dynamic_state *state,
             LLVMBuilderRef builder,
             unsigned unit);

   /** Obtain the base texture height. */
   /** Obtain the base texture height (returns int32) */
   LLVMValueRef
   (*height)( const struct lp_sampler_dynamic_state *state,
              LLVMBuilderRef builder,
              unsigned unit);

   /** Obtain the base texture depth. */
   /** Obtain the base texture depth (returns int32) */
   LLVMValueRef
   (*depth)( const struct lp_sampler_dynamic_state *state,
             LLVMBuilderRef builder,
             unsigned unit);

   /** Obtain the number of mipmap levels (minus one). */
   /** Obtain the number of mipmap levels minus one (returns int32) */
   LLVMValueRef
   (*last_level)( const struct lp_sampler_dynamic_state *state,
                  LLVMBuilderRef builder,
                  unsigned unit);

   /** Obtain stride in bytes between image rows/blocks (returns int32) */
   LLVMValueRef
   (*row_stride)( const struct lp_sampler_dynamic_state *state,
                  LLVMBuilderRef builder,
                  unsigned unit);

   /** Obtain stride in bytes between image slices (returns int32) */
   LLVMValueRef
   (*img_stride)( const struct lp_sampler_dynamic_state *state,
                  LLVMBuilderRef builder,
                  unsigned unit);

   /** Obtain pointer to array of pointers to mipmap levels */
   LLVMValueRef
   (*data_ptr)( const struct lp_sampler_dynamic_state *state,
                LLVMBuilderRef builder,
                unsigned unit);

   /** Obtain texture min lod (returns float) */
   LLVMValueRef
   (*min_lod)(const struct lp_sampler_dynamic_state *state,
              LLVMBuilderRef builder, unsigned unit);

   /** Obtain texture max lod (returns float) */
   LLVMValueRef
   (*max_lod)(const struct lp_sampler_dynamic_state *state,
              LLVMBuilderRef builder, unsigned unit);

   /** Obtain texture lod bias (returns float) */
   LLVMValueRef
   (*lod_bias)(const struct lp_sampler_dynamic_state *state,
               LLVMBuilderRef builder, unsigned unit);

   /** Obtain texture border color (returns ptr to float[4]) */
   LLVMValueRef
   (*border_color)(const struct lp_sampler_dynamic_state *state,
                   LLVMBuilderRef builder, unsigned unit);
 };


@@ -159,10 +179,16 @@ struct lp_build_sample_context

   const struct util_format_description *format_desc;

   /* See texture_dims() */
   unsigned dims;

   /** regular scalar float type */
   struct lp_type float_type;
   struct lp_build_context float_bld;

   /** float vector type */
   struct lp_build_context float_vec_bld;

   /** regular scalar int type */
   struct lp_type int_type;
   struct lp_build_context int_bld;
@@ -171,17 +197,32 @@ struct lp_build_sample_context
   struct lp_type coord_type;
   struct lp_build_context coord_bld;

   /** Unsigned integer coordinates */
   struct lp_type uint_coord_type;
   struct lp_build_context uint_coord_bld;

   /** Signed integer coordinates */
   struct lp_type int_coord_type;
   struct lp_build_context int_coord_bld;

   /** Signed integer texture size */
   struct lp_type int_size_type;
   struct lp_build_context int_size_bld;

   /** Float texture size */
   struct lp_type float_size_type;
   struct lp_build_context float_size_bld;

   /** Output texels type and build context */
   struct lp_type texel_type;
   struct lp_build_context texel_bld;

   /* Common dynamic state values */
   LLVMValueRef width;
   LLVMValueRef height;
   LLVMValueRef depth;
   LLVMValueRef row_stride_array;
   LLVMValueRef img_stride_array;
   LLVMValueRef data_array;

   /** Integer vector with texture width, height, depth */
   LLVMValueRef int_size;
 };


@@ -218,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
 }


 static INLINE int
 static INLINE unsigned
 texture_dims(enum pipe_texture_target tex)
 {
   switch (tex) {
@@ -237,6 +278,11 @@ texture_dims(enum pipe_texture_target tex)
 }


 boolean
 lp_sampler_wrap_mode_uses_border_color(unsigned mode,
                                       unsigned min_img_filter,
                                       unsigned mag_img_filter);

 /**
 * Derive the sampler static state.
 */
@@ -246,15 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                        const struct pipe_sampler_state *sampler);


 LLVMValueRef
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
                      const LLVMValueRef ddy[4],
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      LLVMValueRef width,
                      LLVMValueRef height,
                      LLVMValueRef depth);
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart);

 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -265,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out,
                           LLVMValueRef *weight_out);
                           LLVMValueRef *level1_out);

 LLVMValueRef
 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
                          LLVMValueRef data_array, LLVMValueRef level);
                          LLVMValueRef level);

 LLVMValueRef
 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
                                LLVMValueRef data_array, int level);
                                int level);


 void
 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            unsigned dims,
                            LLVMValueRef width_vec,
                            LLVMValueRef height_vec,
                            LLVMValueRef depth_vec,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef row_stride_array,
                            LLVMValueRef img_stride_array,
                            LLVMValueRef *width0_vec,
                            LLVMValueRef *width1_vec,
                            LLVMValueRef *height0_vec,
                            LLVMValueRef *height1_vec,
                            LLVMValueRef *depth0_vec,
                            LLVMValueRef *depth1_vec,
                            LLVMValueRef *row_stride0_vec,
                            LLVMValueRef *row_stride1_vec,
                            LLVMValueRef *img_stride0_vec,
                            LLVMValueRef *img_stride1_vec);
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size_vec,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec);


 void
 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                             struct lp_type size_type,
                             struct lp_type coord_type,
                             LLVMValueRef size,
                             LLVMValueRef *out_width,
                             LLVMValueRef *out_height,
                             LLVMValueRef *out_depth);


 void
 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
                             LLVMValueRef flt_size,
                             LLVMValueRef *s,
                             LLVMValueRef *t,
                             LLVMValueRef *r);


 void
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -45,6 +45,7 @@
 #include "lp_bld_const.h"
 #include "lp_bld_conv.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_bitarit.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_pack.h"
@@ -80,20 +81,21 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
 {
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
      else
         /* Signed remainder won't give the right results for negative
          * dividends but unsigned remainder does.*/
      else {
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
         coord = LLVMBuildAdd(bld->builder, coord, bias, "");
         coord = LLVMBuildURem(bld->builder, coord, length, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
@@ -111,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
      assert(0);
   }

   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
 }

@@ -144,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
 {
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;
@@ -186,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
    * multiplication.
    */

   *i0 = uint_coord_bld->zero;
   *i1 = uint_coord_bld->zero;
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

@@ -197,17 +198,18 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
      }
      else {
         /* Signed remainder won't give the right results for negative
          * dividends but unsigned remainder does.*/
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
         coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
      }

      mask = lp_build_compare(bld->builder, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(bld->builder,
                              lp_build_add(uint_coord_bld, *offset0, stride),
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

@@ -222,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,

      mask = LLVMBuildAnd(bld->builder, lmask, umask, "");

      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = lp_build_add(uint_coord_bld,
      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(bld->builder, stride, mask, ""));
      break;
@@ -236,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = uint_coord_bld->zero;
      *offset1 = uint_coord_bld->zero;
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
 }
@@ -250,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
 */
 static void
 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef width_vec,
                              LLVMValueRef height_vec,
                              LLVMValueRef depth_vec,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
@@ -262,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef *colors_lo,
                              LLVMValueRef *colors_hi)
 {
   const int dims = texture_dims(bld->static_state->target);
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart, r_ipart;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
@@ -280,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   lp_build_extract_image_sizes(bld,
                                bld->int_size_type,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   if (bld->static_state->normalized_coords) {
      /* s = s * width, t = t * height */
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
                                              coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      if (dims >= 2) {
         LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
                                                  coord_vec_type, "");
         t = lp_build_mul(&bld->coord_bld, t, fp_height);
         if (dims >= 3) {
            LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
                                                    coord_vec_type, "");
            r = lp_build_mul(&bld->coord_bld, r, fp_depth);
         }
      }
   }
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   if (dims >= 2)
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   if (dims >= 3)
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);
      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -321,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
   x_stride = lp_build_const_vec(bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
@@ -340,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                       bld->static_state->pot_height,
                                       bld->static_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
@@ -349,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                          bld->static_state->pot_height,
                                          bld->static_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         LLVMValueRef z_offset;
         /* The r coord is the cube face in [0,5] */
         z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
         z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }

@@ -414,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 */
 static void
 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef width_vec,
                             LLVMValueRef height_vec,
                             LLVMValueRef depth_vec,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
@@ -426,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef *colors_lo,
                             LLVMValueRef *colors_hi)
 {
   const int dims = texture_dims(bld->static_state->target);
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
   LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
@@ -455,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   lp_build_extract_image_sizes(bld,
                                bld->int_size_type,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   if (bld->static_state->normalized_coords) {
      /* s = s * width, t = t * height */
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
                                              coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      if (dims >= 2) {
         LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
                                                  coord_vec_type, "");
         t = lp_build_mul(&bld->coord_bld, t, fp_height);
      }
      if (dims >= 3) {
         LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
                                                 coord_vec_type, "");
         r = lp_build_mul(&bld->coord_bld, r, fp_depth);
      }
   }
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   if (dims >= 2)
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   if (dims >= 3)
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);
      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -489,10 +495,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(i32.type, -128);
   if (!bld->static_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_state->force_nearest_t) {
   s = LLVMBuildAdd(builder, s, i32_c128, "");
   if (dims >= 2) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
@@ -516,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
   x_stride = lp_build_const_vec(bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;
@@ -547,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
@@ -565,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
@@ -709,82 +713,56 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_state->force_nearest_s) {
      /* special case 1-D lerp */
      packed_lo = lp_build_lerp(&h16,
                                t_fpart_lo,
                                neighbors_lo[0][0][0],
                                neighbors_lo[0][0][1]);

      packed_hi = lp_build_lerp(&h16,
                                t_fpart_hi,
                                neighbors_hi[0][1][0],
                                neighbors_hi[0][1][0]);
   }
   else if (bld->static_state->force_nearest_t) {
      /* special case 1-D lerp */
   if (dims == 1) {
      /* 1-D lerp */
      packed_lo = lp_build_lerp(&h16,
                                s_fpart_lo,
                                neighbors_lo[0][0][0],
                                neighbors_lo[0][0][1]);
 				s_fpart_lo,
 				neighbors_lo[0][0][0],
 				neighbors_lo[0][0][1]);

      packed_hi = lp_build_lerp(&h16,
                                s_fpart_hi,
                                neighbors_hi[0][0][0],
                                neighbors_hi[0][0][1]);
 				s_fpart_hi,
 				neighbors_hi[0][0][0],
 				neighbors_hi[0][0][1]);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed_lo = lp_build_lerp(&h16,
                                   s_fpart_lo,
                                   neighbors_lo[0][0][0],
                                   neighbors_lo[0][0][1]);

         packed_hi = lp_build_lerp(&h16,
                                   s_fpart_hi,
                                   neighbors_hi[0][0][0],
                                   neighbors_hi[0][0][1]);
      }
      else {
         /* 2-D lerp */
         packed_lo = lp_build_lerp_2d(&h16,
                                      s_fpart_lo, t_fpart_lo,
                                      neighbors_lo[0][0][0],
                                      neighbors_lo[0][0][1],
                                      neighbors_lo[0][1][0],
                                      neighbors_lo[0][1][1]);

         packed_hi = lp_build_lerp_2d(&h16,
                                      s_fpart_hi, t_fpart_hi,
                                      neighbors_hi[0][0][0],
                                      neighbors_hi[0][0][1],
                                      neighbors_hi[0][1][0],
                                      neighbors_hi[0][1][1]);

         if (dims >= 3) {
            LLVMValueRef packed_lo2, packed_hi2;

            /* lerp in the second z slice */
            packed_lo2 = lp_build_lerp_2d(&h16,
                                          s_fpart_lo, t_fpart_lo,
                                          neighbors_lo[1][0][0],
                                          neighbors_lo[1][0][1],
                                          neighbors_lo[1][1][0],
                                          neighbors_lo[1][1][1]);

            packed_hi2 = lp_build_lerp_2d(&h16,
                                          s_fpart_hi, t_fpart_hi,
                                          neighbors_hi[1][0][0],
                                          neighbors_hi[1][0][1],
                                          neighbors_hi[1][1][0],
                                          neighbors_hi[1][1][1]);
            /* interp between two z slices */
            packed_lo = lp_build_lerp(&h16, r_fpart_lo,
                                      packed_lo, packed_lo2);
            packed_hi = lp_build_lerp(&h16, r_fpart_hi,
                                      packed_hi, packed_hi2);
         }
      /* 2-D lerp */
      packed_lo = lp_build_lerp_2d(&h16,
 				   s_fpart_lo, t_fpart_lo,
 				   neighbors_lo[0][0][0],
 				   neighbors_lo[0][0][1],
 				   neighbors_lo[0][1][0],
 				   neighbors_lo[0][1][1]);

      packed_hi = lp_build_lerp_2d(&h16,
 				   s_fpart_hi, t_fpart_hi,
 				   neighbors_hi[0][0][0],
 				   neighbors_hi[0][0][1],
 				   neighbors_hi[0][1][0],
 				   neighbors_hi[0][1][1]);

      if (dims >= 3) {
 	 LLVMValueRef packed_lo2, packed_hi2;

 	 /* lerp in the second z slice */
 	 packed_lo2 = lp_build_lerp_2d(&h16,
 				       s_fpart_lo, t_fpart_lo,
 				       neighbors_lo[1][0][0],
 				       neighbors_lo[1][0][1],
 				       neighbors_lo[1][1][0],
 				       neighbors_lo[1][1][1]);

 	 packed_hi2 = lp_build_lerp_2d(&h16,
 				       s_fpart_hi, t_fpart_hi,
 				       neighbors_hi[1][0][0],
 				       neighbors_hi[1][0][1],
 				       neighbors_hi[1][1][0],
 				       neighbors_hi[1][1][1]);
 	 /* interp between two z slices */
 	 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
 				   packed_lo, packed_lo2);
 	 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
 				   packed_hi, packed_hi2);
      }
   }

@@ -806,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef width0_vec,
                       LLVMValueRef width1_vec,
                       LLVMValueRef height0_vec,
                       LLVMValueRef height1_vec,
                       LLVMValueRef depth0_vec,
                       LLVMValueRef depth1_vec,
                       LLVMValueRef row_stride0_vec,
                       LLVMValueRef row_stride1_vec,
                       LLVMValueRef img_stride0_vec,
                       LLVMValueRef img_stride1_vec,
                       LLVMValueRef data_ptr0,
                       LLVMValueRef data_ptr1,
                       LLVMValueRef *colors_lo,
                       LLVMValueRef *colors_hi)
                       LLVMValueRef colors_lo_var,
                       LLVMValueRef colors_hi_var)
 {
   LLVMBuilderRef builder = bld->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec;
   LLVMValueRef row_stride1_vec;
   LLVMValueRef img_stride0_vec;
   LLVMValueRef img_stride1_vec;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;


   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      /* sample the first mipmap level */
      lp_build_sample_image_nearest(bld,
                                    width0_vec, height0_vec, depth0_vec,
                                    size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r,
                                    &colors0_lo, &colors0_hi);

      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
         /* sample the second mipmap level */
         lp_build_sample_image_nearest(bld,
                                       width1_vec, height1_vec, depth1_vec,
                                       row_stride1_vec, img_stride1_vec,
                                       data_ptr1, s, t, r,
                                       &colors1_lo, &colors1_hi);
      }
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);

      /* sample the first mipmap level */
      lp_build_sample_image_linear(bld,
                                   width0_vec, height0_vec, depth0_vec,
                                   size0,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, s, t, r,
                                   &colors0_lo, &colors0_hi);

      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
         /* sample the second mipmap level */
         lp_build_sample_image_linear(bld,
                                      width1_vec, height1_vec, depth1_vec,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, s, t, r,
                                      &colors1_lo, &colors1_hi);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   LLVMBuildStore(builder, colors0_hi, colors_hi_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* interpolate samples from the two mipmap levels */
      struct lp_build_context h16;
      lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));

      *colors_lo = lp_build_lerp(&h16, lod_fpart,
                                 colors0_lo, colors1_lo);
      *colors_hi = lp_build_lerp(&h16, lod_fpart,
                                 colors0_hi, colors1_hi);
   }
   else {
      /* use first/only level's colors */
      *colors_lo = colors0_lo;
      *colors_hi = colors0_hi;
      LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
      LLVMTypeRef i32_type = LLVMIntType(32);
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                lod_fpart, LLVMConstNull(i32_type),
                                "need_lerp");

      lp_build_if(&if_ctx, builder, need_lerp);
      {
         struct lp_build_context h16_bld;

         lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld,
                                          size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, s, t, r,
                                          &colors1_lo, &colors1_hi);
         }
         else {
            lp_build_sample_image_linear(bld,
                                         size1,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, s, t, r,
                                         &colors1_lo, &colors1_hi);
         }

         /* interpolate samples from the two mipmap levels */

         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

 #if HAVE_LLVM == 0x208
         /* This is a work-around for a bug in LLVM 2.8.
          * Evidently, something goes wrong in the construction of the
          * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
          * to force the vector to be properly constructed.
          * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
          */
         {
            LLVMValueRef shuffles[8], shuffle;
            int i;
            assert(h16_bld.type.length <= Elements(shuffles));
            for (i = 0; i < h16_bld.type.length; i++)
               shuffles[i] = lp_build_const_int32(2 * (i & 1));
            shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
            lod_fpart = LLVMBuildShuffleVector(builder,
                                               lod_fpart, lod_fpart,
                                               shuffle, "");
         }
 #endif

         colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_lo, colors1_lo);
         colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_hi, colors1_hi);

         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
      }
      lp_build_endif(&if_ctx);
   }
 }

@@ -896,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef width,
                    LLVMValueRef height,
                    LLVMValueRef depth,
                    LLVMValueRef width_vec,
                    LLVMValueRef height_vec,
                    LLVMValueRef depth_vec,
                    LLVMValueRef row_stride_array,
                    LLVMValueRef img_stride_array,
                    LLVMValueRef data_array,
                    LLVMValueRef texel_out[4])
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lod = NULL, lod_fpart = NULL;
   const unsigned dims = bld->dims;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
   LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
   LLVMValueRef data_ptr0, data_ptr1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16;
   LLVMTypeRef h16_vec_type;
   struct lp_build_context h16_bld;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
@@ -935,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   h16_vec_type = lp_build_vec_type(h16.type);

   lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));

   /* cube face selection, compute pre-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
@@ -949,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
      face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
      face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }


   /*
    * Compute the level of detail (float).
    */
@@ -970,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lod = lp_build_lod_selector(bld, ddx, ddy,
                                  lod_bias, explicit_lod,
                                  width, height, depth);
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      lod_ipart = i32t_zero;
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    * If mipfilter=linear, also compute the weight between the two
    * mipmap levels: lod_fpart
    */
   switch (mip_filter) {
   default:
@@ -991,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
         ilevel0 = i32t_zero;
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod);
      lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      {
         LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
         LLVMValueRef i255 = lp_build_const_int32(255);
         LLVMTypeRef i16_type = LLVMIntType(16);

         assert(lod);

         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
                                    &lod_fpart);
         lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
         lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
         lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

         /* the lod_fpart values will be fixed pt values in [0,1) */
      }
      assert(lod_ipart);
      assert(lod_fpart);
      lp_build_linear_mip_levels(bld, unit,
                                 lod_ipart, &lod_fpart,
                                 &ilevel0, &ilevel1);
      break;
   }

   /* compute image size(s) of source mipmap level(s) */
   lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
                               ilevel0, ilevel1,
                               row_stride_array, img_stride_array,
                               &width0_vec, &width1_vec,
                               &height0_vec, &height1_vec,
                               &depth0_vec, &depth1_vec,
                               &row_stride0_vec, &row_stride1_vec,
                               &img_stride0_vec, &img_stride1_vec);

   /*
    * Get pointer(s) to image data for mipmap level(s).
    * Get/interpolate texture colors.
    */
   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
   }

   packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");

   /*
    * Get/interpolate texture colors.
    */
   if (min_filter == mag_filter) {
      /* no need to distinquish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             s, t, r, lod_fpart,
                             width0_vec, width1_vec,
                             height0_vec, height1_vec,
                             depth0_vec, depth1_vec,
                             row_stride0_vec, row_stride1_vec,
                             img_stride0_vec, img_stride1_vec,
                             data_ptr0, data_ptr1,
                             &packed_lo, &packed_hi);
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      flow_ctx = lp_build_flow_create(builder);
      lp_build_flow_scope_begin(flow_ctx);

      packed_lo = LLVMGetUndef(h16_vec_type);
      packed_hi = LLVMGetUndef(h16_vec_type);
      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_flow_scope_declare(flow_ctx, &packed_lo);
      lp_build_flow_scope_declare(flow_ctx, &packed_hi);

      /* minify = lod > 0.0 */
      minify = LLVMBuildFCmp(builder, LLVMRealUGE,
                             lod, float_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, builder, minify);
      lp_build_if(&if_ctx, builder, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld, min_filter, mip_filter,
                                s, t, r, lod_fpart,
                                width0_vec, width1_vec,
                                height0_vec, height1_vec,
                                depth0_vec, depth1_vec,
                                row_stride0_vec, row_stride1_vec,
                                img_stride0_vec, img_stride1_vec,
                                data_ptr0, data_ptr1,
                                &packed_lo, &packed_hi);
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld, mag_filter, mip_filter,
                                s, t, r, lod_fpart,
                                width0_vec, width1_vec,
                                height0_vec, height1_vec,
                                depth0_vec, depth1_vec,
                                row_stride0_vec, row_stride1_vec,
                                img_stride0_vec, img_stride1_vec,
                                data_ptr0, data_ptr1,
                                &packed_lo, &packed_hi);
         lp_build_sample_mipmap(bld, 
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                i32t_zero, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&if_ctx);

      lp_build_flow_scope_end(flow_ctx);
      lp_build_flow_destroy(flow_ctx);
   }

   /* combine 'packed_lo', 'packed_hi' into 'packed' */
   {
      struct lp_build_context h16, u8n;

      lp_build_context_init(&h16, builder, lp_type_ufixed(16));
      lp_build_context_init(&u8n, builder, lp_type_unorm(8));

      packed = lp_build_pack2(builder, h16.type, u8n.type,
                              packed_lo, packed_hi);
   }
   /*
    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
    */
   packed = lp_build_pack2(builder,
                           h16_bld.type, lp_type_unorm(8),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

   /*
    * Convert to SoA and swizzle.
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef width,
                    LLVMValueRef height,
                    LLVMValueRef depth,
                    LLVMValueRef width_vec,
                    LLVMValueRef height_vec,
                    LLVMValueRef depth_vec,
                    LLVMValueRef row_stride_array,
                    LLVMValueRef img_stride_array,
                    LLVMValueRef data_array,
                    LLVMValueRef texel_out[4]);


--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -100,6 +100,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
 }


 /**
 * Combined extract and broadcast (or a mere shuffle when the two types match).
 *
 * Selects the element of \p vector designated by \p index and replicates it
 * across a value of type \p dst_type.
 *
 * \param builder   LLVM builder used to emit the instructions
 * \param src_type  type of \p vector
 * \param dst_type  type of the result; must have the same floating-ness and
 *                  element width as \p src_type (asserted)
 * \param vector    source value, must match \p src_type (asserted)
 * \param index     element index, must be an i32 value (asserted)
 *
 * \return \p vector itself for scalar -> scalar, the extracted scalar for
 *         vector -> scalar, and a vector of type \p dst_type otherwise.
 */
 LLVMValueRef
 lp_build_extract_broadcast(LLVMBuilderRef builder,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
 {
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width    == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */

         res = vector;
      }
      else {
         /*
          * Broadcast scalar -> vector.
          * The source is already a scalar here, so 'index' is not needed.
          */

         res = lp_build_broadcast(builder,
                                  lp_build_vec_type(dst_type),
                                  vector);
      }
   }
   else {
      if (dst_type.length == src_type.length) {
         /*
          * Special shuffle of the same size: a shuffle mask made of
          * 'index' repeated in every lane picks the same source element
          * for all destination lanes.
          */

         LLVMValueRef shuffle;
         shuffle = lp_build_broadcast(builder,
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
         res = LLVMBuildShuffleVector(builder, vector,
                                      LLVMGetUndef(lp_build_vec_type(dst_type)),
                                      shuffle, "");
      }
      else {
         LLVMValueRef scalar;
         scalar = LLVMBuildExtractElement(builder, vector, index, "");
         if (dst_type.length == 1) {
            /*
             * Trivial extract scalar from vector.
             */

            res = scalar;
         }
         else {
            /*
             * General case of different sized vectors.
             *
             * Broadcast the extracted element, not the whole source
             * vector: lp_build_broadcast() is used above with a scalar
             * operand, and passing 'vector' here would discard the
             * extraction done just before.
             */

            res = lp_build_broadcast(builder,
                                     lp_build_vec_type(dst_type),
                                     scalar);
         }
      }
   }

   return res;
 }


 /**
 * Swizzle one channel into all other three channels.
 */
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
                          LLVMValueRef scalar);


 /**
 * Combined extract and broadcast: take the element of 'vector' (of type
 * src_type) selected by the i32 value 'index' and produce a value of type
 * dst_type from it.
 */
 LLVMValueRef
 lp_build_extract_broadcast(LLVMBuilderRef builder,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index);


 /**
 * Broadcast one channel of a vector composed of arrays of XYZW structures into
 * all four channel.
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -36,6 +36,9 @@
 #define LP_BLD_TGSI_H

 #include "gallivm/lp_bld.h"
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"


 struct tgsi_token;
@@ -54,6 +57,75 @@ enum lp_build_tex_modifier {
 };


 /**
 * Describe a channel of a register.
 *
 * The value can be:
 * - an immediate value (i.e. derived from an IMM register)
 * - CONST[n].x/y/z/w
 * - IN[n].x/y/z/w
 * - undetermined (when .file == TGSI_FILE_NULL)
 *
 * This is one of the analysis results, and is used to describe
 * the output color in terms of inputs.
 */
 struct lp_tgsi_channel_info
 {
   unsigned file:4; /* TGSI_FILE_* */
   unsigned swizzle:3; /* PIPE_SWIZZLE_x; source channel, for non-immediate files */
   union {
      uint32_t index; /* register index, for non-immediate files */
      float value; /* for TGSI_FILE_IMMEDIATE */
   } u;
 };


 /**
 * Describe a texture sampler interpolator.
 *
 * The interpolation is described in terms of regular inputs: each entry of
 * coord[] tells where the corresponding texture coordinate channel comes
 * from.
 */
 struct lp_tgsi_texture_info
 {
   struct lp_tgsi_channel_info coord[4]; /* origin of each coordinate channel */
   unsigned target:8; /* TGSI_TEXTURE_* */
   unsigned unit:8;  /* Sampler unit */
   unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
 };


 /**
 * Results of the shader analysis done by lp_build_tgsi_info(): the generic
 * tgsi_scan_shader() information plus a description of the texture opcodes
 * and of the shader outputs.
 */
 struct lp_tgsi_info
 {
   /* Filled in by tgsi_scan_shader(). */
   struct tgsi_shader_info base;

   /*
    * Whether any of the texture opcodes access a register file other than
    * TGSI_FILE_INPUT.
    *
    * Also set for texture opcodes with explicit derivatives, and when there
    * are more texture opcodes than entries in tex[].
    *
    * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
    * benefit.
    */
   unsigned indirect_textures:1;

   /*
    * Texture opcode description. Aimed at detecting and describing direct
    * texture opcodes.
    */
   unsigned num_texs; /* number of valid entries in tex[] */
   struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];

   /*
    * Output description. Aimed at detecting and describing simple blit
    * shaders.
    */
   struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];

   /*
    * Shortcut pointers into the above (for fragment shaders).
    * Indexed by the semantic index of the color output.
    */
   const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
 };

 /**
 * Sampler code generation interface.
 *
@@ -96,6 +168,11 @@ struct lp_build_sampler_aos
 };


 /**
 * Analyse the given TGSI token stream and fill in 'info'
 * (see struct lp_tgsi_info).
 */
 void
 lp_build_tgsi_info(const struct tgsi_token *tokens,
                   struct lp_tgsi_info *info);


 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -0,0 +1,479 @@
 /**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/


 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_dump.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_tgsi.h"


 /**
 * Analysis context.
 *
 * This is where we store the value of each channel of the IMM/TEMP/OUT
 * registers, as we walk the shader.
 */
 struct analysis_context
 {
   /* Analysis results being filled in (also holds the OUT register info). */
   struct lp_tgsi_info *info;

   /* Number of immediates recorded so far, and their values. */
   unsigned num_imms;
   float imm[32][4];

   /* Current known description of each temporary register channel. */
   struct lp_tgsi_channel_info temp[32][4];
 };


 /**
 * Describe the specified channel of the src register.
 */
 static void
 analyse_src(struct analysis_context *ctx,
            struct lp_tgsi_channel_info *chan_info,
            const struct tgsi_src_register *src,
            unsigned chan)
 {
   chan_info->file = TGSI_FILE_NULL;
   if (!src->Indirect && !src->Absolute && !src->Negate) {
      unsigned swizzle = tgsi_util_get_src_register_swizzle(src, chan);
      if (src->File == TGSI_FILE_TEMPORARY) {
         if (src->Index < Elements(ctx->temp)) {
            *chan_info = ctx->temp[src->Index][swizzle];
         }
      } else {
         chan_info->file = src->File;
         if (src->File == TGSI_FILE_IMMEDIATE) {
            assert(src->Index < Elements(ctx->imm));
            if (src->Index < Elements(ctx->imm)) {
               chan_info->u.value = ctx->imm[src->Index][swizzle];
            }
         } else {
            chan_info->u.index = src->Index;
            chan_info->swizzle = swizzle;
         }
      }
   }
 }


 /**
 * Whether this register channel refers to a specific immediate value.
 */
 static boolean
 is_immediate(const struct lp_tgsi_channel_info *chan_info, float value)
 {
   return chan_info->file == TGSI_FILE_IMMEDIATE &&
          chan_info->u.value == value;
 }


 static void
 analyse_tex(struct analysis_context *ctx,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier)
 {
   struct lp_tgsi_info *info = ctx->info;
   unsigned chan;

   if (info->num_texs < Elements(info->tex)) {
      struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
      bool indirect = FALSE;
      unsigned readmask = 0;

      tex_info->target = inst->Texture.Texture;
      switch (inst->Texture.Texture) {
      case TGSI_TEXTURE_1D:
         readmask = TGSI_WRITEMASK_X;
         break;
      case TGSI_TEXTURE_2D:
      case TGSI_TEXTURE_RECT:
         readmask = TGSI_WRITEMASK_XY;
         break;
      case TGSI_TEXTURE_SHADOW1D:
      case TGSI_TEXTURE_SHADOW2D:
      case TGSI_TEXTURE_SHADOWRECT:
      case TGSI_TEXTURE_3D:
      case TGSI_TEXTURE_CUBE:
         readmask = TGSI_WRITEMASK_XYZ;
         break;
      default:
         assert(0);
         return;
      }

      if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
         /* We don't track explicit derivatives, although we could */
         indirect = TRUE;
         tex_info->unit = inst->Src[3].Register.Index;
      }  else {
         if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
             modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
             modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
            readmask |= TGSI_WRITEMASK_W;
         }
         tex_info->unit = inst->Src[1].Register.Index;
      }

      for (chan = 0; chan < 4; ++chan) {
         struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan];
         if (readmask & (1 << chan)) {
            analyse_src(ctx, chan_info, &inst->Src[0].Register, chan);
            if (chan_info->file != TGSI_FILE_INPUT) {
               indirect = TRUE;
            }
         } else {
            memset(chan_info, 0, sizeof *chan_info);
         }
      }

      if (indirect) {
         info->indirect_textures = TRUE;
      }

      ++info->num_texs;
   } else {
      info->indirect_textures = TRUE;
   }
 }


 /**
 * Process an instruction, and update the register values accordingly.
 *
 * For every destination register of the instruction:
 * - texture opcodes are recorded through analyse_tex();
 * - the tracked description of the written TEMP/OUT channels is updated
 *   (only MOV and MUL by an immediate 0.0/1.0 propagate a known value, any
 *   other write makes the channel undetermined).
 *
 * Control flow opcodes reset everything known about temporaries and outputs.
 */
 static void
 analyse_instruction(struct analysis_context *ctx,
                     struct tgsi_full_instruction *inst)
 {
   struct lp_tgsi_info *info = ctx->info;
   struct lp_tgsi_channel_info (*regs)[4];
   unsigned max_regs;
   unsigned i;
   unsigned index;
   unsigned chan;

   for (i = 0; i < inst->Instruction.NumDstRegs; ++i) {
      const struct tgsi_dst_register *dst = &inst->Dst[i].Register;

      /*
       * Get the lp_tgsi_channel_info array corresponding to the destination
       * register file.
       */

      if (dst->File == TGSI_FILE_TEMPORARY) {
         regs = ctx->temp;
         max_regs = Elements(ctx->temp);
      } else if (dst->File == TGSI_FILE_OUTPUT) {
         regs = info->output;
         max_regs = Elements(info->output);
      } else if (dst->File == TGSI_FILE_ADDRESS ||
                 dst->File == TGSI_FILE_PREDICATE) {
         /* Writes to these files are not tracked. */
         continue;
      } else {
         assert(0);
         continue;
      }

      /*
       * Detect direct TEX instructions
       */

      switch (inst->Instruction.Opcode) {
      case TGSI_OPCODE_TEX:
         analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
         break;
      case TGSI_OPCODE_TXD:
         analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
         break;
      case TGSI_OPCODE_TXB:
         analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
         break;
      case TGSI_OPCODE_TXL:
         analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
         break;
      case TGSI_OPCODE_TXP:
         analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
         break;
      default:
         break;
      }

      /*
       * Keep track of assignments and writes
       */

      if (dst->Indirect) {
         /*
          * It could be any register index so clear all register indices.
          */

         for (chan = 0; chan < 4; ++chan) {
            if (dst->WriteMask & (1 << chan)) {
               for (index = 0; index < max_regs; ++index) {
                  regs[index][chan].file = TGSI_FILE_NULL;
               }
            }
         }
      } else if (dst->Index < max_regs) {
         /*
          * Update this destination register value.
          */

         struct lp_tgsi_channel_info res[4];

         /* Start from "undetermined" for every channel. */
         memset(res, 0, sizeof res);

         /* Predicated or saturated writes are never propagated. */
         if (!inst->Instruction.Predicate &&
             !inst->Instruction.Saturate) {
            for (chan = 0; chan < 4; ++chan) {
               if (dst->WriteMask & (1 << chan)) {
                  if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
                     analyse_src(ctx, &res[chan],
                                 &inst->Src[0].Register, chan);
                  } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
                     /*
                      * Propagate values across 1.0 and 0.0 multiplications.
                      */

                     struct lp_tgsi_channel_info src0;
                     struct lp_tgsi_channel_info src1;

                     analyse_src(ctx, &src0, &inst->Src[0].Register, chan);
                     analyse_src(ctx, &src1, &inst->Src[1].Register, chan);

                     /* x * 0 -> 0 (the zero operand), x * 1 -> x */
                     if (is_immediate(&src0, 0.0f)) {
                        res[chan] = src0;
                     } else if (is_immediate(&src1, 0.0f)) {
                        res[chan] = src1;
                     } else if (is_immediate(&src0, 1.0f)) {
                        res[chan] = src1;
                     } else if (is_immediate(&src1, 1.0f)) {
                        res[chan] = src0;
                     }
                  }
               }
            }
         }

         /* Commit the result for the channels actually written. */
         for (chan = 0; chan < 4; ++chan) {
            if (dst->WriteMask & (1 << chan)) {
               regs[dst->Index][chan] = res[chan];
            }
         }
      }
   }

   /*
    * Clear all temporaries information in presence of a control flow opcode.
    * The tracked output information is cleared as well.
    */

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_IFC:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_BREAKC:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_CALLNZ:
   case TGSI_OPCODE_CAL:
   case TGSI_OPCODE_BGNSUB:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_SWITCH:
   case TGSI_OPCODE_CASE:
   case TGSI_OPCODE_DEFAULT:
   case TGSI_OPCODE_ENDSWITCH:
   case TGSI_OPCODE_RET:
   case TGSI_OPCODE_END:
      /* XXX: Are there more cases? */
      memset(&ctx->temp, 0, sizeof ctx->temp);
      memset(&info->output, 0, sizeof info->output);
      /* fallthrough */
   default:
      break;
   }
 }


 /**
 * Debug helper: dump the shader tokens followed by the analysis results,
 * i.e. the origin of every texture coordinate and of every determined
 * output channel.
 */
 static INLINE void
 dump_info(const struct tgsi_token *tokens,
           struct lp_tgsi_info *info)
 {
   unsigned i;
   unsigned c;

   tgsi_dump(tokens, 0);

   /* Texture opcodes. */
   for (i = 0; i < info->num_texs; ++i) {
      const struct lp_tgsi_texture_info *tex = &info->tex[i];

      debug_printf("TEX[%u] =", i);

      for (c = 0; c < 4; ++c) {
         const struct lp_tgsi_channel_info *coord = &tex->coord[c];

         if (coord->file == TGSI_FILE_NULL) {
            debug_printf(" _");
            continue;
         }

         debug_printf(" %s[%u].%c",
                      tgsi_file_names[coord->file],
                      coord->u.index,
                      "xyzw01"[coord->swizzle]);
      }

      debug_printf(", SAMP[%u], %s\n",
                   tex->unit,
                   tgsi_texture_names[tex->target]);
   }

   /* Outputs: only channels with a determined origin are printed. */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      for (c = 0; c < 4; ++c) {
         const struct lp_tgsi_channel_info *out = &info->output[i][c];

         if (out->file == TGSI_FILE_NULL) {
            continue;
         }

         debug_printf("OUT[%u].%c = ", i, "xyzw"[c]);

         if (out->file == TGSI_FILE_IMMEDIATE) {
            debug_printf("%f", out->u.value);
         } else {
            const char *file_name;

            if (out->file == TGSI_FILE_CONSTANT) {
               file_name = "CONST";
            } else if (out->file == TGSI_FILE_INPUT) {
               file_name = "IN";
            } else {
               file_name = "???";
            }

            debug_printf("%s[%u].%c",
                         file_name,
                         out->u.index,
                         "xyzw01"[out->swizzle]);
         }

         debug_printf("\n");
      }
   }
 }


 /**
 * Analyse a shader: detect any direct relationship between the output
 * colors / texture coordinates and the shader inputs, constants or
 * immediates.
 *
 * Only the main function body is walked: the analysis stops at the first
 * END or BGNSUB opcode.
 *
 * \param tokens  TGSI token stream of the shader
 * \param info    analysis results; cleared and completely rewritten.
 *                info->cbuf[] entries are never NULL: color buffers without
 *                a matching COLOR output point at an all-undetermined
 *                (TGSI_FILE_NULL) description.
 */
 void
 lp_build_tgsi_info(const struct tgsi_token *tokens,
                   struct lp_tgsi_info *info)
 {
   /*
    * Shared placeholder for color buffers that have no matching output.
    *
    * It must have static storage duration: info->cbuf[] keeps pointing at it
    * after this function returns.  Being static it is also zero-initialised,
    * i.e. every channel reads as TGSI_FILE_NULL.
    */
   static const struct lp_tgsi_channel_info null_output[4];

   struct tgsi_parse_context parse;
   struct analysis_context ctx;
   unsigned index;
   unsigned chan;

   memset(info, 0, sizeof *info);

   tgsi_scan_shader(tokens, &info->base);

   memset(&ctx, 0, sizeof ctx);
   ctx.info = info;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            struct tgsi_full_instruction *inst =
                  &parse.FullToken.FullInstruction;

            if (inst->Instruction.Opcode == TGSI_OPCODE_END ||
                inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
               /* We reached the end of main function body. */
               goto finished;
            }

            analyse_instruction(&ctx, inst);
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         {
            const unsigned size =
                  parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            /* Immediates beyond the tracked range are silently ignored. */
            if (ctx.num_imms < Elements(ctx.imm)) {
               /*
                * Bound the copy by the row size as well, so that a bogus
                * token count cannot overrun ctx.imm[] when asserts are
                * compiled out.
                */
               for (chan = 0; chan < size && chan < 4; ++chan) {
                  ctx.imm[ctx.num_imms][chan] =
                        parse.FullToken.FullImmediate.u[chan].Float;
               }
               ++ctx.num_imms;
            }
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert(0);
      }
   }
 finished:

   tgsi_parse_free(&parse);


   /*
    * Link the output color values.
    */

   for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) {
      info->cbuf[index] = null_output;
   }

   for (index = 0; index < info->base.num_outputs; ++index) {
      unsigned semantic_name = info->base.output_semantic_name[index];
      unsigned semantic_index = info->base.output_semantic_index[index];
      if (semantic_name == TGSI_SEMANTIC_COLOR &&
          semantic_index < PIPE_MAX_COLOR_BUFS) {
         info->cbuf[semantic_index] = info->output[index];
      }
   }

   if (gallivm_debug & GALLIVM_DEBUG_TGSI) {
      dump_info(tokens, info);
   }
 }
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      LLVMTypeRef i32t = LLVMInt32Type();
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   }  else {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
@@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
                                  texel);
 }

 /**
 * Look a few instructions ahead of pc to decide whether the shader is
 * about to finish.
 *
 * Scans up to five instructions starting at index pc.  Returns TRUE as
 * soon as the scan runs past the last instruction or meets
 * TGSI_OPCODE_END, and also when none of the scanned instructions is a
 * texture, call, conditional, loop or switch opcode.  Returns FALSE when
 * one of those opcodes is found first.
 */
 static boolean
 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                    int pc)
 {
    enum { LOOKAHEAD = 5 };
    int ahead;

    for (ahead = 0; ahead < LOOKAHEAD; ++ahead) {
       const int idx = pc + ahead;
       unsigned opcode;

       /* Ran off the end of the instruction list. */
       if (idx >= bld->info->num_instructions)
          return TRUE;

       opcode = bld->instructions[idx].Instruction.Opcode;

       switch (opcode) {
       case TGSI_OPCODE_END:
          return TRUE;

       case TGSI_OPCODE_TEX:
       case TGSI_OPCODE_TXP:
       case TGSI_OPCODE_TXD:
       case TGSI_OPCODE_TXB:
       case TGSI_OPCODE_TXL:
       case TGSI_OPCODE_TXF:
       case TGSI_OPCODE_TXQ:
       case TGSI_OPCODE_CAL:
       case TGSI_OPCODE_CALLNZ:
       case TGSI_OPCODE_IF:
       case TGSI_OPCODE_IFC:
       case TGSI_OPCODE_BGNLOOP:
       case TGSI_OPCODE_SWITCH:
          /* Sampling or control flow still lies ahead. */
          return FALSE;

       default:
          break;
       }
    }

    return TRUE;
 }



 /**
 * Kill fragment if any of the src register values are negative.
@@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
 static void
 emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
   const struct tgsi_full_instruction *inst,
   int pc)
 {
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
@@ -959,8 +1001,12 @@ emit_kil(
      }
   }

   if(mask)
   if(mask) {
      lp_build_mask_update(bld->mask, mask);

      if (!near_end_of_shader(bld, pc))
 	 lp_build_mask_check(bld->mask);
   }
 }


@@ -972,7 +1018,8 @@ emit_kil(
 */
 static void
 emit_kilp(struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst)
          const struct tgsi_full_instruction *inst,
 	  int pc)
 {
   LLVMValueRef mask;

@@ -987,6 +1034,9 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
   }

   lp_build_mask_update(bld->mask, mask);

   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
 }

 static void
@@ -1535,12 +1585,12 @@ emit_instruction(

   case TGSI_OPCODE_KILP:
      /* predicated kill */
      emit_kilp( bld, inst );
      emit_kilp( bld, inst, (*pc)-1 );
      break;

   case TGSI_OPCODE_KIL:
      /* conditional kill */
      emit_kil( bld, inst );
      emit_kil( bld, inst, (*pc)-1 );
      break;

   case TGSI_OPCODE_PK2H:
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = {
 };


 static INLINE boolean
 static INLINE int
 pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,  
                          pb_size size,
                          const struct pb_desc *desc)
@@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
   void *map;

   if(buf->base.base.size < size)
      return FALSE;
      return 0;

   /* be lenient with size */
   if(buf->base.base.size >= 2*size)
      return FALSE;
      return 0;
   
   if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
      return FALSE;
      return 0;
   
   if(!pb_check_usage(desc->usage, buf->base.base.usage))
      return FALSE;
      return 0;

   map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
   if (!map) {
      return FALSE;
      return -1;
   }

   pb_unmap(buf->buffer);
   
   return TRUE;
   return 1;
 }


@@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
   struct pb_cache_buffer *curr_buf;
   struct list_head *curr, *next;
   int64_t now;
   
   int ret = 0;

   pipe_mutex_lock(mgr->mutex);

   buf = NULL;
@@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
   now = os_time_get();
   while(curr != &mgr->delayed) {
      curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
      if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
 	 buf = curr_buf;
      if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0))
         buf = curr_buf;
      else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
 	 _pb_cache_buffer_destroy(curr_buf);
         _pb_cache_buffer_destroy(curr_buf);
      else
         /* This buffer (and all hereafter) are still hot in cache */
         break;
      if (ret == -1)
         break;
      curr = next; 
      next = curr->next;
   }

   /* keep searching in the hot buffers */
   if(!buf) {
   if(!buf && ret != -1) {
      while(curr != &mgr->delayed) {
         curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
         if(pb_cache_is_buffer_compat(curr_buf, size, desc)) {
         ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
         if (ret > 0) {
            buf = curr_buf;
            break;
         }
         if (ret == -1)
            break;
         /* no need to check the timeout here */
         curr = next;
         next = curr->next;
@@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
   
   if(buf) {
      LIST_DEL(&buf->head);
      --mgr->numDelayed;
      pipe_mutex_unlock(mgr->mutex);
      /* Increase refcount */
      pipe_reference_init(&buf->base.base.reference, 1);
--- a/src/gallium/auxiliary/rbug/rbug_context.c
+++ b/src/gallium/auxiliary/rbug/rbug_context.c
@@ -480,7 +480,7 @@ struct rbug_proto_context_list * rbug_demarshal_context_list(struct rbug_proto_h

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST)
 		return NULL;

 	pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_context_info * rbug_demarshal_context_info(struct rbug_proto_h

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO)
 		return NULL;

 	pos = 0;
@@ -533,7 +533,7 @@ struct rbug_proto_context_draw_block * rbug_demarshal_context_draw_block(struct

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
 		return NULL;

 	pos = 0;
@@ -561,7 +561,7 @@ struct rbug_proto_context_draw_step * rbug_demarshal_context_draw_step(struct rb

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_STEP)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_STEP)
 		return NULL;

 	pos = 0;
@@ -589,7 +589,7 @@ struct rbug_proto_context_draw_unblock * rbug_demarshal_context_draw_unblock(str

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
 		return NULL;

 	pos = 0;
@@ -617,7 +617,7 @@ struct rbug_proto_context_draw_rule * rbug_demarshal_context_draw_rule(struct rb

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_RULE)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_RULE)
 		return NULL;

 	pos = 0;
@@ -649,7 +649,7 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_FLUSH)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_FLUSH)
 		return NULL;

 	pos = 0;
@@ -677,7 +677,7 @@ struct rbug_proto_context_list_reply * rbug_demarshal_context_list_reply(struct

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST_REPLY)
 		return NULL;

 	pos = 0;
@@ -705,7 +705,7 @@ struct rbug_proto_context_info_reply * rbug_demarshal_context_info_reply(struct

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO_REPLY)
 		return NULL;

 	pos = 0;
@@ -739,7 +739,7 @@ struct rbug_proto_context_draw_blocked * rbug_demarshal_context_draw_blocked(str

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
 	if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
 		return NULL;

 	pos = 0;
--- a/src/gallium/auxiliary/rbug/rbug_core.c
+++ b/src/gallium/auxiliary/rbug/rbug_core.c
@@ -233,7 +233,7 @@ struct rbug_proto_noop * rbug_demarshal_noop(struct rbug_proto_header *header)

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_NOOP)
 	if (header->opcode != (int32_t)RBUG_OP_NOOP)
 		return NULL;

 	pos = 0;
@@ -259,7 +259,7 @@ struct rbug_proto_ping * rbug_demarshal_ping(struct rbug_proto_header *header)

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_PING)
 	if (header->opcode != (int32_t)RBUG_OP_PING)
 		return NULL;

 	pos = 0;
@@ -285,7 +285,7 @@ struct rbug_proto_error * rbug_demarshal_error(struct rbug_proto_header *header)

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_ERROR)
 	if (header->opcode != (int32_t)RBUG_OP_ERROR)
 		return NULL;

 	pos = 0;
@@ -312,7 +312,7 @@ struct rbug_proto_ping_reply * rbug_demarshal_ping_reply(struct rbug_proto_heade

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_PING_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_PING_REPLY)
 		return NULL;

 	pos = 0;
@@ -339,7 +339,7 @@ struct rbug_proto_error_reply * rbug_demarshal_error_reply(struct rbug_proto_hea

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_ERROR_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_ERROR_REPLY)
 		return NULL;

 	pos = 0;
--- a/src/gallium/auxiliary/rbug/rbug_demarshal.c
+++ b/src/gallium/auxiliary/rbug/rbug_demarshal.c
@@ -91,3 +91,67 @@ struct rbug_header * rbug_demarshal(struct rbug_proto_header *header)
 		return NULL;
 	}
 }

 const char* rbug_proto_get_name(enum rbug_opcode opcode)
 {
 	switch(opcode) {
 	case RBUG_OP_NOOP:
 		return "RBUG_OP_NOOP";
 	case RBUG_OP_PING:
 		return "RBUG_OP_PING";
 	case RBUG_OP_ERROR:
 		return "RBUG_OP_ERROR";
 	case RBUG_OP_PING_REPLY:
 		return "RBUG_OP_PING_REPLY";
 	case RBUG_OP_ERROR_REPLY:
 		return "RBUG_OP_ERROR_REPLY";
 	case RBUG_OP_TEXTURE_LIST:
 		return "RBUG_OP_TEXTURE_LIST";
 	case RBUG_OP_TEXTURE_INFO:
 		return "RBUG_OP_TEXTURE_INFO";
 	case RBUG_OP_TEXTURE_WRITE:
 		return "RBUG_OP_TEXTURE_WRITE";
 	case RBUG_OP_TEXTURE_READ:
 		return "RBUG_OP_TEXTURE_READ";
 	case RBUG_OP_TEXTURE_LIST_REPLY:
 		return "RBUG_OP_TEXTURE_LIST_REPLY";
 	case RBUG_OP_TEXTURE_INFO_REPLY:
 		return "RBUG_OP_TEXTURE_INFO_REPLY";
 	case RBUG_OP_TEXTURE_READ_REPLY:
 		return "RBUG_OP_TEXTURE_READ_REPLY";
 	case RBUG_OP_CONTEXT_LIST:
 		return "RBUG_OP_CONTEXT_LIST";
 	case RBUG_OP_CONTEXT_INFO:
 		return "RBUG_OP_CONTEXT_INFO";
 	case RBUG_OP_CONTEXT_DRAW_BLOCK:
 		return "RBUG_OP_CONTEXT_DRAW_BLOCK";
 	case RBUG_OP_CONTEXT_DRAW_STEP:
 		return "RBUG_OP_CONTEXT_DRAW_STEP";
 	case RBUG_OP_CONTEXT_DRAW_UNBLOCK:
 		return "RBUG_OP_CONTEXT_DRAW_UNBLOCK";
 	case RBUG_OP_CONTEXT_DRAW_RULE:
 		return "RBUG_OP_CONTEXT_DRAW_RULE";
 	case RBUG_OP_CONTEXT_FLUSH:
 		return "RBUG_OP_CONTEXT_FLUSH";
 	case RBUG_OP_CONTEXT_LIST_REPLY:
 		return "RBUG_OP_CONTEXT_LIST_REPLY";
 	case RBUG_OP_CONTEXT_INFO_REPLY:
 		return "RBUG_OP_CONTEXT_INFO_REPLY";
 	case RBUG_OP_CONTEXT_DRAW_BLOCKED:
 		return "RBUG_OP_CONTEXT_DRAW_BLOCKED";
 	case RBUG_OP_SHADER_LIST:
 		return "RBUG_OP_SHADER_LIST";
 	case RBUG_OP_SHADER_INFO:
 		return "RBUG_OP_SHADER_INFO";
 	case RBUG_OP_SHADER_DISABLE:
 		return "RBUG_OP_SHADER_DISABLE";
 	case RBUG_OP_SHADER_REPLACE:
 		return "RBUG_OP_SHADER_REPLACE";
 	case RBUG_OP_SHADER_LIST_REPLY:
 		return "RBUG_OP_SHADER_LIST_REPLY";
 	case RBUG_OP_SHADER_INFO_REPLY:
 		return "RBUG_OP_SHADER_INFO_REPLY";
 	default:
 		return NULL;
 	}
 }
--- a/src/gallium/auxiliary/rbug/rbug_proto.h
+++ b/src/gallium/auxiliary/rbug/rbug_proto.h
@@ -91,4 +91,9 @@ struct rbug_proto_header
 */
 struct rbug_connection;

 /**
 * Get printable string for opcode.
 *
 * The result is the opcode's identifier as a string literal (for example
 * "RBUG_OP_PING").  NULL is returned for an opcode the implementation
 * does not recognise, so callers must check before printing.
 */
 const char* rbug_proto_get_name(enum rbug_opcode opcode);

 #endif
--- a/src/gallium/auxiliary/rbug/rbug_shader.c
+++ b/src/gallium/auxiliary/rbug/rbug_shader.c
@@ -305,7 +305,7 @@ struct rbug_proto_shader_list * rbug_demarshal_shader_list(struct rbug_proto_hea

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST)
 	if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST)
 		return NULL;

 	pos = 0;
@@ -332,7 +332,7 @@ struct rbug_proto_shader_info * rbug_demarshal_shader_info(struct rbug_proto_hea

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO)
 	if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO)
 		return NULL;

 	pos = 0;
@@ -360,7 +360,7 @@ struct rbug_proto_shader_disable * rbug_demarshal_shader_disable(struct rbug_pro

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_SHADER_DISABLE)
 	if (header->opcode != (int32_t)RBUG_OP_SHADER_DISABLE)
 		return NULL;

 	pos = 0;
@@ -389,7 +389,7 @@ struct rbug_proto_shader_replace * rbug_demarshal_shader_replace(struct rbug_pro

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_SHADER_REPLACE)
 	if (header->opcode != (int32_t)RBUG_OP_SHADER_REPLACE)
 		return NULL;

 	pos = 0;
@@ -418,7 +418,7 @@ struct rbug_proto_shader_list_reply * rbug_demarshal_shader_list_reply(struct rb

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST_REPLY)
 		return NULL;

 	pos = 0;
@@ -446,7 +446,7 @@ struct rbug_proto_shader_info_reply * rbug_demarshal_shader_info_reply(struct rb

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO_REPLY)
 		return NULL;

 	pos = 0;
--- a/src/gallium/auxiliary/rbug/rbug_texture.c
+++ b/src/gallium/auxiliary/rbug/rbug_texture.c
@@ -417,7 +417,7 @@ struct rbug_proto_texture_list * rbug_demarshal_texture_list(struct rbug_proto_h

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST)
 		return NULL;

 	pos = 0;
@@ -443,7 +443,7 @@ struct rbug_proto_texture_info * rbug_demarshal_texture_info(struct rbug_proto_h

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO)
 		return NULL;

 	pos = 0;
@@ -470,7 +470,7 @@ struct rbug_proto_texture_write * rbug_demarshal_texture_write(struct rbug_proto

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_WRITE)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_WRITE)
 		return NULL;

 	pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_texture_read * rbug_demarshal_texture_read(struct rbug_proto_h

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ)
 		return NULL;

 	pos = 0;
@@ -540,7 +540,7 @@ struct rbug_proto_texture_list_reply * rbug_demarshal_texture_list_reply(struct

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST_REPLY)
 		return NULL;

 	pos = 0;
@@ -568,7 +568,7 @@ struct rbug_proto_texture_info_reply * rbug_demarshal_texture_info_reply(struct

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO_REPLY)
 		return NULL;

 	pos = 0;
@@ -606,7 +606,7 @@ struct rbug_proto_texture_read_reply * rbug_demarshal_texture_read_reply(struct

 	if (!header)
 		return NULL;
 	if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ_REPLY)
 	if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ_REPLY)
 		return NULL;

 	pos = 0;
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -58,7 +58,6 @@

 #include <unistd.h>
 #include <sys/mman.h>
 #include "os/os_thread.h"
 #include "util/u_mm.h"

 #define EXEC_HEAP_SIZE (10*1024*1024)
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -23,25 +23,12 @@
 #include "cell/ppu/cell_public.h"
 #endif


 static INLINE struct pipe_screen *
 sw_screen_create(struct sw_winsys *winsys)
 sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
 {
   const char *default_driver;
   const char *driver;
   struct pipe_screen *screen = NULL;

 #if defined(GALLIUM_CELL)
   default_driver = "cell";
 #elif defined(GALLIUM_LLVMPIPE)
   default_driver = "llvmpipe";
 #elif defined(GALLIUM_SOFTPIPE)
   default_driver = "softpipe";
 #else
   default_driver = "";
 #endif

   driver = debug_get_option("GALLIUM_DRIVER", default_driver);

 #if defined(GALLIUM_CELL)
   if (screen == NULL && strcmp(driver, "cell") == 0)
      screen = cell_create_screen(winsys);
@@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys)
   return screen;
 }


 /**
 * Create a software pipe_screen for the given winsys.
 *
 * The driver name is looked up through debug_get_option("GALLIUM_DRIVER"),
 * falling back to a compile-time default (cell, then llvmpipe, then
 * softpipe, otherwise the empty string).  Creation itself is delegated
 * to sw_screen_create_named().
 */
 static INLINE struct pipe_screen *
 sw_screen_create(struct sw_winsys *winsys)
 {
 #if defined(GALLIUM_CELL)
    const char *fallback = "cell";
 #elif defined(GALLIUM_LLVMPIPE)
    const char *fallback = "llvmpipe";
 #elif defined(GALLIUM_SOFTPIPE)
    const char *fallback = "softpipe";
 #else
    const char *fallback = "";
 #endif

    return sw_screen_create_named(winsys,
                                  debug_get_option("GALLIUM_DRIVER", fallback));
 }


 #endif
--- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -13,22 +13,28 @@ static INLINE struct pipe_screen *
 sw_screen_wrap(struct pipe_screen *screen)
 {
   struct sw_winsys *sws;
   struct pipe_screen *sw_screen;
   struct pipe_screen *sw_screen = NULL;
   const char *driver;

   sws = wrapper_sw_winsys_warp_pipe_screen(screen);
   driver = debug_get_option("GALLIUM_DRIVER", "native");
   if (strcmp(driver, "native") == 0)
      return screen;

   sws = wrapper_sw_winsys_wrap_pipe_screen(screen);
   if (!sws)
      goto err;

   sw_screen = sw_screen_create(sws);
   if (sw_screen == screen)
   sw_screen = sw_screen_create_named(sws, driver);

   if (!sw_screen)
      goto err_winsys;

   return sw_screen;

 err_winsys:
   sws->destroy(sws);
   return wrapper_sw_winsys_dewrap_pipe_screen(sws);
 err:
  return screen;
   return screen;
 }

 #endif
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -90,7 +90,8 @@ static const char *processor_type_names[] =
   "GEOM"
 };

 static const char *file_names[TGSI_FILE_COUNT] =
 const char *
 tgsi_file_names[TGSI_FILE_COUNT] =
 {
   "NULL",
   "CONST",
@@ -125,7 +126,8 @@ static const char *semantic_names[] =
   "FACE",
   "EDGEFLAG",
   "PRIM_ID",
   "INSTANCEID"
   "INSTANCEID",
   "STENCIL"
 };

 static const char *immediate_type_names[] =
@@ -135,7 +137,8 @@ static const char *immediate_type_names[] =
   "INT32"
 };

 static const char *swizzle_names[] =
 const char *
 tgsi_swizzle_names[] =
 {
   "x",
   "y",
@@ -143,7 +146,8 @@ static const char *swizzle_names[] =
   "w"
 };

 static const char *texture_names[] =
 const char *
 tgsi_texture_names[] =
 {
   "UNKNOWN",
   "1D",
@@ -201,15 +205,15 @@ _dump_register_src(
   struct dump_ctx *ctx,
   const struct tgsi_full_src_register *src )
 {
   ENM(src->Register.File, file_names);
   ENM(src->Register.File, tgsi_file_names);
   if (src->Register.Dimension) {
      if (src->Dimension.Indirect) {
         CHR( '[' );
         ENM( src->DimIndirect.File, file_names );
         ENM( src->DimIndirect.File, tgsi_file_names );
         CHR( '[' );
         SID( src->DimIndirect.Index );
         TXT( "]." );
         ENM( src->DimIndirect.SwizzleX, swizzle_names );
         ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names );
         if (src->Dimension.Index != 0) {
            if (src->Dimension.Index > 0)
               CHR( '+' );
@@ -224,11 +228,11 @@ _dump_register_src(
   }
   if (src->Register.Indirect) {
      CHR( '[' );
      ENM( src->Indirect.File, file_names );
      ENM( src->Indirect.File, tgsi_file_names );
      CHR( '[' );
      SID( src->Indirect.Index );
      TXT( "]." );
      ENM( src->Indirect.SwizzleX, swizzle_names );
      ENM( src->Indirect.SwizzleX, tgsi_swizzle_names );
      if (src->Register.Index != 0) {
         if (src->Register.Index > 0)
            CHR( '+' );
@@ -248,15 +252,15 @@ _dump_register_dst(
   struct dump_ctx *ctx,
   const struct tgsi_full_dst_register *dst )
 {
   ENM(dst->Register.File, file_names);
   ENM(dst->Register.File, tgsi_file_names);
   if (dst->Register.Dimension) {
      if (dst->Dimension.Indirect) {
         CHR( '[' );
         ENM( dst->DimIndirect.File, file_names );
         ENM( dst->DimIndirect.File, tgsi_file_names );
         CHR( '[' );
         SID( dst->DimIndirect.Index );
         TXT( "]." );
         ENM( dst->DimIndirect.SwizzleX, swizzle_names );
         ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names );
         if (dst->Dimension.Index != 0) {
            if (dst->Dimension.Index > 0)
               CHR( '+' );
@@ -271,11 +275,11 @@ _dump_register_dst(
   }
   if (dst->Register.Indirect) {
      CHR( '[' );
      ENM( dst->Indirect.File, file_names );
      ENM( dst->Indirect.File, tgsi_file_names );
      CHR( '[' );
      SID( dst->Indirect.Index );
      TXT( "]." );
      ENM( dst->Indirect.SwizzleX, swizzle_names );
      ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names );
      if (dst->Register.Index != 0) {
         if (dst->Register.Index > 0)
            CHR( '+' );
@@ -351,7 +355,7 @@ iter_declaration(

   TXT( "DCL " );

   ENM(decl->Declaration.File, file_names);
   ENM(decl->Declaration.File, tgsi_file_names);

   /* all geometry shader inputs are two dimensional */
   if (decl->Declaration.File == TGSI_FILE_INPUT &&
@@ -585,10 +589,10 @@ iter_instruction(
          inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z ||
          inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) {
         CHR( '.' );
         ENM( inst->Predicate.SwizzleX, swizzle_names );
         ENM( inst->Predicate.SwizzleY, swizzle_names );
         ENM( inst->Predicate.SwizzleZ, swizzle_names );
         ENM( inst->Predicate.SwizzleW, swizzle_names );
         ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names );
         ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names );
         ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names );
         ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names );
      }

      TXT( ") " );
@@ -641,10 +645,10 @@ iter_instruction(
          src->Register.SwizzleZ != TGSI_SWIZZLE_Z ||
          src->Register.SwizzleW != TGSI_SWIZZLE_W) {
         CHR( '.' );
         ENM( src->Register.SwizzleX, swizzle_names );
         ENM( src->Register.SwizzleY, swizzle_names );
         ENM( src->Register.SwizzleZ, swizzle_names );
         ENM( src->Register.SwizzleW, swizzle_names );
         ENM( src->Register.SwizzleX, tgsi_swizzle_names );
         ENM( src->Register.SwizzleY, tgsi_swizzle_names );
         ENM( src->Register.SwizzleZ, tgsi_swizzle_names );
         ENM( src->Register.SwizzleW, tgsi_swizzle_names );
      }

      if (src->Register.Absolute)
@@ -655,7 +659,7 @@ iter_instruction(

   if (inst->Instruction.Texture) {
      TXT( ", " );
      ENM( inst->Texture.Texture, texture_names );
      ENM( inst->Texture.Texture, tgsi_texture_names );
   }

   switch (inst->Instruction.Opcode) {
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -35,6 +35,15 @@
 extern "C" {
 #endif

 extern const char *
 tgsi_file_names[TGSI_FILE_COUNT];

 extern const char *
 tgsi_swizzle_names[];

 extern const char *
 tgsi_texture_names[];

 void
 tgsi_dump_str(
   const struct tgsi_token *tokens,
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -605,8 +605,10 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
      if ((inst->Src[i].Register.File ==
           inst->Dst[0].Register.File) &&
          (inst->Src[i].Register.Index ==
           inst->Dst[0].Register.Index)) {
          ((inst->Src[i].Register.Index ==
            inst->Dst[0].Register.Index) ||
 	   inst->Src[i].Register.Indirect ||
 	   inst->Dst[0].Register.Indirect)) {
         /* loop over dest channels */
         uint channelsWritten = 0x0;
         FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -163,6 +163,10 @@ OP12(USGE)
 OP12(USHR)
 OP12(USLT)
 OP12(USNE)
 OP01(SWITCH)
 OP01(CASE)
 OP00(DEFAULT)
 OP00(ENDSWITCH)


 #undef OP00
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                  info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
                  info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
                  info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
                  info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid;
                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap;
                  info->num_inputs++;
               }
@@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens,

                  /* extra info for special outputs */
                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
                     info->writes_z = TRUE;
                  }
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
                        info->writes_z = TRUE;
                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL)
                        info->writes_stencil = TRUE;
                  if (procType == TGSI_PROCESSOR_VERTEX &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) {
                     info->writes_edgeflag = TRUE;
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -45,6 +45,7 @@ struct tgsi_shader_info
   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
   ubyte input_centroid[PIPE_MAX_SHADER_INPUTS];
   ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
   ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
@@ -60,6 +61,7 @@ struct tgsi_shader_info
   uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */

   boolean writes_z;  /**< does fragment shader write Z value? */
   boolean writes_stencil; /**< does fragment shader write stencil value? */
   boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
   boolean uses_kill;  /**< KIL or KILP instruction used? */

--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2830,31 +2830,52 @@ static void soa_to_aos( struct x86_function *func,
 * Check if the instruction's dst register is the same as any src
 * register and warn if there's a possible SOA dependency.
 */
 static void
 static boolean
 check_soa_dependencies(const struct tgsi_full_instruction *inst)
 {
   switch (inst->Instruction.Opcode) {
   uint opcode = inst->Instruction.Opcode;

   /* XXX: we only handle src/dst aliasing in a few opcodes currently.
    * Need to use an additional temporary to hold the result in the
    * cases where the code is too opaque to fix.
    */

   switch (opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_EXP:
   case TGSI_OPCODE_LOG:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_DP2A:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
   case TGSI_OPCODE_POW:
   case TGSI_OPCODE_XPD:
   case TGSI_OPCODE_DPH:
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_NRM:
   case TGSI_OPCODE_NRM4:
   case TGSI_OPCODE_DP2:
      /* OK - these opcodes correctly handle SOA dependencies */
      break;
      return TRUE;
   default:
      if (tgsi_check_soa_dependencies(inst)) {
         uint opcode = inst->Instruction.Opcode;
      if (!tgsi_check_soa_dependencies(inst))
         return TRUE;

         /* XXX: we only handle src/dst aliasing in a few opcodes
          * currently.  Need to use an additional temporay to hold
          * the result in the cases where the code is too opaque to
          * fix.
          */
         if (opcode != TGSI_OPCODE_MOV) {
            debug_printf("Warning: src/dst aliasing in instruction"
                         " is not handled:\n");
            tgsi_dump_instruction(inst, 1);
         }
      }
      debug_printf("Warning: src/dst aliasing in instruction"
                   " is not handled:\n");
      debug_printf("Warning: ");
      tgsi_dump_instruction(inst, 1);

      return FALSE;
   }
 }

@@ -2954,7 +2975,8 @@ tgsi_emit_sse2(
                         tgsi_get_processor_name(proc));
 	 }

         check_soa_dependencies(&parse.FullToken.FullInstruction);
         if (ok)
            ok = check_soa_dependencies(&parse.FullToken.FullInstruction);
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -96,7 +96,8 @@ struct ureg_program
      unsigned semantic_name;
      unsigned semantic_index;
      unsigned interp;
      unsigned cylindrical_wrap;
      unsigned char cylindrical_wrap;
      unsigned char centroid;
   } fs_input[UREG_MAX_INPUT];
   unsigned nr_fs_inputs;

@@ -286,11 +287,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,


 struct ureg_src
 ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
 ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
                       unsigned semantic_name,
                       unsigned semantic_index,
                       unsigned interp_mode,
                       unsigned cylindrical_wrap)
                       unsigned cylindrical_wrap,
                       unsigned centroid)
 {
   unsigned i;

@@ -306,6 +308,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
      ureg->fs_input[i].semantic_index = semantic_index;
      ureg->fs_input[i].interp = interp_mode;
      ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
      ureg->fs_input[i].centroid = centroid;
      ureg->nr_fs_inputs++;
   } else {
      set_bad(ureg);
@@ -1126,7 +1129,8 @@ emit_decl_fs(struct ureg_program *ureg,
             unsigned semantic_name,
             unsigned semantic_index,
             unsigned interpolate,
             unsigned cylindrical_wrap)
             unsigned cylindrical_wrap,
             unsigned centroid)
 {
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);

@@ -1138,6 +1142,7 @@ emit_decl_fs(struct ureg_program *ureg,
   out[0].decl.Interpolate = interpolate;
   out[0].decl.Semantic = 1;
   out[0].decl.CylindricalWrap = cylindrical_wrap;
   out[0].decl.Centroid = centroid;

   out[1].value = 0;
   out[1].decl_range.First = index;
@@ -1287,7 +1292,8 @@ static void emit_decls( struct ureg_program *ureg )
                      ureg->fs_input[i].semantic_name,
                      ureg->fs_input[i].semantic_index,
                      ureg->fs_input[i].interp,
                      ureg->fs_input[i].cylindrical_wrap);
                      ureg->fs_input[i].cylindrical_wrap,
                      ureg->fs_input[i].centroid);
      }
   } else {
      for (i = 0; i < ureg->nr_gs_inputs; i++) {
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -158,11 +158,27 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
 */

 struct ureg_src
 ureg_DECL_fs_input_cyl(struct ureg_program *,
 ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
                       unsigned semantic_name,
                       unsigned semantic_index,
                       unsigned interp_mode,
                       unsigned cylindrical_wrap);
                       unsigned cylindrical_wrap,
                       unsigned centroid);

 /**
  * Declare a fragment shader input with the centroid flag cleared.
  *
  * Thin convenience wrapper: every argument is passed through unchanged to
  * ureg_DECL_fs_input_cyl_centroid(), with 0 supplied for its extra
  * `centroid` parameter.
  */
 static INLINE struct ureg_src
 ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
                       unsigned semantic_name,
                       unsigned semantic_index,
                       unsigned interp_mode,
                       unsigned cylindrical_wrap)
 {
    const unsigned centroid = 0;

    return ureg_DECL_fs_input_cyl_centroid(ureg,
                                           semantic_name, semantic_index,
                                           interp_mode, cylindrical_wrap,
                                           centroid);
 }

 static INLINE struct ureg_src
 ureg_DECL_fs_input(struct ureg_program *ureg,
@@ -170,11 +186,11 @@ ureg_DECL_fs_input(struct ureg_program *ureg,
                   unsigned semantic_index,
                   unsigned interp_mode)
 {
   return ureg_DECL_fs_input_cyl(ureg,
   return ureg_DECL_fs_input_cyl_centroid(ureg,
                                 semantic_name,
                                 semantic_index,
                                 interp_mode,
                                 0);
                                 0, 0);
 }

 struct ureg_src
--- a/src/gallium/auxiliary/util/u_atomic.h
+++ b/src/gallium/auxiliary/util/u_atomic.h
@@ -29,6 +29,8 @@
 #define PIPE_ATOMIC_ASM_MSVC_X86                
 #elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
 #define PIPE_ATOMIC_ASM_GCC_X86
 #elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64))
 #define PIPE_ATOMIC_ASM_GCC_X86_64
 #elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
 #define PIPE_ATOMIC_GCC_INTRINSIC
 #else
@@ -36,6 +38,51 @@
 #endif


 #if defined(PIPE_ATOMIC_ASM_GCC_X86_64)
 #define PIPE_ATOMIC "GCC x86_64 assembly"

 #ifdef __cplusplus
 extern "C" {
 #endif

 #define p_atomic_set(_v, _i) (*(_v) = (_i))
 #define p_atomic_read(_v) (*(_v))

 /**
  * Decrement *v with a lock-prefixed `decl` and report whether the result
  * is zero.
  *
  * `sete` captures the zero flag left by the decrement into `c`, so the
  * function returns non-zero exactly when the decremented value is 0.
  */
 static INLINE boolean
 p_atomic_dec_zero(int32_t *v)
 {
    unsigned char c;

    /* *v is a read-write memory operand; c receives the `sete` result.
     * The asm also declares a "memory" clobber. */
    __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c)
 			::"memory");

    return c != 0;
 }

 /** Increment *v in place using a lock-prefixed `incl` instruction. */
 static INLINE void
 p_atomic_inc(int32_t *v)
 {
    /* *v is passed as a read-write ("+m") memory operand. */
    __asm__ __volatile__("lock; incl %0":"+m"(*v));
 }

 /** Decrement *v in place using a lock-prefixed `decl` instruction. */
 static INLINE void
 p_atomic_dec(int32_t *v)
 {
    /* *v is passed as a read-write ("+m") memory operand. */
    __asm__ __volatile__("lock; decl %0":"+m"(*v));
 }

 /**
  * Compare-and-swap on *v via the GCC __sync_val_compare_and_swap builtin.
  *
  * \param v     location to operate on
  * \param old   value *v is compared against
  * \param _new  value to store when the comparison succeeds
  * \return whatever the builtin returns (the value *v held before the
  *         operation)
  */
 static INLINE int32_t
 p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new)
 {
    const int32_t previous = __sync_val_compare_and_swap(v, old, _new);

    return previous;
 }

 #ifdef __cplusplus
 }
 #endif

 #endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */


 #if defined(PIPE_ATOMIC_ASM_GCC_X86)

--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -268,7 +268,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
         pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
   }

   for (i = 0; i <= PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
   for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++)
      if (ctx->fs_col[i])
         pipe->delete_fs_state(pipe, ctx->fs_col[i]);

@@ -964,16 +964,18 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
   blitter_restore_CSOs(ctx);
 }

 /* Clear a region of a depth stencil surface. */
 void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
                                      struct pipe_surface *dstsurf)
 /* draw a rectangle across a region using a custom dsa stage - for r600g */
 void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
 				       struct pipe_surface *zsurf,
 				       struct pipe_surface *cbsurf,
 				       void *dsa_stage, float depth)
 {
   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
   struct pipe_context *pipe = ctx->base.pipe;
   struct pipe_framebuffer_state fb_state;

   assert(dstsurf->texture);
   if (!dstsurf->texture)
   assert(zsurf->texture);
   if (!zsurf->texture)
      return;

   /* check the saved state */
@@ -981,8 +983,8 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
   assert(blitter->saved_fb_state.nr_cbufs != ~0);

   /* bind CSOs */
   pipe->bind_blend_state(pipe, ctx->blend_keep_color);
   pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
   pipe->bind_blend_state(pipe, ctx->blend_write_color);
   pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);

   pipe->bind_rasterizer_state(pipe, ctx->rs_state);
   pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
@@ -990,15 +992,30 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);

   /* set a framebuffer state */
   fb_state.width = dstsurf->width;
   fb_state.height = dstsurf->height;
   fb_state.nr_cbufs = 0;
   fb_state.cbufs[0] = 0;
   fb_state.zsbuf = dstsurf;
   fb_state.width = zsurf->width;
   fb_state.height = zsurf->height;
   fb_state.nr_cbufs = 1;
   if (cbsurf) {
 	   fb_state.cbufs[0] = cbsurf;
 	   fb_state.nr_cbufs = 1;
   } else {
 	   fb_state.cbufs[0] = NULL;
 	   fb_state.nr_cbufs = 0;
   }
   fb_state.zsbuf = zsurf;
   pipe->set_framebuffer_state(pipe, &fb_state);

   blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
   blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0,
   blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
   blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
                           UTIL_BLITTER_ATTRIB_NONE, NULL);
   blitter_restore_CSOs(ctx);
 }

 /* Flush a region of a depth stencil surface (used by r300g).
  *
  * Implemented on top of util_blitter_custom_depth_stencil(): the blitter's
  * own dsa_flush_depth_stencil state is used, no colour buffer is passed
  * (NULL) and the rectangle is drawn at depth 0.0. */
 void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
                                       struct pipe_surface *dstsurf)
 {
    struct blitter_context_priv *priv = (struct blitter_context_priv *)blitter;
    void *flush_dsa = priv->dsa_flush_depth_stencil;

    util_blitter_custom_depth_stencil(blitter, dstsurf, NULL, flush_dsa, 0.0f);
 }
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -203,6 +203,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,

 void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
                                      struct pipe_surface *dstsurf);

 /* Draw a rectangle covering the full width and height of zsurf at the given
  * depth, with the caller-supplied depth/stencil/alpha state object
  * (dsa_stage) bound.
  *
  * zsurf is bound as the framebuffer's depth/stencil buffer; cbsurf, when
  * non-NULL, is bound as colour buffer 0, otherwise no colour buffer is set. */
 void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
 				       struct pipe_surface *zsurf,
 				       struct pipe_surface *cbsurf,
 				       void *dsa_stage, float depth);

 /* The functions below should be used to save currently bound constant state
 * objects inside a driver. The objects are automatically restored at the end
 * of the util_blitter_{clear, copy_region, fill_region} functions and then
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM               , plain, 1, 1, un32,     ,     ,     , x___,
 PIPE_FORMAT_Z32_FLOAT               , plain, 1, 1, f32 ,     ,     ,     , x___, zs
 PIPE_FORMAT_Z24_UNORM_S8_USCALED    , plain, 1, 1, un24, u8  ,     ,     , xy__, zs
 PIPE_FORMAT_S8_USCALED_Z24_UNORM    , plain, 1, 1, u8 ,  un24,     ,     , yx__, zs
 PIPE_FORMAT_X24S8_USCALED           , plain, 1, 1, x24,  u8  ,     ,     , _y__, zs
 PIPE_FORMAT_S8X24_USCALED           , plain, 1, 1, u8  , x24 ,     ,     , _x__, zs
 PIPE_FORMAT_Z24X8_UNORM             , plain, 1, 1, un24, x8  ,     ,     , x___, zs
 PIPE_FORMAT_X8Z24_UNORM             , plain, 1, 1, x8  , un24,     ,     , y___, zs
 PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32,  u8  , x24 ,     , xy__, zs
 PIPE_FORMAT_X32_S8X24_USCALED       , plain, 1, 1, x32,  u8  , x24 ,     , _y__, zs

 # YUV formats
 # http://www.fourcc.org/yuv.php#UYVY
--- a/src/gallium/auxiliary/util/u_format_zs.c
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d
   }
 }


 /**
  * Stencil unpack entry point for the X24S8_USCALED format.
  *
  * Forwards every argument unchanged to the Z24_UNORM_S8_USCALED stencil
  * unpacker.
  */
 void
 util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row,
                                             unsigned dst_stride,
                                             const uint8_t *src_row,
                                             unsigned src_stride,
                                             unsigned width,
                                             unsigned height)
 {
    util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(
       dst_row, dst_stride, src_row, src_stride, width, height);
 }

 /**
  * Stencil pack entry point for the X24S8_USCALED format.
  *
  * Forwards every argument unchanged to the Z24_UNORM_S8_USCALED stencil
  * packer.
  */
 void
 util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row,
                                           unsigned dst_stride,
                                           const uint8_t *src_row,
                                           unsigned src_stride,
                                           unsigned width,
                                           unsigned height)
 {
    util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(
       dst_row, dst_stride, src_row, src_stride, width, height);
 }

 /**
  * Stencil unpack entry point for the S8X24_USCALED format.
  *
  * Forwards every argument unchanged to the S8_USCALED_Z24_UNORM stencil
  * unpacker.
  */
 void
 util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row,
                                             unsigned dst_stride,
                                             const uint8_t *src_row,
                                             unsigned src_stride,
                                             unsigned width,
                                             unsigned height)
 {
    util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(
       dst_row, dst_stride, src_row, src_stride, width, height);
 }

 /**
  * Stencil pack entry point for the S8X24_USCALED format.
  *
  * Forwards every argument unchanged to the S8_USCALED_Z24_UNORM stencil
  * packer.
  */
 void
 util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row,
                                           unsigned dst_stride,
                                           const uint8_t *src_row,
                                           unsigned src_stride,
                                           unsigned width,
                                           unsigned height)
 {
    util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(
       dst_row, dst_stride, src_row, src_stride, width, height);
 }

 /**
  * Stencil unpack entry point for the X32_S8X24_USCALED format.
  *
  * Forwards every argument unchanged to the Z32_FLOAT_S8X24_USCALED stencil
  * unpacker.
  */
 void
 util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row,
                                                 unsigned dst_stride,
                                                 const uint8_t *src_row,
                                                 unsigned src_stride,
                                                 unsigned width,
                                                 unsigned height)
 {
    util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(
       dst_row, dst_stride, src_row, src_stride, width, height);
 }

 /**
  * Stencil pack entry point for the X32_S8X24_USCALED format.
  *
  * Forwards every argument unchanged to the Z32_FLOAT_S8X24_USCALED stencil
  * packer.
  */
 void
 util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row,
                                               unsigned dst_stride,
                                               const uint8_t *src_row,
                                               unsigned src_stride,
                                               unsigned width,
                                               unsigned height)
 {
    util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(
       dst_row, dst_stride, src_row, src_stride, width, height);
 }
--- a/src/gallium/auxiliary/util/u_format_zs.h
+++ b/src/gallium/auxiliary/util/u_format_zs.h
@@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned
 void
 util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

 void
 util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

 void
 util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

 void
 util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

 void
 util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

 void
 util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);

 /* Stencil packer for X32_S8X24_USCALED; same parameter list as the other
  * *_pack_s_8uscaled prototypes in this header. */
 void
 util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
 #endif /* U_FORMAT_ZS_H_ */
--- a/src/gallium/auxiliary/util/u_index_modify.c
+++ b/src/gallium/auxiliary/util/u_index_modify.c
@@ -0,0 +1,127 @@
 /*
 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

 #include "pipe/p_context.h"
 #include "util/u_index_modify.h"
 #include "util/u_inlines.h"

 /**
  * Replace an 8-bit index buffer with a freshly created 16-bit one.
  *
  * Reads `count` unsigned-char indices from *elts beginning at element
  * `start`, adds `index_bias` to each, and stores the results as unsigned
  * shorts at the beginning of a new PIPE_BIND_INDEX_BUFFER resource.
  * On return *elts points at the new resource.
  *
  * NOTE(review): the previous *elts pointer is overwritten without being
  * unreferenced here; presumably the caller manages that reference - confirm.
  */
 void util_shorten_ubyte_elts(struct pipe_context *context,
                              struct pipe_resource **elts,
                              int index_bias,
                              unsigned start,
                              unsigned count)
 {
     struct pipe_transfer *src_transfer, *dst_transfer;
     struct pipe_resource *widened;
     const unsigned char *src;
     unsigned short *dst;
     unsigned idx;

     /* Two bytes per output index. */
     widened = pipe_buffer_create(context->screen,
                                  PIPE_BIND_INDEX_BUFFER,
                                  2 * count);

     src = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
     dst = pipe_buffer_map(context, widened, PIPE_TRANSFER_WRITE, &dst_transfer);

     for (idx = 0; idx < count; idx++)
         dst[idx] = (unsigned short)(src[start + idx] + index_bias);

     pipe_buffer_unmap(context, *elts, src_transfer);
     pipe_buffer_unmap(context, widened, dst_transfer);

     *elts = widened;
 }

 /**
  * Rebuild a 16-bit index buffer with a bias applied to every index.
  *
  * Reads `count` unsigned-short indices from *elts beginning at element
  * `start`, adds `index_bias` to each, and stores the results at the
  * beginning of a new PIPE_BIND_INDEX_BUFFER resource.  On return *elts
  * points at the new resource.
  *
  * NOTE(review): the previous *elts pointer is overwritten without being
  * unreferenced here; presumably the caller manages that reference - confirm.
  */
 void util_rebuild_ushort_elts(struct pipe_context *context,
                               struct pipe_resource **elts,
                               int index_bias,
                               unsigned start, unsigned count)
 {
     struct pipe_transfer *src_transfer = NULL;
     struct pipe_transfer *dst_transfer = NULL;
     struct pipe_resource *rebuilt;
     const unsigned short *src;
     unsigned short *dst;
     unsigned idx;

     /* Two bytes per output index. */
     rebuilt = pipe_buffer_create(context->screen,
                                  PIPE_BIND_INDEX_BUFFER,
                                  2 * count);

     src = pipe_buffer_map(context, *elts,
                           PIPE_TRANSFER_READ, &src_transfer);
     dst = pipe_buffer_map(context, rebuilt,
                           PIPE_TRANSFER_WRITE, &dst_transfer);

     for (idx = 0; idx < count; idx++)
         dst[idx] = (unsigned short)(src[start + idx] + index_bias);

     pipe_buffer_unmap(context, *elts, src_transfer);
     pipe_buffer_unmap(context, rebuilt, dst_transfer);

     *elts = rebuilt;
 }

 /**
  * Rebuild a 32-bit index buffer with a bias applied to every index.
  *
  * Reads `count` unsigned-int indices from *elts beginning at element
  * `start`, adds `index_bias` to each, and stores the results at the
  * beginning of a new PIPE_BIND_INDEX_BUFFER resource.  On return *elts
  * points at the new resource.
  *
  * NOTE(review): the previous *elts pointer is overwritten without being
  * unreferenced here; presumably the caller manages that reference - confirm.
  */
 void util_rebuild_uint_elts(struct pipe_context *context,
                             struct pipe_resource **elts,
                             int index_bias,
                             unsigned start, unsigned count)
 {
     struct pipe_transfer *in_transfer = NULL;
     struct pipe_transfer *out_transfer = NULL;
     struct pipe_resource *new_elts;
     unsigned int *in_map;
     unsigned int *out_map;
     unsigned i;

     /* The output holds `count` unsigned ints.  Sizing it as 2 * count bytes
      * (the ushort element size) would make the copy loop below write past
      * the end of the new buffer. */
     new_elts = pipe_buffer_create(context->screen,
                                   PIPE_BIND_INDEX_BUFFER,
                                   count * (unsigned)sizeof(unsigned int));

     in_map = pipe_buffer_map(context, *elts,
                              PIPE_TRANSFER_READ, &in_transfer);
     out_map = pipe_buffer_map(context, new_elts,
                               PIPE_TRANSFER_WRITE, &out_transfer);

     in_map += start;
     for (i = 0; i < count; i++) {
         *out_map = (unsigned int)(*in_map + index_bias);
         in_map++;
         out_map++;
     }

     pipe_buffer_unmap(context, *elts, in_transfer);
     pipe_buffer_unmap(context, new_elts, out_transfer);

     *elts = new_elts;
 }
--- a/src/gallium/auxiliary/util/u_index_modify.h
+++ b/src/gallium/auxiliary/util/u_index_modify.h
@@ -1,5 +1,5 @@
 /*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -18,40 +18,24 @@
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 */
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 #include "radeon_priv.h"
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

 /*
 * draw functions
 */
 int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon)
 {
 	draw->radeon = radeon;
 	draw->state = calloc(radeon->max_states, sizeof(void*));
 	if (draw->state == NULL)
 		return -ENOMEM;
 	return 0;
 }
 #ifndef UTIL_INDEX_MODIFY_H
 #define UTIL_INDEX_MODIFY_H

 void util_shorten_ubyte_elts(struct pipe_context *context,
 			     struct pipe_resource **elts,
 			     int index_bias,
 			     unsigned start,
 			     unsigned count);

 void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state)
 {
 	if (state == NULL)
 		return;
 	draw->state[state->state_id] = state;
 }
 void util_rebuild_ushort_elts(struct pipe_context *context,
 			      struct pipe_resource **elts,
 			      int index_bias,
 			      unsigned start, unsigned count);

 void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state)
 {
 	if (state == NULL)
 		return;
 	if (draw->state[state->state_id] == state) {
 		draw->state[state->state_id] = NULL;
 	}
 }
 void util_rebuild_uint_elts(struct pipe_context *context,
 			    struct pipe_resource **elts,
 			    int index_bias,
 			    unsigned start, unsigned count);
 #endif
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val)
 #endif


 #ifndef M_SQRT2
 #define M_SQRT2 1.41421356237309504880
 #endif


 #if defined(_MSC_VER) 

 #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h