Conflicts:
	src/gallium/drivers/llvmpipe/lp_setup_coef.c
	src/gallium/drivers/llvmpipe/lp_setup_coef.h
	src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
	src/gallium/drivers/llvmpipe/lp_setup_point.c
	src/gallium/drivers/llvmpipe/lp_setup_tri.c
	src/gallium/drivers/llvmpipe/lp_state_derived.c
	src/gallium/drivers/llvmpipe/lp_state_fs.h
tags/snb-magic
@@ -329,6 +329,8 @@ GALLIUM_FILES = \ | |||
$(DIRECTORY)/src/gallium/Makefile.template \ | |||
$(DIRECTORY)/src/gallium/SConscript \ | |||
$(DIRECTORY)/src/gallium/targets/Makefile.dri \ | |||
$(DIRECTORY)/src/gallium/targets/Makefile.xorg \ | |||
$(DIRECTORY)/src/gallium/targets/SConscript.dri \ | |||
$(DIRECTORY)/src/gallium/*/Makefile \ | |||
$(DIRECTORY)/src/gallium/*/SConscript \ | |||
$(DIRECTORY)/src/gallium/*/*/Makefile \ | |||
@@ -356,6 +358,7 @@ DRI_FILES = \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/common/xmlpool/*.[ch] \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/common/xmlpool/*.po \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/*/*.[chS] \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/*/*.cpp \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/*/*/*.[chS] \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/*/Makefile \ | |||
$(DIRECTORY)/src/mesa/drivers/dri/*/*/Makefile \ |
@@ -208,7 +208,7 @@ Export('env') | |||
SConscript( | |||
'src/SConscript', | |||
variant_dir = env['build'], | |||
variant_dir = env['build_dir'], | |||
duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html | |||
) | |||
@@ -81,8 +81,8 @@ def AddOptions(opts): | |||
from SCons.Variables.EnumVariable import EnumVariable as EnumOption | |||
except ImportError: | |||
from SCons.Options.EnumOption import EnumOption | |||
opts.Add(BoolOption('debug', 'debug build', 'yes')) | |||
opts.Add(BoolOption('profile', 'profile build', 'no')) | |||
opts.Add(EnumOption('build', 'build type', 'debug', | |||
allowed_values=('debug', 'checked', 'profile', 'release'))) | |||
opts.Add(BoolOption('quiet', 'quiet command lines', 'yes')) | |||
opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, | |||
allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) | |||
@@ -91,3 +91,5 @@ def AddOptions(opts): | |||
opts.Add('toolchain', 'compiler toolchain', 'default') | |||
opts.Add(BoolOption('llvm', 'use LLVM', default_llvm)) | |||
opts.Add(BoolOption('dri', 'build DRI drivers', default_dri)) | |||
opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes')) | |||
opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no')) |
@@ -58,7 +58,7 @@ EGL_DRIVERS_DIRS = glx | |||
DRIVER_DIRS = dri | |||
GALLIUM_WINSYS_DIRS = sw sw/xlib drm/vmware drm/intel drm/i965 | |||
GALLIUM_TARGET_DIRS = egl-swrast | |||
GALLIUM_TARGET_DIRS = | |||
GALLIUM_STATE_TRACKERS_DIRS = egl | |||
DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon \ |
@@ -33,6 +33,10 @@ AC_CHECK_PROGS([MAKE], [gmake make]) | |||
AC_PATH_PROG([MKDEP], [makedepend]) | |||
AC_PATH_PROG([SED], [sed]) | |||
if test "x$MKDEP" = "x"; then | |||
AC_MSG_ERROR([makedepend is required to build Mesa]) | |||
fi | |||
dnl Our fallback install-sh is a symlink to minstall. Use the existing | |||
dnl configuration in that case. | |||
AC_PROG_INSTALL | |||
@@ -692,6 +696,11 @@ AC_SUBST([GLESv2_PC_LIB_PRIV]) | |||
AC_SUBST([HAVE_XF86VIDMODE]) | |||
PKG_CHECK_MODULES([LIBDRM_RADEON], | |||
[libdrm_radeon libdrm >= $LIBDRM_RADEON_REQUIRED], | |||
HAVE_LIBDRM_RADEON=yes, | |||
HAVE_LIBDRM_RADEON=no) | |||
dnl | |||
dnl More X11 setup | |||
dnl | |||
@@ -910,12 +919,7 @@ esac | |||
case $DRI_DIRS in | |||
*radeon*|*r200*|*r300*|*r600*) | |||
PKG_CHECK_MODULES([LIBDRM_RADEON], | |||
[libdrm_radeon libdrm >= $LIBDRM_RADEON_REQUIRED], | |||
HAVE_LIBDRM_RADEON=yes, | |||
HAVE_LIBDRM_RADEON=no) | |||
if test "$HAVE_LIBDRM_RADEON" = yes; then | |||
if test "x$HAVE_LIBDRM_RADEON" = xyes; then | |||
RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS" | |||
RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS | |||
fi | |||
@@ -1363,7 +1367,7 @@ fi | |||
AC_ARG_WITH([egl-platforms], | |||
[AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@], | |||
[comma delimited native platforms libEGL supports, e.g. | |||
"x11,kms" @<:@default=auto@:>@])], | |||
"x11,drm" @<:@default=auto@:>@])], | |||
[with_egl_platforms="$withval"], | |||
[with_egl_platforms=yes]) | |||
AC_ARG_WITH([egl-displays], | |||
@@ -1376,6 +1380,9 @@ case "$with_egl_platforms" in | |||
yes) | |||
if test "x$enable_egl" = xyes && test "x$mesa_driver" != xosmesa; then | |||
EGL_PLATFORMS="x11" | |||
if test "$mesa_driver" = dri; then | |||
EGL_PLATFORMS="$EGL_PLATFORMS drm" | |||
fi | |||
fi | |||
;; | |||
*) | |||
@@ -1518,18 +1525,28 @@ elif test "x$enable_gallium_i965" = xauto; then | |||
fi | |||
dnl | |||
dnl Gallium Radeon configuration | |||
dnl Gallium Radeon r300g configuration | |||
dnl | |||
AC_ARG_ENABLE([gallium-radeon], | |||
[AS_HELP_STRING([--enable-gallium-radeon], | |||
[build gallium radeon @<:@default=disabled@:>@])], | |||
[enable_gallium_radeon="$enableval"], | |||
[enable_gallium_radeon=auto]) | |||
if test "x$enable_gallium_radeon" = xauto; then | |||
if test "x$HAVE_LIBDRM_RADEON" = xyes; then | |||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" | |||
gallium_check_st "radeon/drm" "dri-r300" | |||
else | |||
AC_MSG_WARN([libdrm_radeon is missing, not building gallium-radeon (r300)]) | |||
fi | |||
fi | |||
if test "x$enable_gallium_radeon" = xyes; then | |||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" | |||
gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon" | |||
elif test "x$enable_gallium_radeon" = xauto; then | |||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" | |||
if test "x$HAVE_LIBDRM_RADEON" = xyes; then | |||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" | |||
gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon" | |||
else | |||
AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-radeon (r300)]) | |||
fi | |||
fi | |||
dnl | |||
@@ -1541,8 +1558,12 @@ AC_ARG_ENABLE([gallium-r600], | |||
[enable_gallium_r600="$enableval"], | |||
[enable_gallium_r600=auto]) | |||
if test "x$enable_gallium_r600" = xyes; then | |||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600" | |||
gallium_check_st "r600/drm" "dri-r600" | |||
if test "x$HAVE_LIBDRM_RADEON" = xyes; then | |||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600" | |||
gallium_check_st "r600/drm" "dri-r600" | |||
else | |||
AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-r600]) | |||
fi | |||
fi | |||
dnl |
@@ -25,7 +25,7 @@ Non-normalized Integer texture/framebuffer formats not started | |||
Packed depth/stencil formats DONE | |||
Per-buffer blend and masks (GL_EXT_draw_buffers2) DONE | |||
GL_EXT_texture_compression_rgtc not started | |||
Red and red/green texture formats Ian? | |||
Red and red/green texture formats DONE (swrast, i965) | |||
Transform feedback (GL_EXT_transform_feedback) ~50% done | |||
glBindFragDataLocation, glGetFragDataLocation, | |||
glBindBufferRange, glBindBufferBase commands | |||
@@ -44,7 +44,7 @@ Instanced drawing (GL_ARB_draw_instanced) ~50% done | |||
Buffer copying (GL_ARB_copy_buffer) DONE | |||
Primitive restart (GL_NV_primitive_restart) not started | |||
16 vertex texture image units not started | |||
Texture buffer objs (GL_ARB_textur_buffer_object) not started | |||
Texture buffer objs (GL_ARB_texture_buffer_object) not started | |||
Rectangular textures (GL_ARB_texture_rectangle) DONE | |||
Uniform buffer objs (GL_ARB_uniform_buffer_object) not started | |||
Signed normalized texture formats ~50% done | |||
@@ -69,7 +69,7 @@ GL 3.3: | |||
GLSL 3.30 not started | |||
GL_ARB_blend_func_extended not started | |||
GL_ARB_explicit_attrib_location not started | |||
GL_ARB_explicit_attrib_location DONE (swrast, i915, i965) | |||
GL_ARB_occlusion_query2 not started | |||
GL_ARB_sampler_objects not started | |||
GL_ARB_texture_rgb10_a2ui not started |
@@ -145,7 +145,7 @@ Make sure the values in src/mesa/main/version.h are correct. | |||
</p> | |||
<p> | |||
Update the docs/news.html file and docs/download.html files. | |||
Update docs/news.html. | |||
</p> | |||
<p> | |||
@@ -208,10 +208,11 @@ sftp USERNAME,mesa3d@web.sourceforge.net | |||
<p> | |||
Make an announcement on the mailing lists: | |||
mesa3d-dev@lists.sf.net,
mesa3d-users@lists.sf.net
mesa-dev@lists.freedesktop.org,
mesa-users@lists.freedesktop.org
and
mesa3d-announce@lists.sf.net
mesa-announce@lists.freedesktop.org
</p> | |||
@@ -72,13 +72,13 @@ drivers will be installed to <code>${libdir}/egl</code>.</p> | |||
<li><code>--with-egl-platforms</code> | |||
<p>List the platforms (window systems) to support. Its argument is a comma | |||
seprated string such as <code>--with-egl-platforms=x11,kms</code>. It decides | |||
separated string such as <code>--with-egl-platforms=x11,drm</code>. It decides
the platforms a driver may support. The first listed platform is also used by | |||
the main library to decide the native platform: the platform the EGL native | |||
types such as <code>EGLNativeDisplayType</code> or | |||
<code>EGLNativeWindowType</code> are defined for.</p>
<p>The available platforms are <code>x11</code>, <code>kms</code>, | |||
<p>The available platforms are <code>x11</code>, <code>drm</code>, | |||
<code>fbdev</code>, and <code>gdi</code>. The <code>gdi</code> platform can | |||
only be built with SCons.</p> | |||
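Since the first platform listed in --with-egl-platforms decides which window system the EGL native types refer to, a build with x11 listed first expects an X11 Display pointer as its EGLNativeDisplayType. The following is a minimal sketch under that assumption; it is not part of the patch, and error handling is trimmed:

#include <EGL/egl.h>
#include <X11/Xlib.h>

int main(void)
{
   /* Assumes x11 is the first (native) platform, so EGLNativeDisplayType
    * is an X11 Display pointer. */
   Display *xdpy = XOpenDisplay(NULL);
   EGLDisplay dpy = eglGetDisplay((EGLNativeDisplayType) xdpy);
   EGLint major, minor;

   if (dpy == EGL_NO_DISPLAY || !eglInitialize(dpy, &major, &minor))
      return 1;

   /* ... choose an EGLConfig and create surfaces/contexts here ... */

   eglTerminate(dpy);
   XCloseDisplay(xdpy);
   return 0;
}

The cast mirrors the documentation's point that the native platform fixes the meaning of EGLNativeDisplayType at build time.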
@@ -9,6 +9,9 @@ | |||
<center><h1>Mesa fbdev/DRI Drivers</h1></center> | |||
<br> | |||
<h1><center>NOTE: this information is obsolete and will be removed at | |||
a future date</center></h1> | |||
<h1>1. Introduction</h1> | |||
<p> | |||
@@ -22,7 +25,7 @@ Contributors to this project include Jon Smirl, Keith Whitwell and Dave Airlie. | |||
<p> | |||
Applications in the fbdev/DRI environment use | |||
the <a href="http://www.nabble.com/file/p15480666/MiniGXL.html"> MiniGLX</a> interface to choose pixel | |||
the MiniGLX interface to choose pixel | |||
formats, create rendering contexts, etc. It's a subset of the GLX and | |||
Xlib interfaces allowing some degree of application portability between | |||
the X and X-less environments. | |||
@@ -315,8 +318,7 @@ It means that the sample_server process is not running. | |||
<h1>5.0 Programming Information</h1> | |||
<p> | |||
OpenGL/Mesa is interfaced to fbdev via the <a href="http://www.nabble.com/file/p15480666/MiniGLX.html">MiniGLX</a> | |||
interface. | |||
OpenGL/Mesa is interfaced to fbdev via the MiniGLX interface. | |||
MiniGLX is a subset of Xlib and GLX API functions which provides just | |||
enough functionality to setup OpenGL rendering and respond to simple | |||
input events. | |||
@@ -332,7 +334,7 @@ This allows some degree of flexibility for software development and testing. | |||
However, the MiniGLX API is not binary-compatible with full Xlib/GLX. | |||
Some of the structures are different and some macros/functions work | |||
differently. | |||
See the <code>GL/miniglx.h</code> header file for details. | |||
See the GL/miniglx.h header file for details. | |||
</p> | |||
@@ -11,6 +11,22 @@ | |||
<H1>News</H1> | |||
<h2>October 4, 2010</h2> | |||
<p> | |||
<a href="relnotes-7.9.html">Mesa 7.9</a> (final) is released. This is a new | |||
development release. | |||
</p> | |||
<h2>September 27, 2010</h2> | |||
<p> | |||
<a href="relnotes-7.9.html">Mesa 7.9.0-rc1</a> is released. This is a | |||
release candidate for the 7.9 development release. | |||
</p> | |||
<h2>June 16, 2010</h2> | |||
<p> | |||
@@ -1277,7 +1293,6 @@ grateful. | |||
<p> | |||
</p><h2>March 18, 1999</h2> | |||
<p>The new webpages are now online. Enjoy, and let me know if you find any errors. | |||
For an eye-candy free version you can use <a href="http://www.mesa3d.org/txt/">http://www.mesa3d.org/txt/</a>.</p> | |||
<p> | |||
</p><h2>February 16, 1999</h2> | |||
<p><a href="http://www.sgi.com/">SGI</a> releases its <a href="http://www.sgi.com/software/opensource/glx/">GLX |
@@ -0,0 +1,53 @@ | |||
<HTML> | |||
<TITLE>Mesa Release Notes</TITLE> | |||
<head><link rel="stylesheet" type="text/css" href="mesa.css"></head> | |||
<BODY> | |||
<body bgcolor="#eeeeee"> | |||
<H1>Mesa 7.10 Release Notes / tbd</H1> | |||
<p> | |||
Mesa 7.10 is a new development release. | |||
People who are concerned with stability and reliability should stick | |||
with a previous release or wait for Mesa 7.10.1. | |||
</p> | |||
<p> | |||
Mesa 7.10 implements the OpenGL 2.1 API, but the version reported by | |||
glGetString(GL_VERSION) depends on the particular driver being used. | |||
Some drivers don't support all the features required in OpenGL 2.1. | |||
</p> | |||
<p> | |||
See the <a href="install.html">Compiling/Installing page</a> for prerequisites | |||
for DRI hardware acceleration. | |||
</p> | |||
<h2>MD5 checksums</h2> | |||
<pre> | |||
tbd | |||
</pre> | |||
<h2>New features</h2> | |||
<ul> | |||
<li>GL_ARB_explicit_attrib_location extension (Intel and software drivers). | |||
</ul> | |||
<h2>Bug fixes</h2> | |||
<ul> | |||
<li>tbd</li> | |||
</ul> | |||
<h2>Changes</h2> | |||
<ul> | |||
<li>tbd</li> | |||
</ul> | |||
</body> | |||
</html> |
@@ -26,7 +26,15 @@ for DRI hardware acceleration. | |||
<h2>MD5 checksums</h2> | |||
<pre> | |||
tbd | |||
c89b63d253605ed40e8ac370d25a833c MesaLib-7.8.2.tar.gz | |||
6be2d343a0089bfd395ce02aaf8adb57 MesaLib-7.8.2.tar.bz2 | |||
a04ad3b06ac5ff3969a003fa7bbf7d5b MesaLib-7.8.2.zip | |||
7c213f92efeb471f0331670d5079d4c0 MesaDemos-7.8.2.tar.gz | |||
757d9e2e06f48b1a52848be9b0307ced MesaDemos-7.8.2.tar.bz2 | |||
8d0e5cfe68b8ebf90265d350ae2c48b1 MesaDemos-7.8.2.zip | |||
b74482e3f44f35ed395c4aada4fd8240 MesaGLUT-7.8.2.tar.gz | |||
a471807b65e49c325808ba4551be93ed MesaGLUT-7.8.2.tar.bz2 | |||
9f190268c42be582ef66e47365ee61e3 MesaGLUT-7.8.2.zip | |||
</pre> | |||
@@ -44,10 +52,95 @@ tbd | |||
<ul> | |||
<li>Fixed Gallium glDrawPixels(GL_DEPTH_COMPONENT). | |||
<li>Fixed Gallium Cell driver to a buildable, runnable state.
<li>Fixed bad error checking for glFramebufferRenderbuffer(attachment=GL_DEPTH_STENCIL_ATTACHMENT). | |||
<li>Fixed incorrect Z coordinate handling in "meta" glDraw/CopyPixels. | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=23670">Bug | |||
#23670</a>.</li> | |||
<li>Assorted i965 driver fixes. | |||
Including but not limited to: | |||
<ul> | |||
<li>Fix scissoring when width or height is | |||
0. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27643">Bug | |||
#27643</a>. | |||
<li>Fix bit allocation for number of color regions for | |||
ARB_draw_buffers.</li> | |||
<li>Set the correct provoking vertex for clipped first-mode | |||
trifans. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24470">Bug | |||
#24470</a>.</li> | |||
<li>Use <code>R16G16B16A16_FLOAT</code> for 3-component half-float.</li> | |||
<li>Fix assertion for surface tile offset usage on Ironlake.</li> | |||
<li>Fix cube map layouts on Ironlake.</li> | |||
<li>When an RB gets a new region, clear the old from the state | |||
cache. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=24119">Bug | |||
#24119</a>.</li> | |||
<li>Reject shaders with uninlined function calls instead of hanging.</li> | |||
</ul> | |||
</li> | |||
<li>Assorted i915 driver fixes. Including but not limited to: | |||
<ul> | |||
<li>Fixed texture LOD clamping in i915 driver. | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=24846">Bug | |||
#24846</a>.</li> | |||
<li>Fix off-by-one for drawing rectangle. | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=27408">Bug | |||
#27408</a>.</li> | |||
</ul> | |||
</li> | |||
<li>Fixed hangs in etracer on 830 and 845 | |||
chipsets. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=26557">Bug | |||
#26557</a>.</li> | |||
<li>Fixed tiling of small textures on all Intel drivers.</li> | |||
<li>Fixed crash in Savage driver when using <code>_mesa_CopyTexImage2D</code>. | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=27652">Bug | |||
#27652</a>.</li> | |||
<li>Assorted GLX fixes. Including but not limited to: | |||
<ul> | |||
<li>Fixed <code>__glXInitializeVisualConfigFromTags</code>'s handling of | |||
unrecognized fbconfig tags.</li> | |||
<li>Fixed regression with <code>GLX_USE_GL</code>. | |||
<li>Fixed config chooser logic for 'mask' matching.</li> | |||
<li>Report swap events correctly in direct rendered case (DRI2)</li> | |||
<li>Fixed build with dri2proto which doesn't define | |||
<code>X_DRI2SwapInterval</code>.</li> | |||
<li>Get <code>GLX_SCREEN</code> first in <code>__glXQueryContextInfo</code>. | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=14245">Bug | |||
#14245</a>.</li> | |||
</ul> | |||
</li> | |||
<li>Assorted GLSL fixes. Including but not limited to: | |||
<ul> | |||
<li>Change variable declared assertion into conditional in GLSL | |||
compiler. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27921">Bug | |||
#27921</a>.</li> | |||
<li>Fix instruction indexing | |||
bugs. <a href="https://bugs.freedesktop.org/show_bug.cgi?id=27566">Bug | |||
#27566</a>.</li> | |||
<li>Updated uniform location / offset encoding to be more like | |||
other implementations.</li> | |||
<li>Don't overwrite a driver's shader infolog with generic failure | |||
message.</li> | |||
</ul> | |||
</li> | |||
<li>Fixed OSMesa build for 16 and 32-bit color channel depth. | |||
<li>Fixed OSMesa build with hidden symbol visibility. libOSMesa no longer links to libGL. | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=28305">Bug | |||
#28305</a>. | |||
<li>Fixed handling of multiple render targets in fixed-function | |||
texture environment programs.</li>
<li>Fixed conversion errors in <code>signed_rgba8888[rev]</code> texel | |||
fetch.</li> | |||
<li>Don't set srcLevel on <code>GL_TEXTURE_RECTANGLE_ARB</code> targets.</li> | |||
<li>Various build fixes for OpenBSD.</li> | |||
<li>Various build fixes for OS X.</li> | |||
<li>Various build fixes for GCC 3.3.</li> | |||
</ul> | |||
<h2>Changes</h2> | |||
<p>None.</p> | |||
</body> | |||
</html> |
@@ -0,0 +1,89 @@ | |||
<HTML> | |||
<TITLE>Mesa Release Notes</TITLE> | |||
<head><link rel="stylesheet" type="text/css" href="mesa.css"></head> | |||
<BODY> | |||
<body bgcolor="#eeeeee"> | |||
<H1>Mesa 7.8.3 Release Notes / (date tbd)</H1> | |||
<p> | |||
Mesa 7.8.3 is a bug fix release which fixes bugs found since the 7.8.2 release. | |||
</p> | |||
<p> | |||
Mesa 7.8.3 implements the OpenGL 2.1 API, but the version reported by | |||
glGetString(GL_VERSION) depends on the particular driver being used. | |||
Some drivers don't support all the features required in OpenGL 2.1. | |||
</p> | |||
<p> | |||
See the <a href="install.html">Compiling/Installing page</a> for prerequisites | |||
for DRI hardware acceleration. | |||
</p> | |||
<h2>MD5 checksums</h2> | |||
<pre> | |||
x MesaLib-7.8.3.tar.gz | |||
x MesaLib-7.8.3.tar.bz2 | |||
x MesaLib-7.8.3.zip | |||
x MesaDemos-7.8.3.tar.gz | |||
x MesaDemos-7.8.3.tar.bz2 | |||
x MesaDemos-7.8.3.zip | |||
x MesaGLUT-7.8.3.tar.gz | |||
x MesaGLUT-7.8.3.tar.bz2 | |||
x MesaGLUT-7.8.3.zip | |||
</pre> | |||
<h2>New features</h2> | |||
<p>None.</p> | |||
<h2>Changes</h2> | |||
<ul> | |||
<li>The radeon driver should use less memory when searching for a valid mip | |||
image.</li> | |||
</ul> | |||
<h2>Bug fixes</h2> | |||
<ul> | |||
<li>Fix unsupported FB with D24S8 (bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=23670">29116</a>)</li> | |||
<li>Fix ReadPixels crash when reading depth/stencil from an FBO</li> | |||
<li>Fixed a bug rendering to 16-bit buffers using swrast.</li> | |||
<li>Fixed a state tracker/TGSI bug that caused crashes when using Windows' | |||
memory debugging features.</li> | |||
<li>Fixed an issue rendering to 32-bit channels with swrast (bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=29487">29487</a>)</li> | |||
<li>GLSL: fix indirect <TT>gl_TextureMatrix</TT> addressing (bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=28967">28967</a>)</li> | |||
<li>GLSL: fix for bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=27216">27216</a></li> | |||
<li>GLSL: fix zw fragcoord entries in some cases (bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=29183">29183</a>)</li> | |||
<li>Fix texture env generation in some cases (bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=28169">28169</a>)</li> | |||
<li>osmesa: a fix for calling <TT>OSMesaMakeCurrent</TT> twice was applied (bug | |||
<a href="https://bugs.freedesktop.org/show_bug.cgi?id=10966">10966</a>)</li>
<li>A bug was fixed which could cause Mesa to ignore the | |||
<TT>MESA_EXTENSION_OVERRIDE</TT> environment variable.</li> | |||
<li>A bug related to specular highlights on backfaces was fixed.</li> | |||
<li>A radeon-specific issue with <TT>glCopyTex(Sub)Image</TT> was | |||
corrected.</li> | |||
<li>radeon/wine: flush command stream in more cases, fixing wine d3d9 | |||
tests.</li> | |||
<li>r600: fix sin+cos normalization.</li> | |||
<li>r600: (properly) ignore <TT>GL_COORD_REPLACE</TT> when point sprites are | |||
disabled.</li> | |||
<li>radeon: avoid flushing when the context is not current.</li> | |||
<li>r300c: a bug affecting unaligned BOs was fixed.</li> | |||
<li>r300c: a hardlock caused by ARB_half_float_vertex being incorrectly advertised on some chipsets was fixed.</li>
</ul> | |||
</body> | |||
</html> |
@@ -8,7 +8,7 @@ | |||
<body bgcolor="#eeeeee"> | |||
<H1>Mesa 7.9 Release Notes / date TBD</H1> | |||
<H1>Mesa 7.9 Release Notes / October 4, 2010</H1> | |||
<p> | |||
Mesa 7.9 is a new development release. | |||
@@ -28,7 +28,12 @@ for DRI hardware acceleration. | |||
<h2>MD5 checksums</h2> | |||
<pre> | |||
tbd | |||
ed65ab425b25895c7f473d0a5e6e64f8 MesaLib-7.9.tar.gz | |||
82c740c49d572baa6da2b1a1eee90bca MesaLib-7.9.tar.bz2 | |||
cd2b6ecec759b0457475e94bbb38fedb MesaLib-7.9.zip | |||
7b54af9fb9b1f6a1a65db2520f50848f MesaGLUT-7.9.tar.gz | |||
20d07419d1929f833fdb36bced290ad5 MesaGLUT-7.9.tar.bz2 | |||
62a7edecd7c92675cd6029b05217eb0a MesaGLUT-7.9.zip | |||
</pre> | |||
@@ -37,16 +42,85 @@ tbd | |||
<li>New, improved GLSL compiler written by Intel. | |||
See the <a href="shading.html"> Shading Language</a> page for | |||
more information. | |||
<li>GL_EXT_timer_query extension (i965 driver only) | |||
<li>New, very experimental Gallium driver for R600-R700 Radeons. | |||
<li>Support for AMD Evergreen-based Radeons (HD 5xxx) | |||
<li>GL_EXT_timer_query extension (i965 driver and softpipe only) | |||
<li>GL_EXT_framebuffer_multisample extension (intel drivers, MAX_SAMPLES = 1) | |||
<li>GL_ARB_texture_swizzle extension (alias of GL_EXT_texture_swizzle) | |||
<li>GL_ARB_draw_elements_base_vertex, GL_ARB_fragment_program_shadow | |||
and GL_EXT_draw_buffers2 in Gallium drivers | |||
<li>GL_ARB_draw_elements_base_vertex, GL_ARB_fragment_program_shadow, | |||
GL_ARB_window_pos, GL_EXT_gpu_program_parameters, | |||
GL_ATI_texture_env_combine3, GL_MESA_pack_invert, and GL_OES_EGL_image | |||
extensions in Gallium drivers | |||
<li>GL_ARB_depth_clamp and GL_NV_depth_clamp extensions (in nv50 and r600 | |||
Gallium drivers) | |||
<li>GL_ARB_half_float_vertex extension (in nvfx, r300, r600, softpipe, | |||
and llvmpipe Gallium drivers) | |||
<li>GL_EXT_draw_buffers2 (in nv50, r600, softpipe, and llvmpipe Gallium | |||
drivers) | |||
<li>GL_EXT_texture_swizzle (in nvfx, r300, r600, softpipe, and llvmpipe | |||
Gallium drivers) | |||
<li>GL_ATI_texture_mirror_once (in nvfx, nv50, r300, r600, softpipe, and | |||
llvmpipe Gallium drivers) | |||
<li>GL_NV_conditional_render (in r300 Gallium driver) | |||
<li>Initial "signs of life" support for Sandybridge hardware in i965 DRI | |||
driver. | |||
</ul> | |||
<h2>Bug fixes</h2> | |||
<p>This list is likely incomplete.</p> | |||
<ul> | |||
<li>Massive improvements to the Gallium driver for R300-R500 Radeons; this | |||
driver is now considered stable for use as a DRI (OpenGL) driver. | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=10908">Bug 10908</a> - GLSL: gl_FogParamaters gl_Fog built-in uniform not functioning</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=13753">Bug 13753</a> - Numerous bugs in GLSL uniform handling</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=16854">Bug 16854</a> - GLSL function call at global scope causes SEGV</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=16856">Bug 16856</a> - GLSL indexing of unsized array results in assertion failure</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=18659">Bug 18659</a> - Crash in shader/slang/slang_codegen.c _slang_gen_function_call_name()</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=19089">Bug 19089</a> - [GLSL] glsl1/shadow2D() cases fail</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=22622">Bug 22622</a> - [GM965 GLSL] noise*() cause GPU lockup</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=23743">Bug 23743</a> - For loop from 0 to 0 not optimized out</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=24553">Bug 24553</a> - shader compilation times explode when using more () pairs</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25664">Bug 25664</a> - [GLSL] re-declaring an empty array fails to compile</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25769">Bug 25769</a> - [GLSL] "float" can be implicitly converted to "int"</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25808">Bug 25808</a> - [GLSL] const variable is modified successfully</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25826">Bug 25826</a> - [GLSL] declaring an unsized array then re-declaring with a size fails</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25827">Bug 25827</a> - [GLSL] vector constructor accepts too many arguments successfully</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25829">Bug 25829</a> - [GLSL] allowing non-void function without returning value</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25830">Bug 25830</a> - [GLSL] allowing non-constant-expression as const declaration initializer</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25877">Bug 25877</a> - [GLSL 1.10] implicit conversion from "int" to "float" should not be allowed</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25878">Bug 25878</a> - [GLSL] sampler is converted to int successfully</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25994">Bug 25994</a> - [GM45][GLSL] 'return' statement in vertex shader unsupported</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25999">Bug 25999</a> - [GLSL] embedded structure constructor fails to compile</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26000">Bug 26000</a> - [GLSL] allowing different parameter qualifier between the function definition and declaration</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26001">Bug 26001</a> - [GLSL 1.10] constructing matrix from matrix succeeds</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26224">Bug 26224</a> - [GLSL] Cannot get location of a uniform struct member</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=26990">Bug 26990</a> - [GLSL] variable declaration in "while" fails to compile</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27004">Bug 27004</a> - [GLSL] allowing macro redefinition</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27060">Bug 27060</a> - [965] piglit glsl-fs-raytrace failure due to lack of function calls.</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27216">Bug 27216</a> - Assignment with a function call in an if statement causes an assertion failure</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27261">Bug 27261</a> - GLSL Compiler fails on the following vertex shader</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27265">Bug 27265</a> - GLSL Compiler doesnt link the attached vertex shader</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27388">Bug 27388</a> - [i965] piglit glsl-vs-arrays failure</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27403">Bug 27403</a> - GLSL struct causing "Invalid src register file ..." error</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27914">Bug 27914</a> - GLSL compiler uses MUL+ADD where it could use MAD</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28055">Bug 28055</a> - glsl-texcoord-array fails GLSL compilation</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28374">Bug 28374</a> - SIGSEGV shader/slang/slang_typeinfo.c:534</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28748">Bug 28748</a> - [i965] uninlined function calls support</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28833">Bug 28833</a> - piglit/shaders/glsl-texcoord-array fail</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28834">Bug 28834</a> - Add support for system fpclassify to GL_OES_query_matrix function for OpenBSD / NetBSD</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28837">Bug 28837</a> - varying vec4 index support</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28845">Bug 28845</a> - The GLU tesselator code has some warnings</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28889">Bug 28889</a> - [regression] wine game crash</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28894">Bug 28894</a> - slang build fails if absolute path contains spaces</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28913">Bug 28913</a> - [GLSL] allowing two version statements</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28931">Bug 28931</a> - Floating Point Exception in Warzone2100 Trunk version</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28966">Bug 28966</a> - [r300g] Dynamic branching 3 demo does not run</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28967">Bug 28967</a> - slang/slang_emit.c:350: storage_to_src_reg: Assertion `index >= 0' failed.</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29013">Bug 29013</a> - [r300g] translate_rgb_op: unknown opcode ILLEGAL OPCODE</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29020">Bug 29020</a> - [r300g] Wine d3d9 tests hardlock</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29910">Bug 29910</a> - Mesa advertises bogus GL_ARB_shading_language_120</li> | |||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=30196">Bug 30196</a> - [GLSL] gl_TextureMatrix{Inverse,Transpose,InverseTranspose} unsupported</li> | |||
</ul> | |||
@@ -14,6 +14,7 @@ The release notes summarize what's new or changed in each Mesa release. | |||
<UL> | |||
<LI><A HREF="relnotes-7.9.html">7.9 release notes</A> | |||
<LI><A HREF="relnotes-7.8.3.html">7.8.3 release notes</A> | |||
<LI><A HREF="relnotes-7.8.2.html">7.8.2 release notes</A> | |||
<LI><A HREF="relnotes-7.8.1.html">7.8.1 release notes</A> | |||
<LI><A HREF="relnotes-7.8.html">7.8 release notes</A> |
@@ -805,7 +805,7 @@ typedef struct __DRIimageExtensionRec __DRIimageExtension; | |||
struct __DRIimageExtensionRec { | |||
__DRIextension base; | |||
__DRIimage *(*createImageFromName)(__DRIcontext *context, | |||
__DRIimage *(*createImageFromName)(__DRIscreen *screen, | |||
int width, int height, int format, | |||
int name, int pitch, | |||
void *loaderPrivate); | |||
@@ -841,7 +841,7 @@ typedef struct __DRIimageLookupExtensionRec __DRIimageLookupExtension; | |||
struct __DRIimageLookupExtensionRec { | |||
__DRIextension base; | |||
__DRIimage *(*lookupEGLImage)(__DRIcontext *context, void *image, | |||
__DRIimage *(*lookupEGLImage)(__DRIscreen *screen, void *image, | |||
void *loaderPrivate); | |||
}; | |||
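The two hunks above change the __DRIimage extension entry points to take a __DRIscreen instead of a __DRIcontext, so images can be created and looked up without a bound context. Below is a minimal, hypothetical sketch of a loader-side lookup hook written against the new signature; the loader_display struct and function names are illustrative only and are not code from this patch:

#include <GL/internal/dri_interface.h>

/* Hypothetical loader-side state, for illustration only. */
struct loader_display {
   void *egl_display;
};

/* The hook now receives the __DRIscreen rather than a __DRIcontext;
 * loaderPrivate is whatever the loader passed when the screen was created. */
static __DRIimage *
loader_lookup_egl_image(__DRIscreen *screen, void *image, void *loaderPrivate)
{
   struct loader_display *dpy = loaderPrivate;

   (void) screen;
   (void) dpy;

   /* A real loader would map 'image' back to its EGLImage bookkeeping and
    * return the matching __DRIimage; NULL keeps this sketch self-contained. */
   return NULL;
}

/* The loader would list this in the extension array it hands to the driver. */
static const __DRIimageLookupExtension image_lookup_extension = {
   { __DRI_IMAGE_LOOKUP, __DRI_IMAGE_LOOKUP_VERSION },
   loader_lookup_egl_image
};

The egl_dri2.c changes further down pass the _EGLDisplay as loaderPrivate for exactly this reason.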
@@ -54,11 +54,13 @@ prefixes32 = SCons.Util.Split(""" | |||
i586-mingw32msvc- | |||
i686-mingw32msvc- | |||
i686-pc-mingw32- | |||
i686-w64-mingw32- | |||
""") | |||
prefixes64 = SCons.Util.Split(""" | |||
amd64-mingw32- | |||
amd64-mingw32msvc- | |||
amd64-pc-mingw32- | |||
x86_64-w64-mingw32- | |||
""") | |||
def find(env): |
@@ -49,14 +49,14 @@ def symlink(target, source, env): | |||
os.symlink(os.path.basename(source), target) | |||
def install(env, source, subdir): | |||
target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build'], subdir) | |||
target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'], subdir) | |||
env.Install(target_dir, source) | |||
def install_program(env, source): | |||
install(env, source, 'bin') | |||
def install_shared_library(env, sources, version = ()): | |||
install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build']) | |||
install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir']) | |||
version = tuple(map(str, version)) | |||
if env['SHLIBSUFFIX'] == '.dll': | |||
dlls = env.FindIxes(sources, 'SHLIBPREFIX', 'SHLIBSUFFIX') | |||
@@ -130,7 +130,6 @@ def generate(env): | |||
env['msvc'] = env['CC'] == 'cl' | |||
# shortcuts | |||
debug = env['debug'] | |||
machine = env['machine'] | |||
platform = env['platform'] | |||
x86 = env['machine'] == 'x86' | |||
@@ -138,20 +137,42 @@ def generate(env): | |||
gcc = env['gcc'] | |||
msvc = env['msvc'] | |||
# Backwards compatibility with the debug= profile= options
if env['build'] == 'debug': | |||
if not env['debug']: | |||
print 'scons: debug option is deprecated: use instead build=release' | |||
env['build'] = 'release' | |||
if env['profile']: | |||
print 'scons: profile option is deprecated: use instead build=profile' | |||
env['build'] = 'profile' | |||
if False: | |||
# Enforce SConscripts to use the new build variable | |||
env.popitem('debug') | |||
env.popitem('profile') | |||
else: | |||
# Backwards portability with older sconscripts | |||
if env['build'] in ('debug', 'checked'): | |||
env['debug'] = True | |||
env['profile'] = False | |||
if env['build'] == 'profile': | |||
env['debug'] = False | |||
env['profile'] = True | |||
if env['build'] == 'release': | |||
env['debug'] = False | |||
env['profile'] = False | |||
# Put build output in a separate dir, which depends on the current | |||
# configuration. See also http://www.scons.org/wiki/AdvancedBuildExample | |||
build_topdir = 'build' | |||
build_subdir = env['platform'] | |||
if env['machine'] != 'generic': | |||
build_subdir += '-' + env['machine'] | |||
if env['debug']: | |||
build_subdir += "-debug" | |||
if env['profile']: | |||
build_subdir += "-profile" | |||
if env['build'] != 'release': | |||
build_subdir += '-' + env['build'] | |||
build_dir = os.path.join(build_topdir, build_subdir) | |||
# Place the .sconsign file in the build dir too, to avoid issues with | |||
# different scons versions building the same source file | |||
env['build'] = build_dir | |||
env['build_dir'] = build_dir | |||
env.SConsignFile(os.path.join(build_dir, '.sconsign')) | |||
if 'SCONS_CACHE_DIR' in os.environ: | |||
print 'scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],) | |||
@@ -165,11 +186,11 @@ def generate(env): | |||
# C preprocessor options | |||
cppdefines = [] | |||
if debug: | |||
if env['build'] in ('debug', 'checked'): | |||
cppdefines += ['DEBUG'] | |||
else: | |||
cppdefines += ['NDEBUG'] | |||
if env['profile']: | |||
if env['build'] == 'profile': | |||
cppdefines += ['PROFILE'] | |||
if platform == 'windows': | |||
cppdefines += [ | |||
@@ -190,7 +211,7 @@ def generate(env): | |||
'_SCL_SECURE_NO_WARNINGS', | |||
'_SCL_SECURE_NO_DEPRECATE', | |||
] | |||
if debug: | |||
if env['build'] in ('debug', 'checked'): | |||
cppdefines += ['_DEBUG'] | |||
if env['toolchain'] == 'winddk': | |||
# Mimic WINDDK's builtin flags. See also: | |||
@@ -217,7 +238,7 @@ def generate(env): | |||
('__BUILDMACHINE__', 'WinDDK'), | |||
('FPO', '0'), | |||
] | |||
if debug: | |||
if env['build'] in ('debug', 'checked'): | |||
cppdefines += [('DBG', 1)] | |||
if platform == 'wince': | |||
cppdefines += [ | |||
@@ -253,15 +274,16 @@ def generate(env): | |||
ccflags = [] # C & C++ | |||
if gcc: | |||
ccversion = env['CCVERSION'] | |||
if debug: | |||
ccflags += ['-O0', '-g3'] | |||
if env['build'] == 'debug': | |||
ccflags += ['-O0'] | |||
elif ccversion.startswith('4.2.'): | |||
# gcc 4.2.x optimizer is broken | |||
print "warning: gcc 4.2.x optimizer is broken -- disabling optimizations" | |||
ccflags += ['-O0', '-g3'] | |||
ccflags += ['-O0'] | |||
else: | |||
ccflags += ['-O3', '-g3'] | |||
if env['profile']: | |||
ccflags += ['-O3'] | |||
ccflags += ['-g3'] | |||
if env['build'] in ('checked', 'profile'): | |||
# See http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#Which_options_should_I_pass_to_gcc_when_compiling_for_profiling? | |||
ccflags += [ | |||
'-fno-omit-frame-pointer', | |||
@@ -320,7 +342,7 @@ def generate(env): | |||
# See also: | |||
# - http://msdn.microsoft.com/en-us/library/19z1t1wy.aspx | |||
# - cl /? | |||
if debug: | |||
if env['build'] == 'debug': | |||
ccflags += [ | |||
'/Od', # disable optimizations | |||
'/Oi', # enable intrinsic functions | |||
@@ -389,7 +411,7 @@ def generate(env): | |||
if env['platform'] == 'windows' and msvc: | |||
# Choose the appropriate MSVC CRT | |||
# http://msdn.microsoft.com/en-us/library/2kzt1wy3.aspx | |||
if env['debug']: | |||
if env['build'] in ('debug', 'checked'): | |||
env.Append(CCFLAGS = ['/MTd']) | |||
env.Append(SHCCFLAGS = ['/LDd']) | |||
else: | |||
@@ -421,7 +443,7 @@ def generate(env): | |||
else: | |||
env['_LIBFLAGS'] = '-Wl,--start-group ' + env['_LIBFLAGS'] + ' -Wl,--end-group' | |||
if msvc: | |||
if not env['debug']: | |||
if env['build'] != 'debug': | |||
# enable Link-time Code Generation | |||
linkflags += ['/LTCG'] | |||
env.Append(ARFLAGS = ['/LTCG']) | |||
@@ -460,7 +482,7 @@ def generate(env): | |||
'/entry:DrvEnableDriver', | |||
] | |||
if env['debug'] or env['profile']: | |||
if env['build'] != 'release': | |||
linkflags += [ | |||
'/MAP', # http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx | |||
] |
@@ -124,7 +124,7 @@ def generate(env): | |||
# Some of the LLVM C headers use the inline keyword without | |||
# defining it. | |||
env.Append(CPPDEFINES = [('inline', '__inline')]) | |||
if env['debug']: | |||
if env['build'] in ('debug', 'checked'): | |||
# LLVM libraries are static, build with /MT, and they | |||
# automatically link against LIBCMT. When we're doing a
# debug build we'll be linking against LIBCMTD, so disable |
@@ -122,7 +122,7 @@ def get_wce600_paths(env): | |||
host_cpu = os.environ.get('_HOSTCPUTYPE', 'i386') | |||
target_cpu = os.environ.get('_TGTCPU', 'x86') | |||
if env['debug']: | |||
if env['build'] == 'debug': | |||
build = 'debug' | |||
else: | |||
build = 'retail' |
@@ -292,7 +292,7 @@ dri2_process_buffers(struct dri2_egl_surface *dri2_surf, | |||
struct dri2_egl_display *dri2_dpy = | |||
dri2_egl_display(dri2_surf->base.Resource.Display); | |||
xcb_rectangle_t rectangle; | |||
int i; | |||
unsigned i; | |||
dri2_surf->buffer_count = count; | |||
dri2_surf->have_fake_front = 0; | |||
@@ -339,6 +339,8 @@ dri2_get_buffers(__DRIdrawable * driDrawable, | |||
xcb_dri2_get_buffers_reply_t *reply; | |||
xcb_dri2_get_buffers_cookie_t cookie; | |||
(void) driDrawable; | |||
cookie = xcb_dri2_get_buffers_unchecked (dri2_dpy->conn, | |||
dri2_surf->drawable, | |||
count, count, attachments); | |||
@@ -360,23 +362,28 @@ dri2_get_buffers(__DRIdrawable * driDrawable, | |||
static void | |||
dri2_flush_front_buffer(__DRIdrawable * driDrawable, void *loaderPrivate) | |||
{ | |||
(void) driDrawable; | |||
/* FIXME: Does EGL support front buffer rendering at all? */ | |||
#if 0 | |||
struct dri2_egl_surface *dri2_surf = loaderPrivate; | |||
dri2WaitGL(dri2_surf); | |||
#else | |||
(void) loaderPrivate; | |||
#endif | |||
} | |||
static __DRIimage * | |||
dri2_lookup_egl_image(__DRIcontext *context, void *image, void *data) | |||
dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data) | |||
{ | |||
struct dri2_egl_context *dri2_ctx = data; | |||
_EGLDisplay *disp = dri2_ctx->base.Resource.Display; | |||
_EGLDisplay *disp = data; | |||
struct dri2_egl_image *dri2_img; | |||
_EGLImage *img; | |||
(void) screen; | |||
img = _eglLookupImage(image, disp); | |||
if (img == NULL) { | |||
_eglError(EGL_BAD_PARAMETER, "dri2_lookup_egl_image"); | |||
@@ -407,6 +414,8 @@ dri2_get_buffers_with_format(__DRIdrawable * driDrawable, | |||
xcb_dri2_get_buffers_with_format_cookie_t cookie; | |||
xcb_dri2_attach_format_t *format_attachments; | |||
(void) driDrawable; | |||
format_attachments = (xcb_dri2_attach_format_t *) attachments; | |||
cookie = xcb_dri2_get_buffers_with_format_unchecked (dri2_dpy->conn, | |||
dri2_surf->drawable, | |||
@@ -440,14 +449,14 @@ struct dri2_extension_match { | |||
static struct dri2_extension_match dri2_driver_extensions[] = { | |||
{ __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) }, | |||
{ __DRI_DRI2, 1, offsetof(struct dri2_egl_display, dri2) }, | |||
{ NULL } | |||
{ NULL, 0, 0 } | |||
}; | |||
static struct dri2_extension_match dri2_core_extensions[] = { | |||
{ __DRI2_FLUSH, 1, offsetof(struct dri2_egl_display, flush) }, | |||
{ __DRI_TEX_BUFFER, 2, offsetof(struct dri2_egl_display, tex_buffer) }, | |||
{ __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) }, | |||
{ NULL } | |||
{ NULL, 0, 0 } | |||
}; | |||
static EGLBoolean | |||
@@ -728,7 +737,7 @@ dri2_create_screen(_EGLDisplay *disp) | |||
dri2_dpy = disp->DriverData; | |||
dri2_dpy->dri_screen = | |||
dri2_dpy->dri2->createNewScreen(0, dri2_dpy->fd, dri2_dpy->extensions, | |||
&dri2_dpy->driver_configs, dri2_dpy); | |||
&dri2_dpy->driver_configs, disp); | |||
if (dri2_dpy->dri_screen == NULL) { | |||
_eglLog(_EGL_WARNING, "DRI2: failed to create dri screen"); | |||
@@ -772,6 +781,8 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp, | |||
{ | |||
struct dri2_egl_display *dri2_dpy; | |||
(void) drv; | |||
dri2_dpy = malloc(sizeof *dri2_dpy); | |||
if (!dri2_dpy) | |||
return _eglError(EGL_BAD_ALLOC, "eglInitialize"); | |||
@@ -1075,6 +1086,8 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, | |||
const __DRIconfig *dri_config; | |||
int api; | |||
(void) drv; | |||
dri2_ctx = malloc(sizeof *dri2_ctx); | |||
if (!dri2_ctx) { | |||
_eglError(EGL_BAD_ALLOC, "eglCreateContext"); | |||
@@ -1146,6 +1159,8 @@ dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) | |||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); | |||
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); | |||
(void) drv; | |||
if (_eglIsSurfaceBound(surf)) | |||
return EGL_TRUE; | |||
@@ -1221,6 +1236,8 @@ dri2_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, | |||
xcb_screen_iterator_t s; | |||
xcb_generic_error_t *error; | |||
(void) drv; | |||
dri2_surf = malloc(sizeof *dri2_surf); | |||
if (!dri2_surf) { | |||
_eglError(EGL_BAD_ALLOC, "dri2_create_surface"); | |||
@@ -1369,7 +1386,7 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw, | |||
xcb_rectangle_t rectangles[16]; | |||
int i; | |||
if (numRects > ARRAY_SIZE(rectangles)) | |||
if (numRects > (int)ARRAY_SIZE(rectangles)) | |||
return dri2_copy_region(drv, disp, draw, dri2_surf->region); | |||
/* FIXME: Invert y here? */ | |||
@@ -1394,6 +1411,8 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw, | |||
static _EGLProc | |||
dri2_get_proc_address(_EGLDriver *drv, const char *procname) | |||
{ | |||
(void) drv; | |||
/* FIXME: Do we need to support lookup of EGL symbols too? */ | |||
return (_EGLProc) _glapi_get_proc_address(procname); | |||
@@ -1405,6 +1424,8 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) | |||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); | |||
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface); | |||
(void) drv; | |||
/* FIXME: If EGL allows frontbuffer rendering for window surfaces, | |||
* we need to copy fake to real here.*/ | |||
@@ -1416,6 +1437,9 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) | |||
static EGLBoolean | |||
dri2_wait_native(_EGLDriver *drv, _EGLDisplay *disp, EGLint engine) | |||
{ | |||
(void) drv; | |||
(void) disp; | |||
if (engine != EGL_CORE_NATIVE_ENGINE) | |||
return _eglError(EGL_BAD_PARAMETER, "eglWaitNative"); | |||
/* glXWaitX(); */ | |||
@@ -1438,6 +1462,8 @@ dri2_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, | |||
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); | |||
xcb_gcontext_t gc; | |||
(void) drv; | |||
(*dri2_dpy->flush->flush)(dri2_surf->dri_drawable); | |||
gc = xcb_generate_id(dri2_dpy->conn); | |||
@@ -1501,6 +1527,11 @@ static EGLBoolean | |||
dri2_release_tex_image(_EGLDriver *drv, | |||
_EGLDisplay *disp, _EGLSurface *surf, EGLint buffer) | |||
{ | |||
(void) drv; | |||
(void) disp; | |||
(void) surf; | |||
(void) buffer; | |||
return EGL_TRUE; | |||
} | |||
@@ -1509,7 +1540,6 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx, | |||
EGLClientBuffer buffer, const EGLint *attr_list) | |||
{ | |||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); | |||
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); | |||
struct dri2_egl_image *dri2_img; | |||
unsigned int attachments[1]; | |||
xcb_drawable_t drawable; | |||
@@ -1521,6 +1551,8 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx, | |||
xcb_generic_error_t *error; | |||
int stride, format; | |||
(void) ctx; | |||
drawable = (xcb_drawable_t) buffer; | |||
xcb_dri2_create_drawable (dri2_dpy->conn, drawable); | |||
attachments[0] = XCB_DRI2_ATTACHMENT_BUFFER_FRONT_LEFT; | |||
@@ -1577,7 +1609,7 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx, | |||
stride = buffers[0].pitch / buffers[0].cpp; | |||
dri2_img->dri_image = | |||
dri2_dpy->image->createImageFromName(dri2_ctx->dri_context, | |||
dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen, | |||
buffers_reply->width, | |||
buffers_reply->height, | |||
format, | |||
@@ -1628,10 +1660,11 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx, | |||
EGLClientBuffer buffer, const EGLint *attr_list) | |||
{ | |||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); | |||
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); | |||
struct dri2_egl_image *dri2_img; | |||
EGLint width, height, format, name, stride, pitch, i, err; | |||
(void) ctx; | |||
name = (EGLint) buffer; | |||
err = EGL_SUCCESS; | |||
@@ -1697,7 +1730,7 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx, | |||
} | |||
dri2_img->dri_image = | |||
dri2_dpy->image->createImageFromName(dri2_ctx->dri_context, | |||
dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen, | |||
width, | |||
height, | |||
format, | |||
@@ -1718,6 +1751,8 @@ dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, | |||
_EGLContext *ctx, EGLenum target, | |||
EGLClientBuffer buffer, const EGLint *attr_list) | |||
{ | |||
(void) drv; | |||
switch (target) { | |||
case EGL_NATIVE_PIXMAP_KHR: | |||
return dri2_create_image_khr_pixmap(disp, ctx, buffer, attr_list); | |||
@@ -1737,6 +1772,8 @@ dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image) | |||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); | |||
struct dri2_egl_image *dri2_img = dri2_egl_image(image); | |||
(void) drv; | |||
dri2_dpy->image->destroyImage(dri2_img->dri_image); | |||
free(dri2_img); | |||
@@ -1753,6 +1790,8 @@ dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, | |||
unsigned int use, dri_use, valid_mask; | |||
EGLint err = EGL_SUCCESS; | |||
(void) drv; | |||
dri2_img = malloc(sizeof *dri2_img); | |||
if (!dri2_img) { | |||
_eglError(EGL_BAD_ALLOC, "dri2_create_image_khr"); | |||
@@ -1853,6 +1892,8 @@ dri2_export_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, | |||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); | |||
struct dri2_egl_image *dri2_img = dri2_egl_image(img); | |||
(void) drv; | |||
if (name && !dri2_dpy->image->queryImage(dri2_img->dri_image, | |||
__DRI_IMAGE_ATTRIB_NAME, name)) { | |||
_eglError(EGL_BAD_ALLOC, "dri2_export_drm_image_mesa"); | |||
@@ -1879,6 +1920,8 @@ _eglMain(const char *args) | |||
{ | |||
struct dri2_egl_driver *dri2_drv; | |||
(void) args; | |||
dri2_drv = malloc(sizeof *dri2_drv); | |||
if (!dri2_drv) | |||
return NULL; |
@@ -162,7 +162,8 @@ static EGLBoolean | |||
convert_fbconfig(Display *dpy, GLXFBConfig fbconfig, | |||
struct GLX_egl_config *GLX_conf) | |||
{ | |||
int err = 0, attr, egl_attr, val, i; | |||
int err = 0, attr, egl_attr, val; | |||
unsigned i; | |||
EGLint conformant, config_caveat, surface_type; | |||
for (i = 0; i < ARRAY_SIZE(fbconfig_attributes); i++) { | |||
@@ -243,7 +244,8 @@ static EGLBoolean | |||
convert_visual(Display *dpy, XVisualInfo *vinfo, | |||
struct GLX_egl_config *GLX_conf) | |||
{ | |||
int err, attr, egl_attr, val, i; | |||
int err, attr, egl_attr, val; | |||
unsigned i; | |||
EGLint conformant, config_caveat, surface_type; | |||
/* the visual must support OpenGL */ | |||
@@ -457,6 +459,8 @@ GLX_eglInitialize(_EGLDriver *drv, _EGLDisplay *disp, | |||
{ | |||
struct GLX_egl_display *GLX_dpy; | |||
(void) drv; | |||
if (disp->Platform != _EGL_PLATFORM_X11) | |||
return EGL_FALSE; | |||
@@ -541,6 +545,8 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, | |||
struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp); | |||
struct GLX_egl_context *GLX_ctx_shared = GLX_egl_context(share_list); | |||
(void) drv; | |||
if (!GLX_ctx) { | |||
_eglError(EGL_BAD_ALLOC, "eglCreateContext"); | |||
return NULL; | |||
@@ -604,6 +610,8 @@ GLX_eglMakeCurrent(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, | |||
GLXContext cctx; | |||
EGLBoolean ret = EGL_FALSE; | |||
(void) drv; | |||
/* bind the new context and return the "orphaned" one */ | |||
if (!_eglBindContext(&ctx, &dsurf, &rsurf)) | |||
return EGL_FALSE; | |||
@@ -656,6 +664,8 @@ GLX_eglCreateWindowSurface(_EGLDriver *drv, _EGLDisplay *disp, | |||
struct GLX_egl_surface *GLX_surf; | |||
uint width, height; | |||
(void) drv; | |||
GLX_surf = CALLOC_STRUCT(GLX_egl_surface); | |||
if (!GLX_surf) { | |||
_eglError(EGL_BAD_ALLOC, "eglCreateWindowSurface"); | |||
@@ -702,6 +712,8 @@ GLX_eglCreatePixmapSurface(_EGLDriver *drv, _EGLDisplay *disp, | |||
struct GLX_egl_surface *GLX_surf; | |||
uint width, height; | |||
(void) drv; | |||
GLX_surf = CALLOC_STRUCT(GLX_egl_surface); | |||
if (!GLX_surf) { | |||
_eglError(EGL_BAD_ALLOC, "eglCreatePixmapSurface"); | |||
@@ -762,6 +774,8 @@ GLX_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp, | |||
int attribs[5]; | |||
int i; | |||
(void) drv; | |||
GLX_surf = CALLOC_STRUCT(GLX_egl_surface); | |||
if (!GLX_surf) { | |||
_eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); | |||
@@ -820,6 +834,8 @@ GLX_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp, | |||
static EGLBoolean | |||
GLX_eglDestroySurface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) | |||
{ | |||
(void) drv; | |||
if (!_eglIsSurfaceBound(surf)) | |||
destroy_surface(disp, surf); | |||
@@ -833,6 +849,8 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) | |||
struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp); | |||
struct GLX_egl_surface *GLX_surf = GLX_egl_surface(draw); | |||
(void) drv; | |||
glXSwapBuffers(GLX_dpy->dpy, GLX_surf->glx_drawable); | |||
return EGL_TRUE; | |||
@@ -844,12 +862,18 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) | |||
static _EGLProc | |||
GLX_eglGetProcAddress(_EGLDriver *drv, const char *procname) | |||
{ | |||
(void) drv; | |||
return (_EGLProc) glXGetProcAddress((const GLubyte *) procname); | |||
} | |||
static EGLBoolean | |||
GLX_eglWaitClient(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) | |||
{ | |||
(void) drv; | |||
(void) dpy; | |||
(void) ctx; | |||
glXWaitGL(); | |||
return EGL_TRUE; | |||
} | |||
@@ -857,6 +881,9 @@ GLX_eglWaitClient(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) | |||
static EGLBoolean | |||
GLX_eglWaitNative(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine) | |||
{ | |||
(void) drv; | |||
(void) dpy; | |||
if (engine != EGL_CORE_NATIVE_ENGINE) | |||
return _eglError(EGL_BAD_PARAMETER, "eglWaitNative"); | |||
glXWaitX(); | |||
@@ -880,6 +907,8 @@ _eglMain(const char *args) | |||
{ | |||
struct GLX_egl_driver *GLX_drv = CALLOC_STRUCT(GLX_egl_driver); | |||
(void) args; | |||
if (!GLX_drv) | |||
return NULL; | |||
@@ -57,7 +57,7 @@ EGL_NATIVE_PLATFORM=_EGL_INVALID_PLATFORM | |||
ifeq ($(firstword $(EGL_PLATFORMS)),x11) | |||
EGL_NATIVE_PLATFORM=_EGL_PLATFORM_X11 | |||
endif | |||
ifeq ($(firstword $(EGL_PLATFORMS)),kms) | |||
ifeq ($(firstword $(EGL_PLATFORMS)),drm) | |||
EGL_NATIVE_PLATFORM=_EGL_PLATFORM_DRM | |||
endif | |||
ifeq ($(firstword $(EGL_PLATFORMS)),fbdev) |
@@ -402,10 +402,15 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list, | |||
_EGLContext *context; | |||
EGLContext ret; | |||
if (config) | |||
_EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv); | |||
else | |||
_EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv); | |||
_EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv); | |||
if (!config) { | |||
/* config may be NULL if surfaceless */ | |||
if (!disp->Extensions.KHR_surfaceless_gles1 && | |||
!disp->Extensions.KHR_surfaceless_gles2 && | |||
!disp->Extensions.KHR_surfaceless_opengl) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT); | |||
} | |||
if (!share && share_list != EGL_NO_CONTEXT) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_CONTEXT); | |||
@@ -459,9 +464,19 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read, | |||
if (!context && ctx != EGL_NO_CONTEXT) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_FALSE); | |||
if ((!draw_surf && draw != EGL_NO_SURFACE) || | |||
(!read_surf && read != EGL_NO_SURFACE)) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); | |||
if (!draw_surf || !read_surf) { | |||
/* surfaces may be NULL if surfaceless */ | |||
if (!disp->Extensions.KHR_surfaceless_gles1 && | |||
!disp->Extensions.KHR_surfaceless_gles2 && | |||
!disp->Extensions.KHR_surfaceless_opengl) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); | |||
if ((!draw_surf && draw != EGL_NO_SURFACE) || | |||
(!read_surf && read != EGL_NO_SURFACE)) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); | |||
if (draw_surf || read_surf) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE); | |||
} | |||
ret = drv->API.MakeCurrent(drv, disp, draw_surf, read_surf, context); | |||
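The relaxed checks above let eglCreateContext accept a NULL config and eglMakeCurrent accept EGL_NO_SURFACE for both surfaces, but only when the display exposes one of the KHR_surfaceless extensions. A minimal client-side sketch, assuming the display advertises EGL_KHR_surfaceless_opengl (not part of the patch; error handling kept short):

#include <EGL/egl.h>
#include <string.h>

/* Create an OpenGL context and make it current without any EGLSurface. */
static EGLContext
make_surfaceless_context(EGLDisplay dpy)
{
   const char *exts = eglQueryString(dpy, EGL_EXTENSIONS);
   EGLContext ctx;

   if (!exts || !strstr(exts, "EGL_KHR_surfaceless_opengl"))
      return EGL_NO_CONTEXT;

   eglBindAPI(EGL_OPENGL_API);

   /* config may be NULL when the surfaceless extension is present ... */
   ctx = eglCreateContext(dpy, NULL, EGL_NO_CONTEXT, NULL);
   if (ctx == EGL_NO_CONTEXT)
      return EGL_NO_CONTEXT;

   /* ... and both surfaces may be EGL_NO_SURFACE. */
   if (!eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, ctx)) {
      eglDestroyContext(dpy, ctx);
      return EGL_NO_CONTEXT;
   }

   return ctx;
}

With no window-system surface bound, such a client would typically render to a framebuffer object.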
@@ -1276,6 +1291,8 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, | |||
EGLImageKHR ret; | |||
_EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv); | |||
if (!disp->Extensions.KHR_image_base) | |||
RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR); | |||
if (!context && ctx != EGL_NO_CONTEXT) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_IMAGE_KHR); | |||
@@ -1296,6 +1313,8 @@ eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image) | |||
EGLBoolean ret; | |||
_EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); | |||
if (!disp->Extensions.KHR_image_base) | |||
RETURN_EGL_EVAL(disp, EGL_FALSE); | |||
if (!img) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE); | |||
@@ -1321,6 +1340,8 @@ eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) | |||
EGLSyncKHR ret; | |||
_EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv); | |||
if (!disp->Extensions.KHR_reusable_sync) | |||
RETURN_EGL_EVAL(disp, EGL_NO_SYNC_KHR); | |||
sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list); | |||
ret = (sync) ? _eglLinkSync(sync, disp) : EGL_NO_SYNC_KHR; | |||
@@ -1338,6 +1359,8 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) | |||
EGLBoolean ret; | |||
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); | |||
assert(disp->Extensions.KHR_reusable_sync); | |||
_eglUnlinkSync(s); | |||
ret = drv->API.DestroySyncKHR(drv, disp, s); | |||
@@ -1354,6 +1377,7 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t | |||
EGLint ret; | |||
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); | |||
assert(disp->Extensions.KHR_reusable_sync); | |||
ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout); | |||
RETURN_EGL_EVAL(disp, ret); | |||
@@ -1369,6 +1393,7 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode) | |||
EGLBoolean ret; | |||
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); | |||
assert(disp->Extensions.KHR_reusable_sync); | |||
ret = drv->API.SignalSyncKHR(drv, disp, s, mode); | |||
RETURN_EGL_EVAL(disp, ret); | |||
@@ -1384,6 +1409,7 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *v | |||
EGLBoolean ret; | |||
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); | |||
assert(disp->Extensions.KHR_reusable_sync); | |||
ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value); | |||
RETURN_EGL_EVAL(disp, ret); | |||
@@ -1407,14 +1433,14 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface, | |||
_EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv); | |||
if (!disp->Extensions.NOK_swap_region) | |||
RETURN_EGL_EVAL(disp, EGL_FALSE); | |||
/* surface must be bound to current context in EGL 1.4 */ | |||
if (!ctx || !_eglIsContextLinked(ctx) || surf != ctx->DrawSurface) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); | |||
if (drv->API.SwapBuffersRegionNOK) | |||
ret = drv->API.SwapBuffersRegionNOK(drv, disp, surf, numRects, rects); | |||
else | |||
ret = drv->API.SwapBuffers(drv, disp, surf); | |||
ret = drv->API.SwapBuffersRegionNOK(drv, disp, surf, numRects, rects); | |||
RETURN_EGL_EVAL(disp, ret); | |||
} | |||
@@ -1433,6 +1459,8 @@ eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list) | |||
EGLImageKHR ret; | |||
_EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv); | |||
if (!disp->Extensions.MESA_drm_image) | |||
RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR); | |||
img = drv->API.CreateDRMImageMESA(drv, disp, attr_list); | |||
ret = (img) ? _eglLinkImage(img, disp) : EGL_NO_IMAGE_KHR; | |||
@@ -1450,6 +1478,8 @@ eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image, | |||
EGLBoolean ret; | |||
_EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); | |||
assert(disp->Extensions.MESA_drm_image); | |||
if (!img) | |||
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE); | |||
@@ -24,20 +24,18 @@ | |||
* IDs are from 1 to N respectively. | |||
*/ | |||
void | |||
_eglInitConfig(_EGLConfig *config, _EGLDisplay *dpy, EGLint id) | |||
_eglInitConfig(_EGLConfig *conf, _EGLDisplay *dpy, EGLint id) | |||
{ | |||
memset(config, 0, sizeof(*config)); | |||
memset(conf, 0, sizeof(*conf)); | |||
config->Display = dpy; | |||
conf->Display = dpy; | |||
/* some attributes take non-zero default values */ | |||
SET_CONFIG_ATTRIB(config, EGL_CONFIG_ID, id); | |||
SET_CONFIG_ATTRIB(config, EGL_CONFIG_CAVEAT, EGL_NONE); | |||
SET_CONFIG_ATTRIB(config, EGL_TRANSPARENT_TYPE, EGL_NONE); | |||
SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_TYPE, EGL_NONE); | |||
#ifdef EGL_VERSION_1_2 | |||
SET_CONFIG_ATTRIB(config, EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER); | |||
#endif /* EGL_VERSION_1_2 */ | |||
conf->ConfigID = id; | |||
conf->ConfigCaveat = EGL_NONE; | |||
conf->TransparentType = EGL_NONE; | |||
conf->NativeVisualType = EGL_NONE; | |||
conf->ColorBufferType = EGL_RGB_BUFFER; | |||
} | |||
@@ -51,7 +49,7 @@ EGLConfig | |||
_eglAddConfig(_EGLDisplay *dpy, _EGLConfig *conf) | |||
{ | |||
/* sanity check */ | |||
assert(GET_CONFIG_ATTRIB(conf, EGL_CONFIG_ID) > 0); | |||
assert(conf->ConfigID > 0); | |||
if (!dpy->Configs) { | |||
dpy->Configs = _eglCreateArray("Config", 16); | |||
@@ -104,6 +102,7 @@ static const struct { | |||
EGLint default_value; | |||
} _eglValidationTable[] = | |||
{ | |||
/* core */ | |||
{ EGL_BUFFER_SIZE, ATTRIB_TYPE_INTEGER, | |||
ATTRIB_CRITERION_ATLEAST, | |||
0 }, | |||
@@ -200,22 +199,13 @@ static const struct { | |||
{ EGL_TRANSPARENT_BLUE_VALUE, ATTRIB_TYPE_INTEGER, | |||
ATTRIB_CRITERION_EXACT, | |||
EGL_DONT_CARE }, | |||
/* these are not real attributes */ | |||
{ EGL_MATCH_NATIVE_PIXMAP, ATTRIB_TYPE_PSEUDO, | |||
ATTRIB_CRITERION_SPECIAL, | |||
EGL_NONE }, | |||
/* there is a gap before EGL_SAMPLES */ | |||
{ 0x3030, ATTRIB_TYPE_PSEUDO, | |||
ATTRIB_CRITERION_IGNORE, | |||
0 }, | |||
{ EGL_NONE, ATTRIB_TYPE_PSEUDO, | |||
ATTRIB_CRITERION_IGNORE, | |||
0 }, | |||
/* extensions */ | |||
{ EGL_Y_INVERTED_NOK, ATTRIB_TYPE_BOOLEAN, | |||
ATTRIB_CRITERION_EXACT, | |||
EGL_DONT_CARE }, | |||
EGL_DONT_CARE } | |||
}; | |||
@@ -232,18 +222,13 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) | |||
{ | |||
EGLint i, attr, val; | |||
EGLBoolean valid = EGL_TRUE; | |||
EGLint red_size = 0, green_size = 0, blue_size = 0, luminance_size = 0; | |||
EGLint alpha_size = 0, buffer_size = 0; | |||
/* all attributes should have been listed */ | |||
assert(ARRAY_SIZE(_eglValidationTable) == _EGL_CONFIG_NUM_ATTRIBS); | |||
/* check attributes by their types */ | |||
for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) { | |||
EGLint mask; | |||
attr = _eglValidationTable[i].attr; | |||
val = GET_CONFIG_ATTRIB(conf, attr); | |||
val = _eglGetConfigKey(conf, attr); | |||
switch (_eglValidationTable[i].type) { | |||
case ATTRIB_TYPE_INTEGER: | |||
@@ -255,30 +240,14 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) | |||
break; | |||
case EGL_SAMPLE_BUFFERS: | |||
/* there can be at most 1 sample buffer */ | |||
if (val > 1) | |||
if (val > 1 || val < 0) | |||
valid = EGL_FALSE; | |||
break; | |||
case EGL_RED_SIZE: | |||
red_size = val; | |||
break; | |||
case EGL_GREEN_SIZE: | |||
green_size = val; | |||
break; | |||
case EGL_BLUE_SIZE: | |||
blue_size = val; | |||
break; | |||
case EGL_LUMINANCE_SIZE: | |||
luminance_size = val; | |||
break; | |||
case EGL_ALPHA_SIZE: | |||
alpha_size = val; | |||
break; | |||
case EGL_BUFFER_SIZE: | |||
buffer_size = val; | |||
default: | |||
if (val < 0) | |||
valid = EGL_FALSE; | |||
break; | |||
} | |||
if (val < 0) | |||
valid = EGL_FALSE; | |||
break; | |||
case ATTRIB_TYPE_BOOLEAN: | |||
if (val != EGL_TRUE && val != EGL_FALSE) | |||
@@ -366,17 +335,18 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) | |||
/* now check for conflicting attribute values */ | |||
switch (GET_CONFIG_ATTRIB(conf, EGL_COLOR_BUFFER_TYPE)) { | |||
switch (conf->ColorBufferType) { | |||
case EGL_RGB_BUFFER: | |||
if (luminance_size) | |||
if (conf->LuminanceSize) | |||
valid = EGL_FALSE; | |||
if (red_size + green_size + blue_size + alpha_size != buffer_size) | |||
if (conf->RedSize + conf->GreenSize + | |||
conf->BlueSize + conf->AlphaSize != conf->BufferSize) | |||
valid = EGL_FALSE; | |||
break; | |||
case EGL_LUMINANCE_BUFFER: | |||
if (red_size || green_size || blue_size) | |||
if (conf->RedSize || conf->GreenSize || conf->BlueSize) | |||
valid = EGL_FALSE; | |||
if (luminance_size + alpha_size != buffer_size) | |||
if (conf->LuminanceSize + conf->AlphaSize != conf->BufferSize) | |||
valid = EGL_FALSE; | |||
break; | |||
} | |||
@@ -385,23 +355,19 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) | |||
return EGL_FALSE; | |||
} | |||
val = GET_CONFIG_ATTRIB(conf, EGL_SAMPLE_BUFFERS); | |||
if (!val && GET_CONFIG_ATTRIB(conf, EGL_SAMPLES)) | |||
if (!conf->SampleBuffers && conf->Samples) | |||
valid = EGL_FALSE; | |||
if (!valid) { | |||
_eglLog(_EGL_DEBUG, "conflicting samples and sample buffers"); | |||
return EGL_FALSE; | |||
} | |||
val = GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE); | |||
if (!(val & EGL_WINDOW_BIT)) { | |||
if (GET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_ID) != 0 || | |||
GET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE) != EGL_NONE) | |||
if (!(conf->SurfaceType & EGL_WINDOW_BIT)) { | |||
if (conf->NativeVisualID != 0 || conf->NativeVisualType != EGL_NONE) | |||
valid = EGL_FALSE; | |||
} | |||
if (!(val & EGL_PBUFFER_BIT)) { | |||
if (GET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGB) || | |||
GET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGBA)) | |||
if (!(conf->SurfaceType & EGL_PBUFFER_BIT)) { | |||
if (conf->BindToTextureRGB || conf->BindToTextureRGBA) | |||
valid = EGL_FALSE; | |||
} | |||
if (!valid) { | |||
@@ -433,11 +399,11 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria) | |||
continue; | |||
attr = _eglValidationTable[i].attr; | |||
cmp = GET_CONFIG_ATTRIB(criteria, attr); | |||
cmp = _eglGetConfigKey(criteria, attr); | |||
if (cmp == EGL_DONT_CARE) | |||
continue; | |||
val = GET_CONFIG_ATTRIB(conf, attr); | |||
val = _eglGetConfigKey(conf, attr); | |||
switch (_eglValidationTable[i].criterion) { | |||
case ATTRIB_CRITERION_EXACT: | |||
if (val != cmp) | |||
@@ -478,16 +444,11 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria) | |||
static INLINE EGLBoolean | |||
_eglIsConfigAttribValid(_EGLConfig *conf, EGLint attr) | |||
{ | |||
if (_eglIndexConfig(conf, attr) < 0) | |||
if (_eglOffsetOfConfig(attr) < 0) | |||
return EGL_FALSE; | |||
/* there are some holes in the range */ | |||
switch (attr) { | |||
case 0x3030 /* a gap before EGL_SAMPLES */: | |||
case EGL_NONE: | |||
#ifdef EGL_VERSION_1_4 | |||
case EGL_MATCH_NATIVE_PIXMAP: | |||
#endif | |||
return EGL_FALSE; | |||
case EGL_Y_INVERTED_NOK: | |||
return conf->Display->Extensions.NOK_texture_from_pixmap; | |||
@@ -506,15 +467,12 @@ EGLBoolean | |||
_eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list) | |||
{ | |||
EGLint attr, val, i; | |||
EGLint config_id = 0, level = 0; | |||
EGLBoolean has_native_visual_type = EGL_FALSE; | |||
EGLBoolean has_transparent_color = EGL_FALSE; | |||
/* reset to default values */ | |||
for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) { | |||
attr = _eglValidationTable[i].attr; | |||
val = _eglValidationTable[i].default_value; | |||
SET_CONFIG_ATTRIB(conf, attr, val); | |||
_eglSetConfigKey(conf, attr, val); | |||
} | |||
/* parse the list */ | |||
@@ -524,59 +482,33 @@ _eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list) | |||
if (!_eglIsConfigAttribValid(conf, attr)) | |||
return EGL_FALSE; | |||
SET_CONFIG_ATTRIB(conf, attr, val); | |||
/* remember some attributes for post-processing */
switch (attr) { | |||
case EGL_CONFIG_ID: | |||
config_id = val; | |||
break; | |||
case EGL_LEVEL: | |||
level = val; | |||
break; | |||
case EGL_NATIVE_VISUAL_TYPE: | |||
has_native_visual_type = EGL_TRUE; | |||
break; | |||
case EGL_TRANSPARENT_RED_VALUE: | |||
case EGL_TRANSPARENT_GREEN_VALUE: | |||
case EGL_TRANSPARENT_BLUE_VALUE: | |||
has_transparent_color = EGL_TRUE; | |||
break; | |||
default: | |||
break; | |||
} | |||
_eglSetConfigKey(conf, attr, val); | |||
} | |||
if (!_eglValidateConfig(conf, EGL_TRUE)) | |||
return EGL_FALSE; | |||
/* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */ | |||
if (level == EGL_DONT_CARE) | |||
if (conf->Level == EGL_DONT_CARE) | |||
return EGL_FALSE; | |||
/* ignore other attributes when EGL_CONFIG_ID is given */ | |||
if (config_id > 0) { | |||
_eglResetConfigKeys(conf, EGL_DONT_CARE); | |||
SET_CONFIG_ATTRIB(conf, EGL_CONFIG_ID, config_id); | |||
if (conf->ConfigID > 0) { | |||
for (i = 0; i < ARRAY_SIZE(_eglValidationTable); i++) { | |||
attr = _eglValidationTable[i].attr; | |||
if (attr != EGL_CONFIG_ID) | |||
_eglSetConfigKey(conf, attr, EGL_DONT_CARE); | |||
} | |||
} | |||
else { | |||
if (has_native_visual_type) { | |||
val = GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE); | |||
if (!(val & EGL_WINDOW_BIT)) | |||
SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE, EGL_DONT_CARE); | |||
} | |||
if (!(conf->SurfaceType & EGL_WINDOW_BIT)) | |||
conf->NativeVisualType = EGL_DONT_CARE; | |||
if (has_transparent_color) { | |||
val = GET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_TYPE); | |||
if (val == EGL_NONE) { | |||
SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_RED_VALUE, | |||
EGL_DONT_CARE); | |||
SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_GREEN_VALUE, | |||
EGL_DONT_CARE); | |||
SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_BLUE_VALUE, | |||
EGL_DONT_CARE); | |||
} | |||
if (conf->TransparentType == EGL_NONE) { | |||
conf->TransparentRedValue = EGL_DONT_CARE; | |||
conf->TransparentGreenValue = EGL_DONT_CARE; | |||
conf->TransparentBlueValue = EGL_DONT_CARE; | |||
} | |||
} | |||
@@ -610,7 +542,6 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2, | |||
EGL_ALPHA_MASK_SIZE, | |||
}; | |||
EGLint val1, val2; | |||
EGLBoolean rgb_buffer; | |||
EGLint i; | |||
if (conf1 == conf2) | |||
@@ -619,44 +550,41 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2, | |||
/* the enum values have the desired ordering */ | |||
assert(EGL_NONE < EGL_SLOW_CONFIG); | |||
assert(EGL_SLOW_CONFIG < EGL_NON_CONFORMANT_CONFIG); | |||
val1 = GET_CONFIG_ATTRIB(conf1, EGL_CONFIG_CAVEAT); | |||
val2 = GET_CONFIG_ATTRIB(conf2, EGL_CONFIG_CAVEAT); | |||
if (val1 != val2) | |||
return (val1 - val2); | |||
val1 = conf1->ConfigCaveat - conf2->ConfigCaveat; | |||
if (val1) | |||
return val1; | |||
/* the enum values have the desired ordering */ | |||
assert(EGL_RGB_BUFFER < EGL_LUMINANCE_BUFFER); | |||
val1 = GET_CONFIG_ATTRIB(conf1, EGL_COLOR_BUFFER_TYPE); | |||
val2 = GET_CONFIG_ATTRIB(conf2, EGL_COLOR_BUFFER_TYPE); | |||
if (val1 != val2) | |||
return (val1 - val2); | |||
rgb_buffer = (val1 == EGL_RGB_BUFFER); | |||
val1 = conf1->ColorBufferType - conf2->ColorBufferType; | |||
if (val1) | |||
return val1; | |||
if (criteria) { | |||
val1 = val2 = 0; | |||
if (rgb_buffer) { | |||
if (GET_CONFIG_ATTRIB(criteria, EGL_RED_SIZE) > 0) { | |||
val1 += GET_CONFIG_ATTRIB(conf1, EGL_RED_SIZE); | |||
val2 += GET_CONFIG_ATTRIB(conf2, EGL_RED_SIZE); | |||
if (conf1->ColorBufferType == EGL_RGB_BUFFER) { | |||
if (criteria->RedSize > 0) { | |||
val1 += conf1->RedSize; | |||
val2 += conf2->RedSize; | |||
} | |||
if (GET_CONFIG_ATTRIB(criteria, EGL_GREEN_SIZE) > 0) { | |||
val1 += GET_CONFIG_ATTRIB(conf1, EGL_GREEN_SIZE); | |||
val2 += GET_CONFIG_ATTRIB(conf2, EGL_GREEN_SIZE); | |||
if (criteria->GreenSize > 0) { | |||
val1 += conf1->GreenSize; | |||
val2 += conf2->GreenSize; | |||
} | |||
if (GET_CONFIG_ATTRIB(criteria, EGL_BLUE_SIZE) > 0) { | |||
val1 += GET_CONFIG_ATTRIB(conf1, EGL_BLUE_SIZE); | |||
val2 += GET_CONFIG_ATTRIB(conf2, EGL_BLUE_SIZE); | |||
if (criteria->BlueSize > 0) { | |||
val1 += conf1->BlueSize; | |||
val2 += conf2->BlueSize; | |||
} | |||
} | |||
else { | |||
if (GET_CONFIG_ATTRIB(criteria, EGL_LUMINANCE_SIZE) > 0) { | |||
val1 += GET_CONFIG_ATTRIB(conf1, EGL_LUMINANCE_SIZE); | |||
val2 += GET_CONFIG_ATTRIB(conf2, EGL_LUMINANCE_SIZE); | |||
if (criteria->LuminanceSize > 0) { | |||
val1 += conf1->LuminanceSize; | |||
val2 += conf2->LuminanceSize; | |||
} | |||
} | |||
if (GET_CONFIG_ATTRIB(criteria, EGL_ALPHA_SIZE) > 0) { | |||
val1 += GET_CONFIG_ATTRIB(conf1, EGL_ALPHA_SIZE); | |||
val2 += GET_CONFIG_ATTRIB(conf2, EGL_ALPHA_SIZE); | |||
if (criteria->AlphaSize > 0) { | |||
val1 += conf1->AlphaSize; | |||
val2 += conf2->AlphaSize; | |||
} | |||
} | |||
else { | |||
@@ -669,24 +597,15 @@ _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2, | |||
return (val2 - val1); | |||
for (i = 0; i < ARRAY_SIZE(compare_attribs); i++) { | |||
val1 = GET_CONFIG_ATTRIB(conf1, compare_attribs[i]); | |||
val2 = GET_CONFIG_ATTRIB(conf2, compare_attribs[i]); | |||
val1 = _eglGetConfigKey(conf1, compare_attribs[i]); | |||
val2 = _eglGetConfigKey(conf2, compare_attribs[i]); | |||
if (val1 != val2) | |||
return (val1 - val2); | |||
} | |||
/* EGL_NATIVE_VISUAL_TYPE cannot be compared here */ | |||
if (compare_id) { | |||
val1 = GET_CONFIG_ATTRIB(conf1, EGL_CONFIG_ID); | |||
val2 = GET_CONFIG_ATTRIB(conf2, EGL_CONFIG_ID); | |||
assert(val1 != val2); | |||
} | |||
else { | |||
val1 = val2 = 0; | |||
} | |||
return (val1 - val2); | |||
return (compare_id) ? (conf1->ConfigID - conf2->ConfigID) : 0; | |||
} | |||
@@ -802,7 +721,7 @@ _eglGetConfigAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, | |||
if (!value) | |||
return _eglError(EGL_BAD_PARAMETER, "eglGetConfigAttrib"); | |||
*value = GET_CONFIG_ATTRIB(conf, attribute); | |||
*value = _eglGetConfigKey(conf, attribute); | |||
return EGL_TRUE; | |||
} | |||
@@ -6,26 +6,49 @@ | |||
#include "egltypedefs.h" | |||
#define _EGL_CONFIG_FIRST_ATTRIB EGL_BUFFER_SIZE | |||
#define _EGL_CONFIG_LAST_ATTRIB EGL_CONFORMANT | |||
#define _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS \ | |||
(_EGL_CONFIG_LAST_ATTRIB - _EGL_CONFIG_FIRST_ATTRIB + 1) | |||
/* Attributes outside the contiguous block: | |||
* | |||
* EGL_Y_INVERTED_NOK | |||
*/ | |||
#define _EGL_CONFIG_FIRST_EXTRA_ATTRIB _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS | |||
#define _EGL_CONFIG_NUM_EXTRA_ATTRIBS 1 | |||
#define _EGL_CONFIG_NUM_ATTRIBS \ | |||
_EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS + _EGL_CONFIG_NUM_EXTRA_ATTRIBS | |||
/* update _eglValidationTable and _eglOffsetOfConfig before updating this | |||
* struct */ | |||
struct _egl_config | |||
{ | |||
_EGLDisplay *Display; | |||
EGLint Storage[_EGL_CONFIG_NUM_ATTRIBS]; | |||
/* core */ | |||
EGLint BufferSize; | |||
EGLint AlphaSize; | |||
EGLint BlueSize; | |||
EGLint GreenSize; | |||
EGLint RedSize; | |||
EGLint DepthSize; | |||
EGLint StencilSize; | |||
EGLint ConfigCaveat; | |||
EGLint ConfigID; | |||
EGLint Level; | |||
EGLint MaxPbufferHeight; | |||
EGLint MaxPbufferPixels; | |||
EGLint MaxPbufferWidth; | |||
EGLint NativeRenderable; | |||
EGLint NativeVisualID; | |||
EGLint NativeVisualType; | |||
EGLint Samples; | |||
EGLint SampleBuffers; | |||
EGLint SurfaceType; | |||
EGLint TransparentType; | |||
EGLint TransparentBlueValue; | |||
EGLint TransparentGreenValue; | |||
EGLint TransparentRedValue; | |||
EGLint BindToTextureRGB; | |||
EGLint BindToTextureRGBA; | |||
EGLint MinSwapInterval; | |||
EGLint MaxSwapInterval; | |||
EGLint LuminanceSize; | |||
EGLint AlphaMaskSize; | |||
EGLint ColorBufferType; | |||
EGLint RenderableType; | |||
EGLint MatchNativePixmap; | |||
EGLint Conformant; | |||
/* extensions */ | |||
EGLint YInvertedNOK; | |||
}; | |||
@@ -37,38 +60,56 @@ struct _egl_config | |||
/** | |||
* Given a key, return an index into the storage of the config. | |||
* Return -1 if the key is invalid. | |||
* Map an EGL attribute enum to the offset of the member in _EGLConfig. | |||
*/ | |||
static INLINE EGLint | |||
_eglIndexConfig(const _EGLConfig *conf, EGLint key) | |||
_eglOffsetOfConfig(EGLint attr) | |||
{ | |||
(void) conf; | |||
if (key >= _EGL_CONFIG_FIRST_ATTRIB && | |||
key < _EGL_CONFIG_FIRST_ATTRIB + _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS) | |||
return key - _EGL_CONFIG_FIRST_ATTRIB; | |||
switch (key) { | |||
case EGL_Y_INVERTED_NOK: | |||
return _EGL_CONFIG_FIRST_EXTRA_ATTRIB; | |||
switch (attr) { | |||
#define ATTRIB_MAP(attr, memb) case attr: return offsetof(_EGLConfig, memb) | |||
/* core */ | |||
ATTRIB_MAP(EGL_BUFFER_SIZE, BufferSize); | |||
ATTRIB_MAP(EGL_ALPHA_SIZE, AlphaSize); | |||
ATTRIB_MAP(EGL_BLUE_SIZE, BlueSize); | |||
ATTRIB_MAP(EGL_GREEN_SIZE, GreenSize); | |||
ATTRIB_MAP(EGL_RED_SIZE, RedSize); | |||
ATTRIB_MAP(EGL_DEPTH_SIZE, DepthSize); | |||
ATTRIB_MAP(EGL_STENCIL_SIZE, StencilSize); | |||
ATTRIB_MAP(EGL_CONFIG_CAVEAT, ConfigCaveat); | |||
ATTRIB_MAP(EGL_CONFIG_ID, ConfigID); | |||
ATTRIB_MAP(EGL_LEVEL, Level); | |||
ATTRIB_MAP(EGL_MAX_PBUFFER_HEIGHT, MaxPbufferHeight); | |||
ATTRIB_MAP(EGL_MAX_PBUFFER_PIXELS, MaxPbufferPixels); | |||
ATTRIB_MAP(EGL_MAX_PBUFFER_WIDTH, MaxPbufferWidth); | |||
ATTRIB_MAP(EGL_NATIVE_RENDERABLE, NativeRenderable); | |||
ATTRIB_MAP(EGL_NATIVE_VISUAL_ID, NativeVisualID); | |||
ATTRIB_MAP(EGL_NATIVE_VISUAL_TYPE, NativeVisualType); | |||
ATTRIB_MAP(EGL_SAMPLES, Samples); | |||
ATTRIB_MAP(EGL_SAMPLE_BUFFERS, SampleBuffers); | |||
ATTRIB_MAP(EGL_SURFACE_TYPE, SurfaceType); | |||
ATTRIB_MAP(EGL_TRANSPARENT_TYPE, TransparentType); | |||
ATTRIB_MAP(EGL_TRANSPARENT_BLUE_VALUE, TransparentBlueValue); | |||
ATTRIB_MAP(EGL_TRANSPARENT_GREEN_VALUE, TransparentGreenValue); | |||
ATTRIB_MAP(EGL_TRANSPARENT_RED_VALUE, TransparentRedValue); | |||
ATTRIB_MAP(EGL_BIND_TO_TEXTURE_RGB, BindToTextureRGB); | |||
ATTRIB_MAP(EGL_BIND_TO_TEXTURE_RGBA, BindToTextureRGBA); | |||
ATTRIB_MAP(EGL_MIN_SWAP_INTERVAL, MinSwapInterval); | |||
ATTRIB_MAP(EGL_MAX_SWAP_INTERVAL, MaxSwapInterval); | |||
ATTRIB_MAP(EGL_LUMINANCE_SIZE, LuminanceSize); | |||
ATTRIB_MAP(EGL_ALPHA_MASK_SIZE, AlphaMaskSize); | |||
ATTRIB_MAP(EGL_COLOR_BUFFER_TYPE, ColorBufferType); | |||
ATTRIB_MAP(EGL_RENDERABLE_TYPE, RenderableType); | |||
ATTRIB_MAP(EGL_MATCH_NATIVE_PIXMAP, MatchNativePixmap); | |||
ATTRIB_MAP(EGL_CONFORMANT, Conformant); | |||
/* extensions */ | |||
ATTRIB_MAP(EGL_Y_INVERTED_NOK, YInvertedNOK); | |||
#undef ATTRIB_MAP | |||
default: | |||
return -1; | |||
} | |||
} | |||
/** | |||
* Reset all keys in the config to a given value. | |||
*/ | |||
static INLINE void | |||
_eglResetConfigKeys(_EGLConfig *conf, EGLint val) | |||
{ | |||
EGLint i; | |||
for (i = 0; i < _EGL_CONFIG_NUM_ATTRIBS; i++) | |||
conf->Storage[i] = val; | |||
} | |||
/** | |||
* Update a config for a given key. | |||
* | |||
@@ -79,9 +120,9 @@ _eglResetConfigKeys(_EGLConfig *conf, EGLint val) | |||
static INLINE void | |||
_eglSetConfigKey(_EGLConfig *conf, EGLint key, EGLint val) | |||
{ | |||
EGLint idx = _eglIndexConfig(conf, key); | |||
assert(idx >= 0); | |||
conf->Storage[idx] = val; | |||
EGLint offset = _eglOffsetOfConfig(key); | |||
assert(offset >= 0); | |||
*((EGLint *) ((char *) conf + offset)) = val; | |||
} | |||
@@ -91,9 +132,9 @@ _eglSetConfigKey(_EGLConfig *conf, EGLint key, EGLint val) | |||
static INLINE EGLint | |||
_eglGetConfigKey(const _EGLConfig *conf, EGLint key) | |||
{ | |||
EGLint idx = _eglIndexConfig(conf, key); | |||
assert(idx >= 0); | |||
return conf->Storage[idx]; | |||
EGLint offset = _eglOffsetOfConfig(key); | |||
assert(offset >= 0); | |||
return *((EGLint *) ((char *) conf + offset)); | |||
} | |||
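As an illustration of the rewritten storage model (a sketch that assumes it is compiled alongside eglconfig.h; the helper name is hypothetical and not part of the patch): attribute enums now resolve to named struct members through offsetof, so the generic key accessors and direct member access are interchangeable.

#include <assert.h>

/* Hypothetical helper demonstrating that the two access paths are equivalent. */
static void
example_set_color_sizes(_EGLConfig *conf)
{
   _eglSetConfigKey(conf, EGL_RED_SIZE, 8);   /* writes conf->RedSize */
   conf->GreenSize = 8;                       /* direct member access */
   assert(_eglGetConfigKey(conf, EGL_GREEN_SIZE) == conf->GreenSize);
}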
@@ -113,13 +113,12 @@ _eglInitContext(_EGLContext *ctx, _EGLDisplay *dpy, _EGLConfig *conf, | |||
err = _eglParseContextAttribList(ctx, attrib_list); | |||
if (err == EGL_SUCCESS && ctx->Config) { | |||
EGLint renderable_type, api_bit; | |||
EGLint api_bit; | |||
renderable_type = GET_CONFIG_ATTRIB(ctx->Config, EGL_RENDERABLE_TYPE); | |||
api_bit = _eglGetContextAPIBit(ctx); | |||
if (!(renderable_type & api_bit)) { | |||
if (!(ctx->Config->RenderableType & api_bit)) { | |||
_eglLog(_EGL_DEBUG, "context api is 0x%x while config supports 0x%x", | |||
api_bit, renderable_type); | |||
api_bit, ctx->Config->RenderableType); | |||
err = EGL_BAD_CONFIG; | |||
} | |||
} | |||
@@ -183,7 +182,9 @@ _eglQueryContext(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *c, | |||
switch (attribute) { | |||
case EGL_CONFIG_ID: | |||
*value = GET_CONFIG_ATTRIB(c->Config, EGL_CONFIG_ID); | |||
if (!c->Config) | |||
return _eglError(EGL_BAD_ATTRIBUTE, "eglQueryContext"); | |||
*value = c->Config->ConfigID; | |||
break; | |||
case EGL_CONTEXT_CLIENT_VERSION: | |||
*value = c->ClientVersion; |
@@ -27,7 +27,7 @@ _eglGetNativePlatformFromEnv(void) | |||
} egl_platforms[_EGL_NUM_PLATFORMS] = { | |||
{ _EGL_PLATFORM_WINDOWS, "gdi" }, | |||
{ _EGL_PLATFORM_X11, "x11" }, | |||
{ _EGL_PLATFORM_DRM, "kms" }, | |||
{ _EGL_PLATFORM_DRM, "drm" }, | |||
{ _EGL_PLATFORM_FBDEV, "fbdev" } | |||
}; | |||
_EGLPlatformType plat = _EGL_INVALID_PLATFORM; |
@@ -18,7 +18,6 @@ | |||
#include "eglmisc.h" | |||
#include "eglmode.h" | |||
#include "eglscreen.h" | |||
#include "eglstring.h" | |||
#include "eglsurface.h" | |||
#include "eglimage.h" | |||
#include "eglsync.h" |
@@ -17,12 +17,12 @@ | |||
static void | |||
_eglClampSwapInterval(_EGLSurface *surf, EGLint interval) | |||
{ | |||
EGLint bound = GET_CONFIG_ATTRIB(surf->Config, EGL_MAX_SWAP_INTERVAL); | |||
EGLint bound = surf->Config->MaxSwapInterval; | |||
if (interval >= bound) { | |||
interval = bound; | |||
} | |||
else { | |||
bound = GET_CONFIG_ATTRIB(surf->Config, EGL_MIN_SWAP_INTERVAL); | |||
bound = surf->Config->MinSwapInterval; | |||
if (interval < bound) | |||
interval = bound; | |||
} | |||
@@ -263,7 +263,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, | |||
return EGL_FALSE; | |||
} | |||
if ((GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE) & type) == 0) { | |||
if ((conf->SurfaceType & type) == 0) { | |||
/* The config can't be used to create a surface of this type */ | |||
_eglError(EGL_BAD_CONFIG, func); | |||
return EGL_FALSE; | |||
@@ -333,7 +333,7 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, | |||
*value = surface->Height; | |||
break; | |||
case EGL_CONFIG_ID: | |||
*value = GET_CONFIG_ATTRIB(surface->Config, EGL_CONFIG_ID); | |||
*value = surface->Config->ConfigID; | |||
break; | |||
case EGL_LARGEST_PBUFFER: | |||
*value = surface->LargestPbuffer; | |||
@@ -445,7 +445,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, | |||
switch (attribute) { | |||
case EGL_MIPMAP_LEVEL: | |||
confval = GET_CONFIG_ATTRIB(surface->Config, EGL_RENDERABLE_TYPE); | |||
confval = surface->Config->RenderableType; | |||
if (!(confval & (EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT))) { | |||
err = EGL_BAD_PARAMETER; | |||
break; | |||
@@ -457,7 +457,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, | |||
case EGL_MULTISAMPLE_RESOLVE_DEFAULT: | |||
break; | |||
case EGL_MULTISAMPLE_RESOLVE_BOX: | |||
confval = GET_CONFIG_ATTRIB(surface->Config, EGL_SURFACE_TYPE); | |||
confval = surface->Config->SurfaceType; | |||
if (!(confval & EGL_MULTISAMPLE_RESOLVE_BOX_BIT)) | |||
err = EGL_BAD_MATCH; | |||
break; | |||
@@ -474,7 +474,7 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, | |||
case EGL_BUFFER_DESTROYED: | |||
break; | |||
case EGL_BUFFER_PRESERVED: | |||
confval = GET_CONFIG_ATTRIB(surface->Config, EGL_SURFACE_TYPE); | |||
confval = surface->Config->SurfaceType; | |||
if (!(confval & EGL_SWAP_BEHAVIOR_PRESERVED_BIT)) | |||
err = EGL_BAD_MATCH; | |||
break; |
@@ -40,7 +40,7 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURC | |||
touch depend | |||
$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(GENERATED_SOURCES) 2> /dev/null | |||
$(PROGS): % : %.o | |||
$(PROGS): % : %.o $(PROGS_DEPS) | |||
$(LD) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group | |||
# Emacs tags |
@@ -8,6 +8,7 @@ C_SOURCES = \ | |||
cso_cache/cso_context.c \ | |||
cso_cache/cso_hash.c \ | |||
draw/draw_context.c \ | |||
draw/draw_fs.c \ | |||
draw/draw_gs.c \ | |||
draw/draw_pipe.c \ | |||
draw/draw_pipe_aaline.c \ | |||
@@ -121,6 +122,7 @@ C_SOURCES = \ | |||
util/u_handle_table.c \ | |||
util/u_hash.c \ | |||
util/u_hash_table.c \ | |||
util/u_index_modify.c \ | |||
util/u_keymap.c \ | |||
util/u_linear.c \ | |||
util/u_linkage.c \ | |||
@@ -174,6 +176,7 @@ GALLIVM_SOURCES = \ | |||
gallivm/lp_bld_struct.c \ | |||
gallivm/lp_bld_swizzle.c \ | |||
gallivm/lp_bld_tgsi_aos.c \ | |||
gallivm/lp_bld_tgsi_info.c \ | |||
gallivm/lp_bld_tgsi_soa.c \ | |||
gallivm/lp_bld_type.c \ | |||
draw/draw_llvm.c \ |
@@ -54,6 +54,7 @@ source = [ | |||
'cso_cache/cso_context.c', | |||
'cso_cache/cso_hash.c', | |||
'draw/draw_context.c', | |||
'draw/draw_fs.c', | |||
'draw/draw_gs.c', | |||
'draw/draw_pipe.c', | |||
'draw/draw_pipe_aaline.c', | |||
@@ -170,6 +171,7 @@ source = [ | |||
'util/u_handle_table.c', | |||
'util/u_hash.c', | |||
'util/u_hash_table.c', | |||
'util/u_index_modify.c', | |||
'util/u_keymap.c', | |||
'util/u_linear.c', | |||
'util/u_linkage.c', | |||
@@ -225,6 +227,7 @@ if env['llvm']: | |||
'gallivm/lp_bld_struct.c', | |||
'gallivm/lp_bld_swizzle.c', | |||
'gallivm/lp_bld_tgsi_aos.c', | |||
'gallivm/lp_bld_tgsi_info.c', | |||
'gallivm/lp_bld_tgsi_soa.c', | |||
'gallivm/lp_bld_type.c', | |||
'draw/draw_llvm.c', |
@@ -413,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) | |||
} | |||
/** | |||
* Allocate an extra vertex/geometry shader vertex attribute. | |||
* This is used by some of the optional draw module stages such | |||
* as wide_point which may need to allocate additional generic/texcoord | |||
* attributes. | |||
*/ | |||
int | |||
draw_alloc_extra_vertex_attrib(struct draw_context *draw, | |||
uint semantic_name, uint semantic_index) | |||
{ | |||
const int num_outputs = draw_current_shader_outputs(draw); | |||
const int n = draw->extra_shader_outputs.num; | |||
assert(n < Elements(draw->extra_shader_outputs.semantic_name)); | |||
draw->extra_shader_outputs.semantic_name[n] = semantic_name; | |||
draw->extra_shader_outputs.semantic_index[n] = semantic_index; | |||
draw->extra_shader_outputs.slot[n] = num_outputs + n; | |||
draw->extra_shader_outputs.num++; | |||
return draw->extra_shader_outputs.slot[n]; | |||
} | |||
/** | |||
* Remove all extra vertex attributes that were allocated with | |||
* draw_alloc_extra_vertex_attrib(). | |||
*/ | |||
void | |||
draw_remove_extra_vertex_attribs(struct draw_context *draw) | |||
{ | |||
draw->extra_shader_outputs.num = 0; | |||
} | |||
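To make the intended pairing concrete, here is a sketch (not part of the patch; the stage hooks are hypothetical) of how a pipeline stage would use these helpers, mirroring the aaline, aapoint and wide_point changes later in this series.

/* Hypothetical stage hooks: request one extra GENERIC output when the stage
 * first runs, and release every extra output again when the stage flushes. */
static void
example_stage_first_point(struct draw_context *draw, uint generic_index)
{
   int slot = draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
                                             generic_index);
   /* the stage then writes its per-vertex data into v->data[slot] */
   (void) slot;
}

static void
example_stage_flush(struct draw_context *draw)
{
   draw_remove_extra_vertex_attribs(draw);
}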
/** | |||
* Ask the draw module for the location/slot of the given vertex attribute in | |||
* a post-transformed vertex. | |||
@@ -446,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw, | |||
return i; | |||
} | |||
/* XXX there may be more than one extra vertex attrib. | |||
* For example, simulated gl_FragCoord and gl_PointCoord. | |||
*/ | |||
if (draw->extra_shader_outputs.semantic_name == semantic_name && | |||
draw->extra_shader_outputs.semantic_index == semantic_index) { | |||
return draw->extra_shader_outputs.slot; | |||
/* Search the extra vertex attributes */ | |||
for (i = 0; i < draw->extra_shader_outputs.num; i++) { | |||
if (draw->extra_shader_outputs.semantic_name[i] == semantic_name && | |||
draw->extra_shader_outputs.semantic_index[i] == semantic_index) { | |||
return draw->extra_shader_outputs.slot[i]; | |||
} | |||
} | |||
return 0; | |||
@@ -470,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw, | |||
uint | |||
draw_num_shader_outputs(const struct draw_context *draw) | |||
{ | |||
uint count = draw->vs.vertex_shader->info.num_outputs; | |||
uint count; | |||
/* If a geometry shader is present, its outputs go to the | |||
* driver, else the vertex shader's outputs. | |||
*/ | |||
if (draw->gs.geometry_shader) | |||
count = draw->gs.geometry_shader->info.num_outputs; | |||
else | |||
count = draw->vs.vertex_shader->info.num_outputs; | |||
count += draw->extra_shader_outputs.num; | |||
if (draw->extra_shader_outputs.slot > 0) | |||
count++; | |||
return count; | |||
} | |||
@@ -671,6 +709,11 @@ draw_set_samplers(struct draw_context *draw, | |||
draw->samplers[i] = NULL; | |||
draw->num_samplers = num; | |||
#ifdef HAVE_LLVM | |||
if (draw->llvm) | |||
draw_llvm_set_sampler_state(draw); | |||
#endif | |||
} | |||
void | |||
@@ -678,9 +721,9 @@ draw_set_mapped_texture(struct draw_context *draw, | |||
unsigned sampler_idx, | |||
uint32_t width, uint32_t height, uint32_t depth, | |||
uint32_t last_level, | |||
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
const void *data[DRAW_MAX_TEXTURE_LEVELS]) | |||
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
const void *data[PIPE_MAX_TEXTURE_LEVELS]) | |||
{ | |||
#ifdef HAVE_LLVM | |||
if(draw->llvm) |
@@ -46,9 +46,9 @@ struct draw_context; | |||
struct draw_stage; | |||
struct draw_vertex_shader; | |||
struct draw_geometry_shader; | |||
struct draw_fragment_shader; | |||
struct tgsi_sampler; | |||
#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ | |||
struct draw_context *draw_create( struct pipe_context *pipe ); | |||
@@ -119,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw, | |||
unsigned sampler_idx, | |||
uint32_t width, uint32_t height, uint32_t depth, | |||
uint32_t last_level, | |||
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
const void *data[DRAW_MAX_TEXTURE_LEVELS]); | |||
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
const void *data[PIPE_MAX_TEXTURE_LEVELS]); | |||
/* | |||
@@ -137,6 +137,17 @@ void draw_delete_vertex_shader(struct draw_context *draw, | |||
struct draw_vertex_shader *dvs); | |||
/* | |||
* Fragment shader functions | |||
*/ | |||
struct draw_fragment_shader * | |||
draw_create_fragment_shader(struct draw_context *draw, | |||
const struct pipe_shader_state *shader); | |||
void draw_bind_fragment_shader(struct draw_context *draw, | |||
struct draw_fragment_shader *dvs); | |||
void draw_delete_fragment_shader(struct draw_context *draw, | |||
struct draw_fragment_shader *dvs); | |||
/* | |||
* Geometry shader functions | |||
*/ |
@@ -0,0 +1,73 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2010 VMware, Inc. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
#include "pipe/p_shader_tokens.h" | |||
#include "util/u_math.h" | |||
#include "util/u_memory.h" | |||
#include "util/u_prim.h" | |||
#include "tgsi/tgsi_parse.h" | |||
#include "draw_fs.h" | |||
#include "draw_private.h" | |||
#include "draw_context.h" | |||
struct draw_fragment_shader * | |||
draw_create_fragment_shader(struct draw_context *draw, | |||
const struct pipe_shader_state *shader) | |||
{ | |||
struct draw_fragment_shader *dfs; | |||
dfs = CALLOC_STRUCT(draw_fragment_shader); | |||
if (dfs) { | |||
dfs->base = *shader; | |||
tgsi_scan_shader(shader->tokens, &dfs->info); | |||
} | |||
return dfs; | |||
} | |||
void | |||
draw_bind_fragment_shader(struct draw_context *draw, | |||
struct draw_fragment_shader *dfs) | |||
{ | |||
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); | |||
draw->fs.fragment_shader = dfs; | |||
} | |||
void | |||
draw_delete_fragment_shader(struct draw_context *draw, | |||
struct draw_fragment_shader *dfs) | |||
{ | |||
FREE(dfs); | |||
} | |||
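A short sketch of how callers are expected to drive this new interface, mirroring the existing vertex-shader entry points; the wrapper below is hypothetical and not part of the patch.

/* Hypothetical wrapper: create and bind a draw-module fragment shader for a
 * given pipe_shader_state; the caller unbinds it and calls
 * draw_delete_fragment_shader() when it is no longer needed. */
static struct draw_fragment_shader *
example_install_fs(struct draw_context *draw,
                   const struct pipe_shader_state *state)
{
   struct draw_fragment_shader *dfs =
      draw_create_fragment_shader(draw, state);
   if (dfs)
      draw_bind_fragment_shader(draw, dfs);
   return dfs;
}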
@@ -0,0 +1,42 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2010 VMware, Inc. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | |||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR | |||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
**************************************************************************/ | |||
#ifndef DRAW_FS_H | |||
#define DRAW_FS_H | |||
#include "tgsi/tgsi_scan.h" | |||
struct draw_fragment_shader | |||
{ | |||
struct pipe_shader_state base; | |||
struct tgsi_shader_info info; | |||
}; | |||
#endif /* DRAW_FS_H */ |
@@ -44,6 +44,7 @@ | |||
#include "tgsi/tgsi_dump.h" | |||
#include "util/u_cpu_detect.h" | |||
#include "util/u_math.h" | |||
#include "util/u_pointer.h" | |||
#include "util/u_string.h" | |||
@@ -71,12 +72,17 @@ init_globals(struct draw_llvm *llvm) | |||
elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); | |||
elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); | |||
elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = | |||
LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); | |||
LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); | |||
elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = | |||
LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); | |||
LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); | |||
elem_types[DRAW_JIT_TEXTURE_DATA] = | |||
LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), | |||
DRAW_MAX_TEXTURE_LEVELS); | |||
PIPE_MAX_TEXTURE_LEVELS); | |||
elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); | |||
elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); | |||
elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); | |||
elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = | |||
LLVMArrayType(LLVMFloatType(), 4); | |||
texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); | |||
@@ -101,6 +107,18 @@ init_globals(struct draw_llvm *llvm) | |||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, | |||
llvm->target, texture_type, | |||
DRAW_JIT_TEXTURE_DATA); | |||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, | |||
llvm->target, texture_type, | |||
DRAW_JIT_TEXTURE_MIN_LOD); | |||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, | |||
llvm->target, texture_type, | |||
DRAW_JIT_TEXTURE_MAX_LOD); | |||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, | |||
llvm->target, texture_type, | |||
DRAW_JIT_TEXTURE_LOD_BIAS); | |||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, | |||
llvm->target, texture_type, | |||
DRAW_JIT_TEXTURE_BORDER_COLOR); | |||
LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, | |||
llvm->target, texture_type); | |||
@@ -1048,9 +1066,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, | |||
unsigned sampler_idx, | |||
uint32_t width, uint32_t height, uint32_t depth, | |||
uint32_t last_level, | |||
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
const void *data[DRAW_MAX_TEXTURE_LEVELS]) | |||
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
const void *data[PIPE_MAX_TEXTURE_LEVELS]) | |||
{ | |||
unsigned j; | |||
struct draw_jit_texture *jit_tex; | |||
@@ -1072,6 +1090,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, | |||
} | |||
} | |||
void | |||
draw_llvm_set_sampler_state(struct draw_context *draw) | |||
{ | |||
unsigned i; | |||
for (i = 0; i < draw->num_samplers; i++) { | |||
struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i]; | |||
if (draw->samplers[i]) { | |||
jit_tex->min_lod = draw->samplers[i]->min_lod; | |||
jit_tex->max_lod = draw->samplers[i]->max_lod; | |||
jit_tex->lod_bias = draw->samplers[i]->lod_bias; | |||
COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color); | |||
} | |||
} | |||
} | |||
void | |||
draw_llvm_destroy_variant(struct draw_llvm_variant *variant) | |||
{ |
@@ -41,7 +41,6 @@ | |||
#include <llvm-c/Target.h> | |||
#include <llvm-c/ExecutionEngine.h> | |||
#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ | |||
struct draw_llvm; | |||
struct llvm_vertex_shader; | |||
@@ -52,9 +51,13 @@ struct draw_jit_texture | |||
uint32_t height; | |||
uint32_t depth; | |||
uint32_t last_level; | |||
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; | |||
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; | |||
const void *data[DRAW_MAX_TEXTURE_LEVELS]; | |||
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; | |||
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; | |||
const void *data[PIPE_MAX_TEXTURE_LEVELS]; | |||
float min_lod; | |||
float max_lod; | |||
float lod_bias; | |||
float border_color[4]; | |||
}; | |||
enum { | |||
@@ -65,6 +68,10 @@ enum { | |||
DRAW_JIT_TEXTURE_ROW_STRIDE, | |||
DRAW_JIT_TEXTURE_IMG_STRIDE, | |||
DRAW_JIT_TEXTURE_DATA, | |||
DRAW_JIT_TEXTURE_MIN_LOD, | |||
DRAW_JIT_TEXTURE_MAX_LOD, | |||
DRAW_JIT_TEXTURE_LOD_BIAS, | |||
DRAW_JIT_TEXTURE_BORDER_COLOR, | |||
DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ | |||
}; | |||
@@ -274,13 +281,16 @@ struct lp_build_sampler_soa * | |||
draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, | |||
LLVMValueRef context_ptr); | |||
void | |||
draw_llvm_set_sampler_state(struct draw_context *draw); | |||
void | |||
draw_llvm_set_mapped_texture(struct draw_context *draw, | |||
unsigned sampler_idx, | |||
uint32_t width, uint32_t height, uint32_t depth, | |||
uint32_t last_level, | |||
uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], | |||
const void *data[DRAW_MAX_TEXTURE_LEVELS]); | |||
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], | |||
const void *data[PIPE_MAX_TEXTURE_LEVELS]); | |||
#endif |
@@ -146,6 +146,10 @@ DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE) | |||
DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE) | |||
DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE) | |||
DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE) | |||
DRAW_LLVM_TEXTURE_MEMBER(min_lod, DRAW_JIT_TEXTURE_MIN_LOD, TRUE) | |||
DRAW_LLVM_TEXTURE_MEMBER(max_lod, DRAW_JIT_TEXTURE_MAX_LOD, TRUE) | |||
DRAW_LLVM_TEXTURE_MEMBER(lod_bias, DRAW_JIT_TEXTURE_LOD_BIAS, TRUE) | |||
DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE) | |||
static void | |||
@@ -207,6 +211,10 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, | |||
sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride; | |||
sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride; | |||
sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr; | |||
sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod; | |||
sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod; | |||
sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias; | |||
sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color; | |||
sampler->dynamic_state.static_state = static_state; | |||
sampler->dynamic_state.context_ptr = context_ptr; | |||
@@ -688,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) | |||
aaline->tex_slot = draw_current_shader_outputs(draw); | |||
aaline->pos_slot = draw_current_shader_position_output(draw);
/* advertise the extra post-transformed vertex attribute */ | |||
draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; | |||
draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib; | |||
draw->extra_shader_outputs.slot = aaline->tex_slot; | |||
/* allocate the extra post-transformed vertex attribute */ | |||
(void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC, | |||
aaline->fs->generic_attrib); | |||
/* how many samplers? */ | |||
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ | |||
@@ -744,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags) | |||
draw->suspend_flushing = FALSE; | |||
draw->extra_shader_outputs.slot = 0; | |||
draw_remove_extra_vertex_attribs(draw); | |||
} | |||
@@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) | |||
aapoint->pos_slot = draw_current_shader_position_output(draw); | |||
draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; | |||
draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib; | |||
draw->extra_shader_outputs.slot = aapoint->tex_slot; | |||
/* allocate the extra post-transformed vertex attribute */ | |||
(void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC, | |||
aapoint->fs->generic_attrib); | |||
/* find psize slot in post-transform vertex */ | |||
aapoint->psize_slot = -1; | |||
@@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) | |||
draw->suspend_flushing = FALSE; | |||
draw->extra_shader_outputs.slot = 0; | |||
draw_remove_extra_vertex_attribs(draw); | |||
} | |||
@@ -263,6 +263,8 @@ do_clip_tri( struct draw_stage *stage, | |||
clipmask &= ~(1<<plane_idx); | |||
assert(n < MAX_CLIPPED_VERTICES); | |||
if (n >= MAX_CLIPPED_VERTICES) | |||
return; | |||
inlist[n] = inlist[0]; /* prevent rotation of vertices */ | |||
for (i = 1; i <= n; i++) { | |||
@@ -272,16 +274,22 @@ do_clip_tri( struct draw_stage *stage, | |||
if (!IS_NEGATIVE(dp_prev)) { | |||
assert(outcount < MAX_CLIPPED_VERTICES); | |||
if (outcount >= MAX_CLIPPED_VERTICES) | |||
return; | |||
outlist[outcount++] = vert_prev; | |||
} | |||
if (DIFFERENT_SIGNS(dp, dp_prev)) { | |||
struct vertex_header *new_vert; | |||
assert(tmpnr < MAX_CLIPPED_VERTICES+1); | |||
assert(tmpnr < MAX_CLIPPED_VERTICES + 1); | |||
if (tmpnr >= MAX_CLIPPED_VERTICES + 1) | |||
return; | |||
new_vert = clipper->stage.tmp[tmpnr++]; | |||
assert(outcount < MAX_CLIPPED_VERTICES); | |||
if (outcount >= MAX_CLIPPED_VERTICES) | |||
return; | |||
outlist[outcount++] = new_vert; | |||
if (IS_NEGATIVE(dp)) { | |||
@@ -321,27 +329,32 @@ do_clip_tri( struct draw_stage *stage, | |||
/* If flat-shading, copy provoking vertex color to polygon vertex[0] | |||
*/ | |||
if (clipper->flat) { | |||
if (stage->draw->rasterizer->flatshade_first) { | |||
if (inlist[0] != header->v[0]) { | |||
assert(tmpnr < MAX_CLIPPED_VERTICES + 1); | |||
inlist[0] = dup_vert(stage, inlist[0], tmpnr++); | |||
copy_colors(stage, inlist[0], header->v[0]); | |||
if (n >= 3) { | |||
if (clipper->flat) { | |||
if (stage->draw->rasterizer->flatshade_first) { | |||
if (inlist[0] != header->v[0]) { | |||
assert(tmpnr < MAX_CLIPPED_VERTICES + 1); | |||
if (tmpnr >= MAX_CLIPPED_VERTICES + 1) | |||
return; | |||
inlist[0] = dup_vert(stage, inlist[0], tmpnr++); | |||
copy_colors(stage, inlist[0], header->v[0]); | |||
} | |||
} | |||
} | |||
else { | |||
if (inlist[0] != header->v[2]) { | |||
assert(tmpnr < MAX_CLIPPED_VERTICES + 1); | |||
inlist[0] = dup_vert(stage, inlist[0], tmpnr++); | |||
copy_colors(stage, inlist[0], header->v[2]); | |||
else { | |||
if (inlist[0] != header->v[2]) { | |||
assert(tmpnr < MAX_CLIPPED_VERTICES + 1); | |||
if (tmpnr >= MAX_CLIPPED_VERTICES + 1) | |||
return; | |||
inlist[0] = dup_vert(stage, inlist[0], tmpnr++); | |||
copy_colors(stage, inlist[0], header->v[2]); | |||
} | |||
} | |||
} | |||
} | |||
/* Emit the polygon as triangles to the setup stage: | |||
*/ | |||
if (n >= 3) | |||
/* Emit the polygon as triangles to the setup stage: | |||
*/ | |||
emit_poly( stage, inlist, n, header ); | |||
} | |||
} | |||
@@ -172,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) | |||
wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold | |||
&& !rast->line_smooth); | |||
/* drawing large points? */ | |||
/* drawing large/sprite points (but not AA points)? */ | |||
if (rast->sprite_coord_enable && draw->pipeline.point_sprite) | |||
wide_points = TRUE; | |||
else if (rast->point_smooth && draw->pipeline.aapoint) | |||
@@ -207,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) | |||
precalc_flat = TRUE; | |||
} | |||
if (wide_points || rast->sprite_coord_enable) { | |||
if (wide_points) { | |||
draw->pipeline.wide_point->next = next; | |||
next = draw->pipeline.wide_point; | |||
} |
@@ -57,26 +57,24 @@ | |||
#include "util/u_memory.h" | |||
#include "pipe/p_defines.h" | |||
#include "pipe/p_shader_tokens.h" | |||
#include "draw_fs.h" | |||
#include "draw_vs.h" | |||
#include "draw_pipe.h" | |||
struct widepoint_stage { | |||
struct draw_stage stage; | |||
struct draw_stage stage; /**< base class */ | |||
float half_point_size; | |||
float xbias; | |||
float ybias; | |||
uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; | |||
uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS]; | |||
uint num_texcoords; | |||
uint texcoord_mode; | |||
/** for automatic texcoord generation/replacement */ | |||
uint num_texcoord_gen; | |||
uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS]; | |||
int psize_slot; | |||
int point_coord_fs_input; /**< input for pointcoord */ | |||
}; | |||
@@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage ) | |||
static void set_texcoords(const struct widepoint_stage *wide, | |||
struct vertex_header *v, const float tc[4]) | |||
{ | |||
const struct draw_context *draw = wide->stage.draw; | |||
const struct pipe_rasterizer_state *rast = draw->rasterizer; | |||
const uint texcoord_mode = rast->sprite_coord_mode; | |||
uint i; | |||
for (i = 0; i < wide->num_texcoords; i++) { | |||
if (wide->texcoord_enable[i]) { | |||
uint j = wide->texcoord_slot[i]; | |||
v->data[j][0] = tc[0]; | |||
if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) | |||
v->data[j][1] = 1.0f - tc[1]; | |||
else | |||
v->data[j][1] = tc[1]; | |||
v->data[j][2] = tc[2]; | |||
v->data[j][3] = tc[3]; | |||
} | |||
} | |||
if (wide->point_coord_fs_input >= 0) { | |||
/* put gl_PointCoord into the extra vertex slot */ | |||
uint slot = wide->stage.draw->extra_shader_outputs.slot; | |||
for (i = 0; i < wide->num_texcoord_gen; i++) { | |||
const uint slot = wide->texcoord_gen_slot[i]; | |||
v->data[slot][0] = tc[0]; | |||
if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) | |||
if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) | |||
v->data[slot][1] = 1.0f - tc[1]; | |||
else | |||
v->data[slot][1] = tc[1]; | |||
v->data[slot][2] = 0.0F; | |||
v->data[slot][3] = 1.0F; | |||
v->data[slot][2] = tc[2]; | |||
v->data[slot][3] = tc[3]; | |||
} | |||
} | |||
@@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage, | |||
} | |||
static int | |||
find_pntc_input_attrib(struct draw_context *draw) | |||
{ | |||
/* Scan the fragment program's input decls to find the pointcoord | |||
* attribute. The xy components will store the point coord. | |||
*/ | |||
return 0; /* XXX fix this */ | |||
} | |||
static void widepoint_first_point( struct draw_stage *stage, | |||
struct prim_header *header ) | |||
static void | |||
widepoint_first_point(struct draw_stage *stage, | |||
struct prim_header *header) | |||
{ | |||
struct widepoint_stage *wide = widepoint_stage(stage); | |||
struct draw_context *draw = stage->draw; | |||
@@ -244,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage, | |||
stage->point = draw_pipe_passthrough_point; | |||
} | |||
draw_remove_extra_vertex_attribs(draw); | |||
if (rast->point_quad_rasterization) { | |||
/* find vertex shader texcoord outputs */ | |||
const struct draw_vertex_shader *vs = draw->vs.vertex_shader; | |||
uint i, j = 0; | |||
wide->texcoord_mode = rast->sprite_coord_mode; | |||
for (i = 0; i < vs->info.num_outputs; i++) { | |||
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { | |||
wide->texcoord_slot[j] = i; | |||
wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1; | |||
j++; | |||
const struct draw_fragment_shader *fs = draw->fs.fragment_shader; | |||
uint i; | |||
wide->num_texcoord_gen = 0; | |||
/* Loop over fragment shader inputs looking for generic inputs
* whose semantic index has the corresponding bit set in sprite_coord_enable.
*/
for (i = 0; i < fs->info.num_inputs; i++) { | |||
if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { | |||
const int generic_index = fs->info.input_semantic_index[i]; | |||
/* Note that sprite_coord_enable is a bitfield of
* PIPE_MAX_SHADER_OUTPUTS bits.
*/
if (generic_index < PIPE_MAX_SHADER_OUTPUTS && | |||
(rast->sprite_coord_enable & (1 << generic_index))) { | |||
/* OK, this generic attribute needs to be replaced with a | |||
* texcoord (see above). | |||
*/ | |||
int slot = draw_find_shader_output(draw, | |||
TGSI_SEMANTIC_GENERIC, | |||
generic_index); | |||
if (slot > 0) { | |||
/* there's already a post-vertex shader attribute | |||
* for this fragment shader input attribute. | |||
*/ | |||
} | |||
else { | |||
/* need to allocate a new post-vertex shader attribute */ | |||
slot = draw_alloc_extra_vertex_attrib(draw, | |||
TGSI_SEMANTIC_GENERIC, | |||
generic_index); | |||
} | |||
/* add this slot to the texcoord-gen list */ | |||
wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot; | |||
} | |||
} | |||
} | |||
wide->num_texcoords = j; | |||
/* find fragment shader PointCoord input */ | |||
wide->point_coord_fs_input = find_pntc_input_attrib(draw); | |||
/* setup extra vp output (point coord implemented as a texcoord) */ | |||
draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; | |||
draw->extra_shader_outputs.semantic_index = 0; | |||
draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw); | |||
} | |||
else { | |||
wide->point_coord_fs_input = -1; | |||
draw->extra_shader_outputs.slot = 0; | |||
} | |||
wide->psize_slot = -1; | |||
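Worked example (illustrative, not part of the patch): if the bound fragment shader reads GENERIC[0] and GENERIC[3] and rast->sprite_coord_enable is 0x9 (bits 0 and 3 set), the loop above reuses or allocates one post-transform slot per matching input and records it in texcoord_gen_slot[]; set_texcoords() then overwrites exactly those slots with the generated point coordinates for each sprite vertex.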
@@ -295,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags ) | |||
stage->point = widepoint_first_point; | |||
stage->next->flush( stage->next, flags ); | |||
stage->draw->extra_shader_outputs.slot = 0; | |||
draw_remove_extra_vertex_attribs(draw); | |||
/* restore original rasterizer state */ | |||
if (draw->rast_handle) { |
@@ -250,6 +250,11 @@ struct draw_context | |||
struct tgsi_sampler **samplers; | |||
} gs; | |||
/** Fragment shader state */ | |||
struct { | |||
struct draw_fragment_shader *fragment_shader; | |||
} fs; | |||
/** Stream output (vertex feedback) state */ | |||
struct { | |||
struct pipe_stream_output_state state; | |||
@@ -266,9 +271,10 @@ struct draw_context | |||
/* If a prim stage introduces new vertex attributes, they'll be stored here | |||
*/ | |||
struct { | |||
uint semantic_name; | |||
uint semantic_index; | |||
int slot; | |||
uint num; | |||
uint semantic_name[10]; | |||
uint semantic_index[10]; | |||
uint slot[10]; | |||
} extra_shader_outputs; | |||
unsigned reduced_prim; | |||
@@ -362,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw ); | |||
uint draw_current_shader_outputs(const struct draw_context *draw); | |||
uint draw_current_shader_position_output(const struct draw_context *draw); | |||
int draw_alloc_extra_vertex_attrib(struct draw_context *draw, | |||
uint semantic_name, uint semantic_index); | |||
void draw_remove_extra_vertex_attribs(struct draw_context *draw); | |||
/******************************************************************************* | |||
* Vertex processing (was passthrough) code: | |||
*/ |
@@ -614,17 +614,15 @@ lp_build_div(struct lp_build_context *bld, | |||
/** | |||
* Linear interpolation. | |||
* | |||
* This also works for integer values with a few caveats. | |||
* Linear interpolation -- without any checks. | |||
* | |||
* @sa http://www.stereopsis.com/doubleblend.html | |||
*/ | |||
LLVMValueRef | |||
lp_build_lerp(struct lp_build_context *bld, | |||
LLVMValueRef x, | |||
LLVMValueRef v0, | |||
LLVMValueRef v1) | |||
static INLINE LLVMValueRef | |||
lp_build_lerp_simple(struct lp_build_context *bld, | |||
LLVMValueRef x, | |||
LLVMValueRef v0, | |||
LLVMValueRef v1) | |||
{ | |||
LLVMValueRef delta; | |||
LLVMValueRef res; | |||
@@ -639,12 +637,80 @@ lp_build_lerp(struct lp_build_context *bld, | |||
res = lp_build_add(bld, v0, res); | |||
if(bld->type.fixed) | |||
if (bld->type.fixed) { | |||
/* XXX: This step is necessary for lerping 8-bit colors stored in 16 bits,
* but it will be wrong for other uses. Basically we need a more
* powerful lp_type, capable of further distinguishing a value's
* interpretation from its storage. */
res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), ""); | |||
} | |||
return res; | |||
} | |||
/** | |||
* Linear interpolation. | |||
*/ | |||
LLVMValueRef | |||
lp_build_lerp(struct lp_build_context *bld, | |||
LLVMValueRef x, | |||
LLVMValueRef v0, | |||
LLVMValueRef v1) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMValueRef res; | |||
assert(lp_check_value(type, x)); | |||
assert(lp_check_value(type, v0)); | |||
assert(lp_check_value(type, v1)); | |||
if (type.norm) { | |||
struct lp_type wide_type; | |||
struct lp_build_context wide_bld; | |||
LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh; | |||
LLVMValueRef shift; | |||
assert(type.length >= 2); | |||
assert(!type.sign); | |||
/* | |||
* Create a wider type, enough to hold the intermediate result of the | |||
* multiplication. | |||
*/ | |||
memset(&wide_type, 0, sizeof wide_type); | |||
wide_type.fixed = TRUE; | |||
wide_type.width = type.width*2; | |||
wide_type.length = type.length/2; | |||
lp_build_context_init(&wide_bld, bld->builder, wide_type); | |||
lp_build_unpack2(bld->builder, type, wide_type, x, &xl, &xh); | |||
lp_build_unpack2(bld->builder, type, wide_type, v0, &v0l, &v0h); | |||
lp_build_unpack2(bld->builder, type, wide_type, v1, &v1l, &v1h); | |||
/* | |||
* Scale x from [0, 255] to [0, 256] | |||
*/ | |||
shift = lp_build_const_int_vec(wide_type, type.width - 1); | |||
xl = lp_build_add(&wide_bld, xl, | |||
LLVMBuildAShr(bld->builder, xl, shift, "")); | |||
xh = lp_build_add(&wide_bld, xh, | |||
LLVMBuildAShr(bld->builder, xh, shift, "")); | |||
/* | |||
* Lerp both halves. | |||
*/ | |||
resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l); | |||
resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h); | |||
res = lp_build_pack2(bld->builder, wide_type, type, resl, resh); | |||
} else { | |||
res = lp_build_lerp_simple(bld, x, v0, v1); | |||
} | |||
return res; | |||
} | |||
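The norm branch above is the usual fixed-point byte lerp: unpack to twice the width, rescale the weight from [0, 255] to [0, 256] so that 255 maps exactly to 1.0, lerp, and repack. A minimal scalar sketch of the same arithmetic (a hypothetical helper, not part of lp_bld_arith):

#include <stdint.h>

/* Scalar model of the normalized 8-bit lerp path above. */
static inline uint8_t
lerp_unorm8(uint8_t x, uint8_t v0, uint8_t v1)
{
   int w = x + (x >> 7);            /* rescale weight: [0, 255] -> [0, 256] */
   int delta = (int)v1 - (int)v0;   /* may be negative */
   /* assumes arithmetic >> for negative products, as on typical targets */
   return (uint8_t)(v0 + ((w * delta) >> 8));
}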
@@ -923,35 +989,122 @@ lp_build_round_sse41(struct lp_build_context *bld, | |||
enum lp_build_round_sse41_mode mode) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
const char *intrinsic; | |||
LLVMValueRef res; | |||
assert(type.floating); | |||
assert(type.width*type.length == 128); | |||
assert(lp_check_value(type, a)); | |||
assert(util_cpu_caps.has_sse4_1); | |||
switch(type.width) { | |||
case 32: | |||
intrinsic = "llvm.x86.sse41.round.ps"; | |||
break; | |||
case 64: | |||
intrinsic = "llvm.x86.sse41.round.pd"; | |||
break; | |||
default: | |||
assert(0); | |||
return bld->undef; | |||
if (type.length == 1) { | |||
LLVMTypeRef vec_type; | |||
LLVMValueRef undef; | |||
LLVMValueRef args[3]; | |||
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); | |||
switch(type.width) { | |||
case 32: | |||
intrinsic = "llvm.x86.sse41.round.ss"; | |||
break; | |||
case 64: | |||
intrinsic = "llvm.x86.sse41.round.sd"; | |||
break; | |||
default: | |||
assert(0); | |||
return bld->undef; | |||
} | |||
vec_type = LLVMVectorType(bld->elem_type, 4); | |||
undef = LLVMGetUndef(vec_type); | |||
args[0] = undef; | |||
args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); | |||
args[2] = LLVMConstInt(i32t, mode, 0); | |||
res = lp_build_intrinsic(bld->builder, intrinsic, | |||
vec_type, args, Elements(args)); | |||
res = LLVMBuildExtractElement(bld->builder, res, index0, ""); | |||
} | |||
else { | |||
assert(type.width*type.length == 128); | |||
switch(type.width) { | |||
case 32: | |||
intrinsic = "llvm.x86.sse41.round.ps"; | |||
break; | |||
case 64: | |||
intrinsic = "llvm.x86.sse41.round.pd"; | |||
break; | |||
default: | |||
assert(0); | |||
return bld->undef; | |||
} | |||
res = lp_build_intrinsic_binary(bld->builder, intrinsic, | |||
bld->vec_type, a, | |||
LLVMConstInt(i32t, mode, 0)); | |||
} | |||
return res; | |||
} | |||
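For the packed 32-bit float case this maps to the SSE4.1 roundps instruction; at the intrinsics level the equivalent is roughly the following sketch (not code from this patch):

#include <smmintrin.h>

/* SSE4.1 round-to-nearest on four packed floats; the other modes use
 * _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF and _MM_FROUND_TO_ZERO. */
static inline __m128
round_nearest_ps(__m128 a)
{
   return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}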
static INLINE LLVMValueRef | |||
lp_build_iround_nearest_sse2(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMTypeRef ret_type = lp_build_int_vec_type(type); | |||
const char *intrinsic; | |||
LLVMValueRef res; | |||
assert(type.floating); | |||
/* using the double precision conversions is a bit more complicated */ | |||
assert(type.width == 32); | |||
assert(lp_check_value(type, a)); | |||
assert(util_cpu_caps.has_sse2); | |||
/* This is relying on MXCSR rounding mode, which should always be nearest. */ | |||
if (type.length == 1) { | |||
LLVMTypeRef vec_type; | |||
LLVMValueRef undef; | |||
LLVMValueRef arg; | |||
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); | |||
vec_type = LLVMVectorType(bld->elem_type, 4); | |||
intrinsic = "llvm.x86.sse.cvtss2si"; | |||
undef = LLVMGetUndef(vec_type); | |||
arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); | |||
res = lp_build_intrinsic_unary(bld->builder, intrinsic, | |||
ret_type, arg); | |||
} | |||
else { | |||
assert(type.width*type.length == 128); | |||
intrinsic = "llvm.x86.sse2.cvtps2dq"; | |||
res = lp_build_intrinsic_unary(bld->builder, intrinsic, | |||
ret_type, a); | |||
} | |||
return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, | |||
LLVMConstInt(LLVMInt32Type(), mode, 0)); | |||
return res; | |||
} | |||
/** | |||
* Return the integer part of a float (vector) value. The returned value is | |||
* a float (vector). | |||
* Ex: trunc(-1.5) = 1.0 | |||
* Return the integer part of a float (vector) value (== round toward zero). | |||
* The returned value is a float (vector). | |||
* Ex: trunc(-1.5) = -1.0 | |||
*/ | |||
LLVMValueRef | |||
lp_build_trunc(struct lp_build_context *bld, | |||
@@ -962,8 +1115,10 @@ lp_build_trunc(struct lp_build_context *bld, | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); | |||
} | |||
else { | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); | |||
@@ -990,8 +1145,10 @@ lp_build_round(struct lp_build_context *bld, | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); | |||
} | |||
else { | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMValueRef res; | |||
@@ -1016,8 +1173,10 @@ lp_build_floor(struct lp_build_context *bld, | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); | |||
} | |||
else { | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMValueRef res; | |||
@@ -1042,8 +1201,10 @@ lp_build_ceil(struct lp_build_context *bld, | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); | |||
} | |||
else { | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMValueRef res; | |||
@@ -1068,9 +1229,9 @@ lp_build_fract(struct lp_build_context *bld, | |||
/** | |||
* Return the integer part of a float (vector) value. The returned value is | |||
* an integer (vector). | |||
* Ex: itrunc(-1.5) = 1 | |||
* Return the integer part of a float (vector) value (== round toward zero). | |||
* The returned value is an integer (vector). | |||
* Ex: itrunc(-1.5) = -1 | |||
*/ | |||
LLVMValueRef | |||
lp_build_itrunc(struct lp_build_context *bld, | |||
@@ -1097,31 +1258,40 @@ lp_build_iround(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); | |||
LLVMTypeRef int_vec_type = bld->int_vec_type; | |||
LLVMValueRef res; | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { | |||
if (util_cpu_caps.has_sse2 && | |||
((type.width == 32) && (type.length == 1 || type.length == 4))) { | |||
return lp_build_iround_nearest_sse2(bld, a); | |||
} | |||
else if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); | |||
} | |||
else { | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); | |||
LLVMValueRef sign; | |||
LLVMValueRef half; | |||
/* get sign bit */ | |||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); | |||
sign = LLVMBuildAnd(bld->builder, sign, mask, ""); | |||
/* sign * 0.5 */ | |||
half = lp_build_const_vec(type, 0.5); | |||
half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); | |||
half = LLVMBuildOr(bld->builder, sign, half, ""); | |||
half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); | |||
if (type.sign) { | |||
LLVMTypeRef vec_type = bld->vec_type; | |||
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); | |||
LLVMValueRef sign; | |||
/* get sign bit */ | |||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); | |||
sign = LLVMBuildAnd(bld->builder, sign, mask, ""); | |||
/* sign * 0.5 */ | |||
half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); | |||
half = LLVMBuildOr(bld->builder, sign, half, ""); | |||
half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); | |||
} | |||
res = LLVMBuildFAdd(bld->builder, a, half, ""); | |||
} | |||
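The generic fallback is the classic add-half-then-truncate trick; in scalar terms (a sketch only):

/* Scalar model of the non-SSE iround path: add 0.5 with the sign of the
 * input, then let the float->int conversion truncate toward zero. */
static inline int
iround_generic(float a)
{
   float half = a < 0.0f ? -0.5f : 0.5f;
   return (int)(a + half);
}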
@@ -1142,37 +1312,42 @@ lp_build_ifloor(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); | |||
LLVMTypeRef int_vec_type = bld->int_vec_type; | |||
LLVMValueRef res; | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); | |||
} | |||
else { | |||
/* Take the sign bit and add it to 1 constant */ | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
unsigned mantissa = lp_mantissa(type); | |||
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); | |||
LLVMValueRef sign; | |||
LLVMValueRef offset; | |||
/* sign = a < 0 ? ~0 : 0 */ | |||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); | |||
sign = LLVMBuildAnd(bld->builder, sign, mask, ""); | |||
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); | |||
/* offset = -0.99999(9)f */ | |||
offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); | |||
offset = LLVMConstBitCast(offset, int_vec_type); | |||
/* offset = a < 0 ? offset : 0.0f */ | |||
offset = LLVMBuildAnd(bld->builder, offset, sign, ""); | |||
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); | |||
res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res"); | |||
res = a; | |||
if (type.sign) { | |||
/* Take the sign bit and add it to 1 constant */ | |||
LLVMTypeRef vec_type = bld->vec_type; | |||
unsigned mantissa = lp_mantissa(type); | |||
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); | |||
LLVMValueRef sign; | |||
LLVMValueRef offset; | |||
/* sign = a < 0 ? ~0 : 0 */ | |||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); | |||
sign = LLVMBuildAnd(bld->builder, sign, mask, ""); | |||
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); | |||
/* offset = -0.99999(9)f */ | |||
offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); | |||
offset = LLVMConstBitCast(offset, int_vec_type); | |||
/* offset = a < 0 ? offset : 0.0f */ | |||
offset = LLVMBuildAnd(bld->builder, offset, sign, ""); | |||
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); | |||
res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res"); | |||
} | |||
} | |||
/* truncate (round toward zero) */
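The signed fallback biases negative values by a constant just below 1.0 so that the subsequent truncation rounds toward minus infinity; a minimal scalar sketch (not part of the patch):

/* Scalar model of the non-SSE ifloor path. */
static inline int
ifloor_generic(float a)
{
   if (a < 0.0f)
      a += -0.99999881f;   /* ~ -(2^23 - 10)/2^23, just below 1.0 */
   return (int)a;          /* truncation now behaves like floor() */
}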
@@ -1192,35 +1367,39 @@ lp_build_iceil(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); | |||
LLVMTypeRef int_vec_type = bld->int_vec_type; | |||
LLVMValueRef res; | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); | |||
} | |||
else { | |||
LLVMTypeRef vec_type = lp_build_vec_type(type); | |||
LLVMTypeRef vec_type = bld->vec_type; | |||
unsigned mantissa = lp_mantissa(type); | |||
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); | |||
LLVMValueRef sign; | |||
LLVMValueRef offset; | |||
/* sign = a < 0 ? 0 : ~0 */ | |||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); | |||
sign = LLVMBuildAnd(bld->builder, sign, mask, ""); | |||
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); | |||
sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); | |||
/* offset = 0.99999(9)f */ | |||
offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); | |||
offset = LLVMConstBitCast(offset, int_vec_type); | |||
/* offset = a < 0 ? 0.0 : offset */ | |||
offset = LLVMBuildAnd(bld->builder, offset, sign, ""); | |||
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); | |||
if (type.sign) { | |||
LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); | |||
LLVMValueRef sign; | |||
/* sign = a < 0 ? 0 : ~0 */ | |||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); | |||
sign = LLVMBuildAnd(bld->builder, sign, mask, ""); | |||
sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); | |||
sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); | |||
/* offset = a < 0 ? 0.0 : offset */ | |||
offset = LLVMConstBitCast(offset, int_vec_type); | |||
offset = LLVMBuildAnd(bld->builder, offset, sign, ""); | |||
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); | |||
} | |||
res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); | |||
} | |||
@@ -1232,6 +1411,46 @@ lp_build_iceil(struct lp_build_context *bld, | |||
} | |||
/** | |||
* Combined ifloor() & fract(). | |||
* | |||
* Preferable to calling the functions separately, as it ensures that the
* strategy (floor() vs ifloor()) that results in less redundant work is used.
*/ | |||
void | |||
lp_build_ifloor_fract(struct lp_build_context *bld, | |||
LLVMValueRef a, | |||
LLVMValueRef *out_ipart, | |||
LLVMValueRef *out_fpart) | |||
{ | |||
const struct lp_type type = bld->type; | |||
LLVMValueRef ipart; | |||
assert(type.floating); | |||
assert(lp_check_value(type, a)); | |||
if (util_cpu_caps.has_sse4_1 && | |||
(type.length == 1 || type.width*type.length == 128)) { | |||
/* | |||
* floor() is easier. | |||
*/ | |||
ipart = lp_build_floor(bld, a); | |||
*out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); | |||
*out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart"); | |||
} | |||
else { | |||
/* | |||
* ifloor() is easier. | |||
*/ | |||
*out_ipart = lp_build_ifloor(bld, a); | |||
ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart"); | |||
*out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); | |||
} | |||
} | |||
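A typical caller (hypothetical names; bld and coord are assumed to be an existing float build context and coordinate vector) splits a value into integer and fractional parts with a single rounding:

LLVMValueRef ipart, fpart;

lp_build_ifloor_fract(&bld, coord, &ipart, &fpart);
/* ipart: integer vector == ifloor(coord); fpart: float vector in [0, 1) */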
LLVMValueRef | |||
lp_build_sqrt(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
@@ -2040,6 +2259,71 @@ lp_build_exp2(struct lp_build_context *bld, | |||
} | |||
/** | |||
* Extract the exponent of an IEEE-754 floating point value.
* | |||
* Optionally apply an integer bias. | |||
* | |||
* Result is an integer value with | |||
* | |||
* ifloor(log2(x)) + bias | |||
*/ | |||
LLVMValueRef | |||
lp_build_extract_exponent(struct lp_build_context *bld, | |||
LLVMValueRef x, | |||
int bias) | |||
{ | |||
const struct lp_type type = bld->type; | |||
unsigned mantissa = lp_mantissa(type); | |||
LLVMValueRef res; | |||
assert(type.floating); | |||
assert(lp_check_value(bld->type, x)); | |||
x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); | |||
res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); | |||
res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), ""); | |||
res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), ""); | |||
return res; | |||
} | |||
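A scalar model of the same bit manipulation, assuming 32-bit IEEE-754 floats as in the code above:

#include <stdint.h>

/* Scalar model: biased exponent field minus 127, plus the caller's bias. */
static inline int
extract_exponent(float x, int bias)
{
   union { float f; uint32_t u; } v;
   v.f = x;
   return (int)((v.u >> 23) & 0xff) - 127 + bias;
}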
/** | |||
* Extract the mantissa of an IEEE-754 floating point value.
*
* Result is a floating point value with
*
* x / 2**floor(log2(x))
*/ | |||
LLVMValueRef | |||
lp_build_extract_mantissa(struct lp_build_context *bld, | |||
LLVMValueRef x) | |||
{ | |||
const struct lp_type type = bld->type; | |||
unsigned mantissa = lp_mantissa(type); | |||
LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1); | |||
LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type); | |||
LLVMValueRef res; | |||
assert(lp_check_value(bld->type, x)); | |||
assert(type.floating); | |||
x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); | |||
/* res = x / 2**ipart */ | |||
res = LLVMBuildAnd(bld->builder, x, mantmask, ""); | |||
res = LLVMBuildOr(bld->builder, res, one, ""); | |||
res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); | |||
return res; | |||
} | |||
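The same operation in scalar form: keep the mantissa bits and splice in the exponent of 1.0, yielding a value in [1, 2):

#include <stdint.h>

/* Scalar model: x / 2^floor(log2(x)) for normal, positive x. */
static inline float
extract_mantissa(float x)
{
   union { float f; uint32_t u; } v;
   v.f = x;
   v.u = (v.u & 0x007fffff) | 0x3f800000;   /* mantissa | exponent of 1.0 */
   return v.f;
}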
/** | |||
* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ | |||
* These coefficients can be generated with
@@ -2159,3 +2443,62 @@ lp_build_log2(struct lp_build_context *bld, | |||
lp_build_log2_approx(bld, x, NULL, NULL, &res); | |||
return res; | |||
} | |||
/** | |||
* Faster (and less accurate) log2. | |||
* | |||
* log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x)) | |||
* | |||
* Piece-wise linear approximation, with exact results when x is a | |||
* power of two. | |||
* | |||
* See http://www.flipcode.com/archives/Fast_log_Function.shtml | |||
*/ | |||
LLVMValueRef | |||
lp_build_fast_log2(struct lp_build_context *bld, | |||
LLVMValueRef x) | |||
{ | |||
LLVMValueRef ipart; | |||
LLVMValueRef fpart; | |||
assert(lp_check_value(bld->type, x)); | |||
assert(bld->type.floating); | |||
/* ipart = floor(log2(x)) - 1 */ | |||
ipart = lp_build_extract_exponent(bld, x, -1); | |||
ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, ""); | |||
/* fpart = x / 2**ipart */ | |||
fpart = lp_build_extract_mantissa(bld, x); | |||
/* ipart + fpart */ | |||
return LLVMBuildFAdd(bld->builder, ipart, fpart, ""); | |||
} | |||
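Worked example: for x = 12.0, ipart = floor(log2(12)) - 1 = 2 and fpart = 12/8 = 1.5, so fast_log2 returns 3.5 versus the exact log2(12) ~= 3.585; for x = 8.0 the result 3.0 is exact, as promised for powers of two.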
/** | |||
* Fast implementation of iround(log2(x)). | |||
* | |||
* Not an approximation -- it should give accurate results all the time. | |||
*/ | |||
LLVMValueRef | |||
lp_build_ilog2(struct lp_build_context *bld, | |||
LLVMValueRef x) | |||
{ | |||
LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2); | |||
LLVMValueRef ipart; | |||
assert(bld->type.floating); | |||
assert(lp_check_value(bld->type, x)); | |||
/* x * 2^0.5, i.e. add 0.5 to log2(x) */
x = LLVMBuildFMul(bld->builder, x, sqrt2, ""); | |||
/* ipart = floor(log2(x) + 0.5) */ | |||
ipart = lp_build_extract_exponent(bld, x, 0); | |||
return ipart; | |||
} |
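Worked example: for x = 5.0, log2(5) ~= 2.32; multiplying by sqrt(2) gives ~= 7.07, whose exponent field is 2, so ilog2 returns 2 = round(2.32). For x = 6.0 (log2 ~= 2.58) the product is ~= 8.49 with exponent 3, giving the correctly rounded 3.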
@@ -171,6 +171,12 @@ LLVMValueRef | |||
lp_build_itrunc(struct lp_build_context *bld, | |||
LLVMValueRef a); | |||
void | |||
lp_build_ifloor_fract(struct lp_build_context *bld, | |||
LLVMValueRef a, | |||
LLVMValueRef *out_ipart, | |||
LLVMValueRef *out_fpart); | |||
LLVMValueRef | |||
lp_build_sqrt(struct lp_build_context *bld, | |||
LLVMValueRef a); | |||
@@ -208,10 +214,27 @@ LLVMValueRef | |||
lp_build_exp2(struct lp_build_context *bld, | |||
LLVMValueRef a); | |||
LLVMValueRef | |||
lp_build_extract_exponent(struct lp_build_context *bld, | |||
LLVMValueRef x, | |||
int bias); | |||
LLVMValueRef | |||
lp_build_extract_mantissa(struct lp_build_context *bld, | |||
LLVMValueRef x); | |||
LLVMValueRef | |||
lp_build_log2(struct lp_build_context *bld, | |||
LLVMValueRef a); | |||
LLVMValueRef | |||
lp_build_fast_log2(struct lp_build_context *bld, | |||
LLVMValueRef a); | |||
LLVMValueRef | |||
lp_build_ilog2(struct lp_build_context *bld, | |||
LLVMValueRef x); | |||
void | |||
lp_build_exp2_approx(struct lp_build_context *bld, | |||
LLVMValueRef x, |
@@ -63,6 +63,7 @@ | |||
#include "util/u_debug.h" | |||
#include "util/u_math.h" | |||
#include "util/u_cpu_detect.h" | |||
#include "lp_bld_type.h" | |||
#include "lp_bld_const.h" | |||
@@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, | |||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); | |||
LLVMValueRef res; | |||
unsigned mantissa; | |||
unsigned n; | |||
unsigned long long ubound; | |||
unsigned long long mask; | |||
double scale; | |||
double bias; | |||
assert(src_type.floating); | |||
assert(dst_width <= src_type.width); | |||
src_type.sign = FALSE; | |||
mantissa = lp_mantissa(src_type); | |||
/* We cannot carry more bits than the mantissa */ | |||
n = MIN2(mantissa, dst_width); | |||
if (dst_width <= mantissa) { | |||
/* | |||
* Apply magic coefficients that make the desired result appear
* in the least significant bits of the mantissa, with correct rounding.
* | |||
* This only works if the destination width fits in the mantissa. | |||
*/ | |||
/* These magic coefficients make the desired result appear in the
* least significant bits of the mantissa.
*/ | |||
ubound = ((unsigned long long)1 << n); | |||
mask = ubound - 1; | |||
scale = (double)mask/ubound; | |||
bias = (double)((unsigned long long)1 << (mantissa - n)); | |||
unsigned long long ubound; | |||
unsigned long long mask; | |||
double scale; | |||
double bias; | |||
res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); | |||
res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); | |||
res = LLVMBuildBitCast(builder, res, int_vec_type, ""); | |||
ubound = (1ULL << dst_width); | |||
mask = ubound - 1; | |||
scale = (double)mask/ubound; | |||
bias = (double)(1ULL << (mantissa - dst_width)); | |||
if(dst_width > n) { | |||
int shift = dst_width - n; | |||
res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); | |||
res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); | |||
res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); | |||
res = LLVMBuildBitCast(builder, res, int_vec_type, ""); | |||
res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); | |||
} | |||
else if (dst_width == (mantissa + 1)) { | |||
/* | |||
* The destination width matches exactly what can be represented in | |||
* floating point (i.e., mantissa + 1 bits). So do a straight | |||
* multiplication followed by casting. No further rounding is necessary. | |||
*/ | |||
double scale; | |||
/* TODO: Fill in the empty lower bits for additional precision? */ | |||
/* YES: this fixes progs/trivial/tri-z-eq.c. | |||
* Otherwise vertex Z=1.0 values get converted to something like | |||
* 0xfffffb00 and the test for equality with 0xffffffff fails. | |||
scale = (double)((1ULL << dst_width) - 1); | |||
res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); | |||
res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); | |||
} | |||
else { | |||
/* | |||
* The destination exceeds what can be represented in floating point.
* So multiply by the largest power of two we can get away with, and then
* subtract the most significant bit to rescale to normalized values.
*
* The largest power-of-two factor we can get away with is
* (1 << (src_type.width - 1)), because the conversion has to be signed. In
* theory it should be (1 << (src_type.width - 2)), but IEEE 754 states that
* INT_MIN should be returned by FPToSI on overflow, which is the correct
* result for values near 1.0!
* | |||
* This means we get (src_type.width - 1) correct bits for values near 0.0, | |||
* and (mantissa + 1) correct bits for values near 1.0. Equally or more | |||
* important, we also get exact results for 0.0 and 1.0. | |||
*/ | |||
#if 0 | |||
{ | |||
LLVMValueRef msb; | |||
msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); | |||
msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); | |||
msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); | |||
res = LLVMBuildOr(builder, res, msb, ""); | |||
} | |||
#elif 0 | |||
while(shift > 0) { | |||
res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); | |||
shift -= n; | |||
n *= 2; | |||
unsigned n = MIN2(src_type.width - 1, dst_width); | |||
double scale = (double)(1ULL << n); | |||
unsigned lshift = dst_width - n; | |||
unsigned rshift = n; | |||
LLVMValueRef lshifted; | |||
LLVMValueRef rshifted; | |||
res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); | |||
res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); | |||
/* | |||
* Align the most significant bit to its final place. | |||
* | |||
* This will cause 1.0 to overflow to 0, but the later adjustment will | |||
* get it right. | |||
*/ | |||
if (lshift) { | |||
lshifted = LLVMBuildShl(builder, res, | |||
lp_build_const_int_vec(src_type, lshift), ""); | |||
} else { | |||
lshifted = res; | |||
} | |||
#endif | |||
/* | |||
* Align the most significant bit to the right. | |||
*/ | |||
rshifted = LLVMBuildAShr(builder, res, | |||
lp_build_const_int_vec(src_type, rshift), ""); | |||
/* | |||
* Subtract the MSB (shifted down to the LSB position), thereby rescaling
* from (1 << dst_width) to ((1 << dst_width) - 1).
*/ | |||
res = LLVMBuildSub(builder, lshifted, rshifted, ""); | |||
} | |||
else | |||
res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); | |||
return res; | |||
} | |||
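The magic add in the first branch is easiest to see in scalar form. A sketch for dst_width = 8, assuming 32-bit floats (mantissa = 23, so bias = 2^15):

#include <stdint.h>

/* Scalar model of the dst_width <= mantissa branch for unorm8. */
static inline uint32_t
clamped_float_to_unorm8(float x)   /* x assumed already clamped to [0, 1] */
{
   union { float f; uint32_t u; } v;
   v.f = x * (255.0f / 256.0f) + 32768.0f;   /* scale = mask/ubound, bias = 2^(23-8) */
   return v.u & 0xff;   /* the rounded result sits in the low mantissa bits */
}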
@@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, | |||
assert(dst_type.floating); | |||
/* Special-case 8-bit unorm -> float, though most cases could be handled
* this way: | |||
*/ | |||
if (src_width == 8) { | |||
scale = 1.0/255.0; | |||
res = LLVMBuildSIToFP(builder, src, vec_type, ""); | |||
res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); | |||
return res; | |||
} | |||
mantissa = lp_mantissa(dst_type); | |||
n = MIN2(mantissa, src_width); | |||
@@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder, | |||
} | |||
num_tmps = num_srcs; | |||
/* Special case 4x4f --> 1x16ub | |||
*/ | |||
if (src_type.floating == 1 && | |||
src_type.fixed == 0 && | |||
src_type.sign == 1 && | |||
src_type.norm == 0 && | |||
src_type.width == 32 && | |||
src_type.length == 4 && | |||
dst_type.floating == 0 && | |||
dst_type.fixed == 0 && | |||
dst_type.sign == 0 && | |||
dst_type.norm == 1 && | |||
dst_type.width == 8 && | |||
dst_type.length == 16 && | |||
util_cpu_caps.has_sse2) | |||
{ | |||
int i; | |||
for (i = 0; i < num_dsts; i++, src += 4) { | |||
struct lp_type int16_type = dst_type; | |||
struct lp_type int32_type = dst_type; | |||
LLVMValueRef lo, hi; | |||
LLVMValueRef src_int0; | |||
LLVMValueRef src_int1; | |||
LLVMValueRef src_int2; | |||
LLVMValueRef src_int3; | |||
LLVMTypeRef int16_vec_type; | |||
LLVMTypeRef int32_vec_type; | |||
LLVMTypeRef src_vec_type; | |||
LLVMTypeRef dst_vec_type; | |||
LLVMValueRef const_255f; | |||
LLVMValueRef a, b, c, d; | |||
int16_type.width *= 2; | |||
int16_type.length /= 2; | |||
int16_type.sign = 1; | |||
int32_type.width *= 4; | |||
int32_type.length /= 4; | |||
int32_type.sign = 1; | |||
src_vec_type = lp_build_vec_type(src_type); | |||
dst_vec_type = lp_build_vec_type(dst_type); | |||
int16_vec_type = lp_build_vec_type(int16_type); | |||
int32_vec_type = lp_build_vec_type(int32_type); | |||
const_255f = lp_build_const_vec(src_type, 255.0f); | |||
a = LLVMBuildFMul(builder, src[0], const_255f, ""); | |||
b = LLVMBuildFMul(builder, src[1], const_255f, ""); | |||
c = LLVMBuildFMul(builder, src[2], const_255f, ""); | |||
d = LLVMBuildFMul(builder, src[3], const_255f, ""); | |||
{ | |||
struct lp_build_context bld; | |||
bld.builder = builder; | |||
bld.type = src_type; | |||
bld.vec_type = src_vec_type; | |||
bld.int_elem_type = lp_build_elem_type(int32_type); | |||
bld.int_vec_type = int32_vec_type; | |||
bld.undef = lp_build_undef(src_type); | |||
bld.zero = lp_build_zero(src_type); | |||
bld.one = lp_build_one(src_type); | |||
src_int0 = lp_build_iround(&bld, a); | |||
src_int1 = lp_build_iround(&bld, b); | |||
src_int2 = lp_build_iround(&bld, c); | |||
src_int3 = lp_build_iround(&bld, d); | |||
} | |||
/* relying on clamping behavior of sse2 intrinsics here */ | |||
lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1); | |||
hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3); | |||
dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi); | |||
} | |||
return; | |||
} | |||
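The 4x4f -> 1x16ub fast path boils down to a round-to-int plus three pack instructions whose saturation provides the clamping for free; at the intrinsics level the same pipeline looks roughly like this sketch (not code from this patch):

#include <emmintrin.h>

/* Pack four vectors of 4 floats in [0, 1] into 16 unorm8 values. */
static inline __m128i
float4x4_to_unorm8x16(__m128 a, __m128 b, __m128 c, __m128 d)
{
   const __m128 k255 = _mm_set1_ps(255.0f);
   __m128i ia = _mm_cvtps_epi32(_mm_mul_ps(a, k255));   /* rounds per MXCSR, nearest by default */
   __m128i ib = _mm_cvtps_epi32(_mm_mul_ps(b, k255));
   __m128i ic = _mm_cvtps_epi32(_mm_mul_ps(c, k255));
   __m128i id = _mm_cvtps_epi32(_mm_mul_ps(d, k255));
   __m128i lo = _mm_packs_epi32(ia, ib);    /* signed saturate to int16 */
   __m128i hi = _mm_packs_epi32(ic, id);
   return _mm_packus_epi16(lo, hi);         /* unsigned saturate to uint8 */
}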
/* | |||
* Clamp if necessary | |||
*/ |
@@ -57,6 +57,8 @@ lp_disassemble(const void* func) | |||
#ifdef HAVE_UDIS86 | |||
ud_t ud_obj; | |||
uint64_t max_jmp_pc; | |||
uint inst_no; | |||
boolean emit_addrs = TRUE, emit_line_nos = FALSE; | |||
ud_init(&ud_obj); | |||
@@ -76,13 +78,18 @@ lp_disassemble(const void* func) | |||
while (ud_disassemble(&ud_obj)) { | |||
if (emit_addrs) { | |||
#ifdef PIPE_ARCH_X86 | |||
debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); | |||
debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); | |||
#endif | |||
#ifdef PIPE_ARCH_X86_64 | |||
debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); | |||
debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); | |||
#endif | |||
} | |||
else if (emit_line_nos) { | |||
debug_printf("%6d:\t", inst_no); | |||
inst_no++; | |||
} | |||
#if 0 | |||
debug_printf("%-16s ", ud_insn_hex(&ud_obj)); | |||
#endif | |||
@@ -115,8 +122,10 @@ lp_disassemble(const void* func) | |||
} | |||
} | |||
if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || | |||
ud_obj.mnemonic == UD_Iinvalid) | |||
if (ud_obj.mnemonic == UD_Iinvalid || | |||
(ud_insn_off(&ud_obj) >= max_jmp_pc && | |||
(ud_obj.mnemonic == UD_Iret || | |||
ud_obj.mnemonic == UD_Ijmp))) | |||
break; | |||
} | |||
@@ -36,11 +36,12 @@ | |||
#include "util/u_string.h" | |||
#define GALLIVM_DEBUG_TGSI 0x1 | |||
#define GALLIVM_DEBUG_IR 0x2 | |||
#define GALLIVM_DEBUG_ASM 0x4 | |||
#define GALLIVM_DEBUG_NO_OPT 0x8 | |||
#define GALLIVM_DEBUG_PERF 0x10 | |||
#define GALLIVM_DEBUG_TGSI (1 << 0) | |||
#define GALLIVM_DEBUG_IR (1 << 1) | |||
#define GALLIVM_DEBUG_ASM (1 << 2) | |||
#define GALLIVM_DEBUG_NO_OPT (1 << 3) | |||
#define GALLIVM_DEBUG_PERF (1 << 4) | |||
#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5) | |||
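These bits back the GALLIVM_DEBUG debug option; assuming the usual debug_get_flags_option() plumbing with flag names such as "tgsi", "ir", "asm", "nopt" and "perf", setting e.g. GALLIVM_DEBUG=tgsi,asm in the environment dumps both the TGSI shaders and the generated machine code.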
#ifdef DEBUG |
@@ -38,273 +38,15 @@ | |||
#include "lp_bld_flow.h" | |||
#define LP_BUILD_FLOW_MAX_VARIABLES 64 | |||
#define LP_BUILD_FLOW_MAX_DEPTH 32 | |||
/** | |||
* Enumeration of all possible flow constructs. | |||
*/ | |||
enum lp_build_flow_construct_kind { | |||
LP_BUILD_FLOW_SCOPE, | |||
LP_BUILD_FLOW_SKIP, | |||
LP_BUILD_FLOW_IF | |||
}; | |||
/** | |||
* Variable declaration scope. | |||
*/ | |||
struct lp_build_flow_scope | |||
{ | |||
/** Number of variables declared in this scope */ | |||
unsigned num_variables; | |||
}; | |||
/** | |||
* Early exit. Useful to skip to the end of a function or block when | |||
* the execution mask becomes zero or when there is an error condition. | |||
*/ | |||
struct lp_build_flow_skip | |||
{ | |||
/** Block to skip to */ | |||
LLVMBasicBlockRef block; | |||
/** Number of variables declared at the beginning */ | |||
unsigned num_variables; | |||
LLVMValueRef *phi; /**< array [num_variables] */ | |||
}; | |||
/** | |||
* if/else/endif. | |||
*/ | |||
struct lp_build_flow_if | |||
{ | |||
unsigned num_variables; | |||
LLVMValueRef *phi; /**< array [num_variables] */ | |||
LLVMValueRef condition; | |||
LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; | |||
}; | |||
/** | |||
* Union of all possible flow constructs' data | |||
*/ | |||
union lp_build_flow_construct_data | |||
{ | |||
struct lp_build_flow_scope scope; | |||
struct lp_build_flow_skip skip; | |||
struct lp_build_flow_if ifthen; | |||
}; | |||
/** | |||
* Element of the flow construct stack. | |||
*/ | |||
struct lp_build_flow_construct | |||
{ | |||
enum lp_build_flow_construct_kind kind; | |||
union lp_build_flow_construct_data data; | |||
}; | |||
/** | |||
* All necessary data to generate LLVM control flow constructs. | |||
* Insert a new block, right where builder is pointing to. | |||
* | |||
* Besides keeping track of the control flow constructs themselves we also
* need to keep track of variables in order to generate SSA Phi values. | |||
*/ | |||
struct lp_build_flow_context | |||
{ | |||
LLVMBuilderRef builder; | |||
/** | |||
* Control flow stack. | |||
*/ | |||
struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; | |||
unsigned num_constructs; | |||
/** | |||
* Variable stack | |||
*/ | |||
LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; | |||
unsigned num_variables; | |||
}; | |||
struct lp_build_flow_context * | |||
lp_build_flow_create(LLVMBuilderRef builder) | |||
{ | |||
struct lp_build_flow_context *flow; | |||
flow = CALLOC_STRUCT(lp_build_flow_context); | |||
if(!flow) | |||
return NULL; | |||
flow->builder = builder; | |||
return flow; | |||
} | |||
void | |||
lp_build_flow_destroy(struct lp_build_flow_context *flow) | |||
{ | |||
assert(flow->num_constructs == 0); | |||
assert(flow->num_variables == 0); | |||
FREE(flow); | |||
} | |||
/** | |||
* Begin/push a new flow control construct, such as a loop, skip block | |||
* or variable scope. | |||
*/ | |||
static union lp_build_flow_construct_data * | |||
lp_build_flow_push(struct lp_build_flow_context *flow, | |||
enum lp_build_flow_construct_kind kind) | |||
{ | |||
assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); | |||
if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) | |||
return NULL; | |||
flow->constructs[flow->num_constructs].kind = kind; | |||
return &flow->constructs[flow->num_constructs++].data; | |||
} | |||
/** | |||
* Return the current/top flow control construct on the stack. | |||
* \param kind the expected type of the top-most construct | |||
*/ | |||
static union lp_build_flow_construct_data * | |||
lp_build_flow_peek(struct lp_build_flow_context *flow, | |||
enum lp_build_flow_construct_kind kind) | |||
{ | |||
assert(flow->num_constructs); | |||
if(!flow->num_constructs) | |||
return NULL; | |||
assert(flow->constructs[flow->num_constructs - 1].kind == kind); | |||
if(flow->constructs[flow->num_constructs - 1].kind != kind) | |||
return NULL; | |||
return &flow->constructs[flow->num_constructs - 1].data; | |||
} | |||
/** | |||
* End/pop the current/top flow control construct on the stack. | |||
* \param kind the expected type of the top-most construct | |||
*/ | |||
static union lp_build_flow_construct_data * | |||
lp_build_flow_pop(struct lp_build_flow_context *flow, | |||
enum lp_build_flow_construct_kind kind) | |||
{ | |||
assert(flow->num_constructs); | |||
if(!flow->num_constructs) | |||
return NULL; | |||
assert(flow->constructs[flow->num_constructs - 1].kind == kind); | |||
if(flow->constructs[flow->num_constructs - 1].kind != kind) | |||
return NULL; | |||
return &flow->constructs[--flow->num_constructs].data; | |||
} | |||
/** | |||
* Begin a variable scope. | |||
* This is important not only for aesthetic reasons, but also for
* performance reasons, as frequently run blocks should be laid out next to | |||
* each other and fall-throughs maximized. | |||
* | |||
* See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp. | |||
* | |||
*/ | |||
void | |||
lp_build_flow_scope_begin(struct lp_build_flow_context *flow) | |||
{ | |||
struct lp_build_flow_scope *scope; | |||
scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; | |||
if(!scope) | |||
return; | |||
scope->num_variables = 0; | |||
} | |||
/** | |||
* Declare a variable. | |||
* | |||
* A variable is a named entity which can have different LLVMValueRef's at | |||
* different points of the program. This is relevant for control flow because | |||
* when there are multiple branches to a same location we need to replace | |||
* the variable's value with a Phi function as explained in | |||
* http://en.wikipedia.org/wiki/Static_single_assignment_form . | |||
* | |||
* We keep track of variables by keeping around a pointer to where their
* current value is stored.
* | |||
* There are a few cautions to observe: | |||
* | |||
* - Variable's value must not be NULL. If there is no initial value then | |||
* LLVMGetUndef() should be used. | |||
* | |||
* - Variable's value must be kept up-to-date. If the variable is going to be | |||
* modified by a function then a pointer should be passed so that its value | |||
* is accurate. Failure to do this will cause some of the variables' | |||
* transient values to be lost, leading to wrong results. | |||
* | |||
* - A program should be written from top to bottom, by always appending | |||
* instructions to the bottom with a single LLVMBuilderRef. Inserting and/or | |||
* modifying existing statements will most likely lead to wrong results. | |||
* | |||
*/ | |||
void | |||
lp_build_flow_scope_declare(struct lp_build_flow_context *flow, | |||
LLVMValueRef *variable) | |||
{ | |||
struct lp_build_flow_scope *scope; | |||
scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; | |||
if(!scope) | |||
return; | |||
assert(*variable); | |||
if(!*variable) | |||
return; | |||
assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); | |||
if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) | |||
return; | |||
flow->variables[flow->num_variables++] = variable; | |||
++scope->num_variables; | |||
} | |||
void | |||
lp_build_flow_scope_end(struct lp_build_flow_context *flow) | |||
{ | |||
struct lp_build_flow_scope *scope; | |||
scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; | |||
if(!scope) | |||
return; | |||
assert(flow->num_variables >= scope->num_variables); | |||
if(flow->num_variables < scope->num_variables) { | |||
flow->num_variables = 0; | |||
return; | |||
} | |||
flow->num_variables -= scope->num_variables; | |||
} | |||
/** | |||
* Note: this function has no dependencies on the flow code and could | |||
* be used elsewhere. | |||
*/ | |||
@@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) | |||
} | |||
static LLVMBasicBlockRef | |||
lp_build_flow_insert_block(struct lp_build_flow_context *flow) | |||
{ | |||
return lp_build_insert_new_block(flow->builder, ""); | |||
} | |||
/** | |||
* Begin a "skip" block. Inside this block we can test a condition and | |||
* skip to the end of the block if the condition is true.
*/ | |||
void | |||
lp_build_flow_skip_begin(struct lp_build_flow_context *flow) | |||
lp_build_flow_skip_begin(struct lp_build_skip_context *skip, | |||
LLVMBuilderRef builder) | |||
{ | |||
struct lp_build_flow_skip *skip; | |||
LLVMBuilderRef builder; | |||
unsigned i; | |||
skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; | |||
if(!skip) | |||
return; | |||
skip->builder = builder; | |||
/* create new basic block */ | |||
skip->block = lp_build_flow_insert_block(flow); | |||
skip->num_variables = flow->num_variables; | |||
if(!skip->num_variables) { | |||
skip->phi = NULL; | |||
return; | |||
} | |||
/* Allocate a Phi node for each variable in this skip scope */ | |||
skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); | |||
if(!skip->phi) { | |||
skip->num_variables = 0; | |||
return; | |||
} | |||
builder = LLVMCreateBuilder(); | |||
LLVMPositionBuilderAtEnd(builder, skip->block); | |||
/* create a Phi node for each variable */ | |||
for(i = 0; i < skip->num_variables; ++i) | |||
skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); | |||
LLVMDisposeBuilder(builder); | |||
skip->block = lp_build_insert_new_block(skip->builder, "skip"); | |||
} | |||
@@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) | |||
* skip block if the condition is true. | |||
*/ | |||
void | |||
lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, | |||
lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip, | |||
LLVMValueRef cond) | |||
{ | |||
struct lp_build_flow_skip *skip; | |||
LLVMBasicBlockRef current_block; | |||
LLVMBasicBlockRef new_block; | |||
unsigned i; | |||
skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; | |||
if(!skip) | |||
return; | |||
current_block = LLVMGetInsertBlock(flow->builder); | |||
new_block = lp_build_flow_insert_block(flow); | |||
/* for each variable, update the Phi node with a (variable, block) pair */ | |||
for(i = 0; i < skip->num_variables; ++i) { | |||
assert(*flow->variables[i]); | |||
assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); | |||
LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); | |||
} | |||
new_block = lp_build_insert_new_block(skip->builder, ""); | |||
/* if cond is true, goto skip->block, else goto new_block */ | |||
LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); | |||
LLVMBuildCondBr(skip->builder, cond, skip->block, new_block); | |||
LLVMPositionBuilderAtEnd(flow->builder, new_block); | |||
LLVMPositionBuilderAtEnd(skip->builder, new_block); | |||
} | |||
void | |||
lp_build_flow_skip_end(struct lp_build_flow_context *flow) | |||
lp_build_flow_skip_end(struct lp_build_skip_context *skip) | |||
{ | |||
struct lp_build_flow_skip *skip; | |||
LLVMBasicBlockRef current_block; | |||
unsigned i; | |||
skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; | |||
if(!skip) | |||
return; | |||
current_block = LLVMGetInsertBlock(flow->builder); | |||
/* add (variable, block) tuples to the phi nodes */ | |||
for(i = 0; i < skip->num_variables; ++i) { | |||
assert(*flow->variables[i]); | |||
assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); | |||
LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); | |||
*flow->variables[i] = skip->phi[i]; | |||
} | |||
/* goto block */ | |||
LLVMBuildBr(flow->builder, skip->block); | |||
LLVMPositionBuilderAtEnd(flow->builder, skip->block); | |||
FREE(skip->phi); | |||
LLVMBuildBr(skip->builder, skip->block); | |||
LLVMPositionBuilderAtEnd(skip->builder, skip->block); | |||
} | |||
/** | |||
* Check if the mask predicate is zero. If so, jump to the end of the block. | |||
*/ | |||
static void | |||
void | |||
lp_build_mask_check(struct lp_build_mask_context *mask) | |||
{ | |||
LLVMBuilderRef builder = mask->flow->builder; | |||
LLVMBuilderRef builder = mask->skip.builder; | |||
LLVMValueRef value; | |||
LLVMValueRef cond; | |||
value = lp_build_mask_value(mask); | |||
/* cond = (mask == 0) */ | |||
cond = LLVMBuildICmp(builder, | |||
LLVMIntEQ, | |||
LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), | |||
LLVMBuildBitCast(builder, value, mask->reg_type, ""), | |||
LLVMConstNull(mask->reg_type), | |||
""); | |||
/* if cond, goto end of block */ | |||
lp_build_flow_skip_cond_break(mask->flow, cond); | |||
lp_build_flow_skip_cond_break(&mask->skip, cond); | |||
} | |||
@@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask) | |||
*/ | |||
void | |||
lp_build_mask_begin(struct lp_build_mask_context *mask, | |||
struct lp_build_flow_context *flow, | |||
LLVMBuilderRef builder, | |||
struct lp_type type, | |||
LLVMValueRef value) | |||
{ | |||
memset(mask, 0, sizeof *mask); | |||
mask->flow = flow; | |||
mask->reg_type = LLVMIntType(type.width * type.length); | |||
mask->value = value; | |||
mask->var = lp_build_alloca(builder, | |||
lp_build_int_vec_type(type), | |||
"execution_mask"); | |||
lp_build_flow_scope_begin(flow); | |||
lp_build_flow_scope_declare(flow, &mask->value); | |||
lp_build_flow_skip_begin(flow); | |||
LLVMBuildStore(builder, value, mask->var); | |||
lp_build_mask_check(mask); | |||
lp_build_flow_skip_begin(&mask->skip, builder); | |||
} | |||
LLVMValueRef | |||
lp_build_mask_value(struct lp_build_mask_context *mask) | |||
{ | |||
return LLVMBuildLoad(mask->skip.builder, mask->var, ""); | |||
} | |||
@@ -504,9 +185,10 @@ void | |||
lp_build_mask_update(struct lp_build_mask_context *mask, | |||
LLVMValueRef value) | |||
{ | |||
mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); | |||
lp_build_mask_check(mask); | |||
value = LLVMBuildAnd(mask->skip.builder, | |||
lp_build_mask_value(mask), | |||
value, ""); | |||
LLVMBuildStore(mask->skip.builder, value, mask->var); | |||
} | |||
@@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask, | |||
LLVMValueRef | |||
lp_build_mask_end(struct lp_build_mask_context *mask) | |||
{ | |||
lp_build_flow_skip_end(mask->flow); | |||
lp_build_flow_scope_end(mask->flow); | |||
return mask->value; | |||
lp_build_flow_skip_end(&mask->skip); | |||
return lp_build_mask_value(mask); | |||
} | |||
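With the phi-node bookkeeping gone, the mask helpers are driven entirely through the alloca'd variable; a hypothetical caller (builder, type and the mask values are assumed to exist) looks like:

struct lp_build_mask_context mask;
LLVMValueRef final_mask;

lp_build_mask_begin(&mask, builder, type, initial_mask);
/* ... emit shading code ... */
lp_build_mask_update(&mask, kill_mask);   /* AND in a new kill mask */
lp_build_mask_check(&mask);               /* early-out when all lanes are dead */
/* ... more code ... */
final_mask = lp_build_mask_end(&mask);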
@@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder, | |||
LLVMValueRef start, | |||
struct lp_build_loop_state *state) | |||
{ | |||
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); | |||
LLVMValueRef function = LLVMGetBasicBlockParent(block); | |||
state->block = lp_build_insert_new_block(builder, "loop_begin"); | |||
state->block = LLVMAppendBasicBlock(function, "loop"); | |||
state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter"); | |||
LLVMBuildStore(builder, start, state->counter_var); | |||
LLVMBuildBr(builder, state->block); | |||
LLVMPositionBuilderAtEnd(builder, state->block); | |||
state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), ""); | |||
LLVMAddIncoming(state->counter, &start, &block, 1); | |||
state->counter = LLVMBuildLoad(builder, state->counter_var, ""); | |||
} | |||
void | |||
lp_build_loop_end(LLVMBuilderRef builder, | |||
LLVMValueRef end, | |||
LLVMValueRef step, | |||
struct lp_build_loop_state *state) | |||
{ | |||
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); | |||
LLVMValueRef function = LLVMGetBasicBlockParent(block); | |||
LLVMValueRef next; | |||
LLVMValueRef cond; | |||
LLVMBasicBlockRef after_block; | |||
if (!step) | |||
step = LLVMConstInt(LLVMTypeOf(end), 1, 0); | |||
next = LLVMBuildAdd(builder, state->counter, step, ""); | |||
cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, ""); | |||
after_block = LLVMAppendBasicBlock(function, ""); | |||
LLVMBuildCondBr(builder, cond, after_block, state->block); | |||
LLVMAddIncoming(state->counter, &next, &block, 1); | |||
LLVMPositionBuilderAtEnd(builder, after_block); | |||
} | |||
void | |||
lp_build_loop_end_cond(LLVMBuilderRef builder, | |||
LLVMValueRef end, | |||
LLVMValueRef step, | |||
int llvm_cond, | |||
LLVMIntPredicate llvm_cond, | |||
struct lp_build_loop_state *state) | |||
{ | |||
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); | |||
LLVMValueRef function = LLVMGetBasicBlockParent(block); | |||
LLVMValueRef next; | |||
LLVMValueRef cond; | |||
LLVMBasicBlockRef after_block; | |||
@@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, | |||
next = LLVMBuildAdd(builder, state->counter, step, ""); | |||
LLVMBuildStore(builder, next, state->counter_var); | |||
cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); | |||
after_block = LLVMAppendBasicBlock(function, ""); | |||
after_block = lp_build_insert_new_block(builder, "loop_end"); | |||
LLVMBuildCondBr(builder, cond, after_block, state->block); | |||
LLVMAddIncoming(state->counter, &next, &block, 1); | |||
LLVMPositionBuilderAtEnd(builder, after_block); | |||
state->counter = LLVMBuildLoad(builder, state->counter_var, ""); | |||
} | |||
void | |||
lp_build_loop_end(LLVMBuilderRef builder, | |||
LLVMValueRef end, | |||
LLVMValueRef step, | |||
struct lp_build_loop_state *state) | |||
{ | |||
lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state); | |||
} | |||
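Usage of the reworked loop helpers, sketched with constants (a NULL step is assumed to fall back to 1 inside lp_build_loop_end_cond):

struct lp_build_loop_state loop;
LLVMValueRef zero  = LLVMConstInt(LLVMInt32Type(), 0, 0);
LLVMValueRef count = LLVMConstInt(LLVMInt32Type(), 16, 0);

lp_build_loop_begin(builder, zero, &loop);
/* ... loop body, indexing with loop.counter ... */
lp_build_loop_end(builder, count, NULL, &loop);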
@@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, | |||
Is built with: | |||
LLVMValueRef x = LLVMGetUndef(); // or something else | |||
// x needs an alloca variable | |||
x = lp_build_alloca(builder, type, "x"); | |||
flow = lp_build_flow_create(builder); | |||
lp_build_flow_scope_begin(flow); | |||
lp_build_if(ctx, builder, cond); | |||
LLVMBuildStore(LLVMBuildAdd(1, 2), x); | |||
lp_build_else(ctx); | |||
LLVMBuildStore(LLVMBuildAdd(2, 3), x);
lp_build_endif(ctx); | |||
// x needs a phi node | |||
lp_build_flow_scope_declare(flow, &x); | |||
lp_build_if(ctx, flow, builder, cond); | |||
x = LLVMAdd(1, 2); | |||
lp_build_else(ctx); | |||
x = LLVMAdd(2, 3); | |||
lp_build_endif(ctx); | |||
lp_build_flow_scope_end(flow); | |||
lp_build_flow_destroy(flow); | |||
*/ | |||
@@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, | |||
* Begin an if/else/endif construct. | |||
*/ | |||
void | |||
lp_build_if(struct lp_build_if_state *ctx, | |||
struct lp_build_flow_context *flow, | |||
lp_build_if(struct lp_build_if_state *ifthen, | |||
LLVMBuilderRef builder, | |||
LLVMValueRef condition) | |||
{ | |||
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); | |||
struct lp_build_flow_if *ifthen; | |||
unsigned i; | |||
memset(ctx, 0, sizeof(*ctx)); | |||
ctx->builder = builder; | |||
ctx->flow = flow; | |||
/* push/create new scope */ | |||
ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; | |||
assert(ifthen); | |||
ifthen->num_variables = flow->num_variables; | |||
memset(ifthen, 0, sizeof *ifthen); | |||
ifthen->builder = builder; | |||
ifthen->condition = condition; | |||
ifthen->entry_block = block; | |||
/* create a Phi node for each variable in this flow scope */ | |||
ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); | |||
if (!ifthen->phi) { | |||
ifthen->num_variables = 0; | |||
return; | |||
} | |||
/* create endif/merge basic block for the phi functions */ | |||
ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); | |||
LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); | |||
/* create a phi node for each variable */ | |||
for (i = 0; i < flow->num_variables; i++) { | |||
ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); | |||
/* add the initial value of the var from the entry block */
if (!LLVMIsUndef(*flow->variables[i])) | |||
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], | |||
&ifthen->entry_block, 1); | |||
} | |||
/* create/insert true_block before merge_block */ | |||
ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); | |||
@@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx, | |||
* Begin else-part of a conditional | |||
*/ | |||
void | |||
lp_build_else(struct lp_build_if_state *ctx) | |||
lp_build_else(struct lp_build_if_state *ifthen) | |||
{ | |||
struct lp_build_flow_context *flow = ctx->flow; | |||
struct lp_build_flow_if *ifthen; | |||
unsigned i; | |||
ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; | |||
assert(ifthen); | |||
/* for each variable, update the Phi node with a (variable, block) pair */ | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); | |||
for (i = 0; i < flow->num_variables; i++) { | |||
assert(*flow->variables[i]); | |||
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); | |||
} | |||
/* Append an unconditional Br(anch) instruction on the true_block */ | |||
LLVMBuildBr(ifthen->builder, ifthen->merge_block); | |||
/* create/insert false_block before the merge block */ | |||
ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); | |||
/* successive code goes into the else block */ | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); | |||
LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block); | |||
} | |||
@@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx) | |||
* End a conditional. | |||
*/ | |||
void | |||
lp_build_endif(struct lp_build_if_state *ctx) | |||
lp_build_endif(struct lp_build_if_state *ifthen) | |||
{ | |||
struct lp_build_flow_context *flow = ctx->flow; | |||
struct lp_build_flow_if *ifthen; | |||
LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); | |||
unsigned i; | |||
ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; | |||
assert(ifthen); | |||
/* Insert branch to the merge block from current block */ | |||
LLVMBuildBr(ctx->builder, ifthen->merge_block); | |||
LLVMBuildBr(ifthen->builder, ifthen->merge_block); | |||
if (ifthen->false_block) { | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); | |||
/* for each variable, update the Phi node with a (variable, block) pair */ | |||
for (i = 0; i < flow->num_variables; i++) { | |||
assert(*flow->variables[i]); | |||
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); | |||
/* replace the variable ref with the phi function */ | |||
*flow->variables[i] = ifthen->phi[i]; | |||
} | |||
} | |||
else { | |||
/* no else clause */ | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); | |||
for (i = 0; i < flow->num_variables; i++) { | |||
assert(*flow->variables[i]); | |||
LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); | |||
/* replace the variable ref with the phi function */ | |||
*flow->variables[i] = ifthen->phi[i]; | |||
} | |||
} | |||
FREE(ifthen->phi); | |||
/*** | |||
*** Now patch in the various branch instructions. | |||
***/ | |||
/* | |||
* Now patch in the various branch instructions. | |||
*/ | |||
/* Insert the conditional branch instruction at the end of entry_block */ | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); | |||
LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block); | |||
if (ifthen->false_block) { | |||
/* we have an else clause */ | |||
LLVMBuildCondBr(ctx->builder, ifthen->condition, | |||
LLVMBuildCondBr(ifthen->builder, ifthen->condition, | |||
ifthen->true_block, ifthen->false_block); | |||
} | |||
else { | |||
/* no else clause */ | |||
LLVMBuildCondBr(ctx->builder, ifthen->condition, | |||
LLVMBuildCondBr(ifthen->builder, ifthen->condition, | |||
ifthen->true_block, ifthen->merge_block); | |||
} | |||
/* Insert branch from end of true_block to merge_block */ | |||
if (ifthen->false_block) { | |||
/* Append an unconditional Br(anch) instruction on the true_block */ | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); | |||
LLVMBuildBr(ctx->builder, ifthen->merge_block); | |||
} | |||
else { | |||
/* No else clause. | |||
* Note that we've already inserted the branch at the end of | |||
* true_block. See the very first LLVMBuildBr() call in this function. | |||
*/ | |||
} | |||
/* Resume building code at end of the ifthen->merge_block */ | |||
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); | |||
LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block); | |||
} | |||
@@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder, | |||
} | |||
res = LLVMBuildAlloca(first_builder, type, name); | |||
LLVMBuildStore(builder, LLVMConstNull(type), res); | |||
LLVMDisposeBuilder(first_builder); | |||
@@ -41,52 +41,49 @@ | |||
struct lp_type; | |||
struct lp_build_flow_context; | |||
struct lp_build_flow_context * | |||
lp_build_flow_create(LLVMBuilderRef builder); | |||
void | |||
lp_build_flow_destroy(struct lp_build_flow_context *flow); | |||
void | |||
lp_build_flow_scope_begin(struct lp_build_flow_context *flow); | |||
void | |||
lp_build_flow_scope_declare(struct lp_build_flow_context *flow, | |||
LLVMValueRef *variable); | |||
/** | |||
* Early exit. Useful to skip to the end of a function or block when | |||
* the execution mask becomes zero or when there is an error condition. | |||
*/ | |||
struct lp_build_skip_context | |||
{ | |||
LLVMBuilderRef builder; | |||
void | |||
lp_build_flow_scope_end(struct lp_build_flow_context *flow); | |||
/** Block to skip to */ | |||
LLVMBasicBlockRef block; | |||
}; | |||
void | |||
lp_build_flow_skip_begin(struct lp_build_flow_context *flow); | |||
lp_build_flow_skip_begin(struct lp_build_skip_context *ctx, | |||
LLVMBuilderRef builder); | |||
void | |||
lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, | |||
lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx, | |||
LLVMValueRef cond); | |||
void | |||
lp_build_flow_skip_end(struct lp_build_flow_context *flow); | |||
lp_build_flow_skip_end(struct lp_build_skip_context *ctx); | |||
struct lp_build_mask_context | |||
{ | |||
struct lp_build_flow_context *flow; | |||
struct lp_build_skip_context skip; | |||
LLVMTypeRef reg_type; | |||
LLVMValueRef value; | |||
LLVMValueRef var; | |||
}; | |||
void | |||
lp_build_mask_begin(struct lp_build_mask_context *mask, | |||
struct lp_build_flow_context *flow, | |||
LLVMBuilderRef builder, | |||
struct lp_type type, | |||
LLVMValueRef value); | |||
LLVMValueRef | |||
lp_build_mask_value(struct lp_build_mask_context *mask); | |||
/** | |||
* Bitwise AND the mask with the given value, if a previous mask was set. | |||
*/ | |||
@@ -94,6 +91,9 @@ void | |||
lp_build_mask_update(struct lp_build_mask_context *mask, | |||
LLVMValueRef value); | |||
void | |||
lp_build_mask_check(struct lp_build_mask_context *mask); | |||
LLVMValueRef | |||
lp_build_mask_end(struct lp_build_mask_context *mask); | |||
@@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask); | |||
struct lp_build_loop_state | |||
{ | |||
LLVMBasicBlockRef block; | |||
LLVMValueRef counter_var; | |||
LLVMValueRef counter; | |||
}; | |||
@@ -128,22 +129,28 @@ void | |||
lp_build_loop_end_cond(LLVMBuilderRef builder, | |||
LLVMValueRef end, | |||
LLVMValueRef step, | |||
int cond, /* LLVM condition */ | |||
LLVMIntPredicate cond, | |||
struct lp_build_loop_state *state); | |||
/** | |||
* if/else/endif. | |||
*/ | |||
struct lp_build_if_state | |||
{ | |||
LLVMBuilderRef builder; | |||
struct lp_build_flow_context *flow; | |||
LLVMValueRef condition; | |||
LLVMBasicBlockRef entry_block; | |||
LLVMBasicBlockRef true_block; | |||
LLVMBasicBlockRef false_block; | |||
LLVMBasicBlockRef merge_block; | |||
}; | |||
void | |||
lp_build_if(struct lp_build_if_state *ctx, | |||
struct lp_build_flow_context *flow, | |||
LLVMBuilderRef builder, | |||
LLVMValueRef condition); | |||
@@ -35,6 +35,7 @@ | |||
#include "util/u_format.h" | |||
#include "util/u_cpu_detect.h" | |||
#include "lp_bld_arit.h" | |||
#include "lp_bld_type.h" | |||
@@ -42,7 +43,7 @@ | |||
#include "lp_bld_conv.h" | |||
#include "lp_bld_gather.h" | |||
#include "lp_bld_format.h" | |||
#include "lp_bld_logic.h" | |||
/** | |||
* Extract Y, U, V channels from packed UYVY. | |||
@@ -59,7 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, | |||
LLVMValueRef *v) | |||
{ | |||
struct lp_type type; | |||
LLVMValueRef shift, mask; | |||
LLVMValueRef mask; | |||
memset(&type, 0, sizeof type); | |||
type.width = 32; | |||
@@ -69,14 +70,37 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, | |||
assert(lp_check_value(type, i)); | |||
/* | |||
* y = (uyvy >> 16*i) & 0xff | |||
* y = (uyvy >> (16*i + 8)) & 0xff | |||
* u = (uyvy ) & 0xff | |||
* v = (uyvy >> 16 ) & 0xff | |||
*/ | |||
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); | |||
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); | |||
*y = LLVMBuildLShr(builder, packed, shift, ""); | |||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) | |||
/*
* Avoid a shift with a per-element count: x86 has no support for it, so
* it gets translated to roughly 5 instructions per element. Performance
* was not measured, but this cuts shader size by quite a bit (less of a
* difference if the cpu has no sse4.1 support).
*/
if (util_cpu_caps.has_sse2 && n == 4) { | |||
LLVMValueRef sel, tmp, tmp2; | |||
struct lp_build_context bld32; | |||
lp_build_context_init(&bld32, builder, type); | |||
tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); | |||
tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), ""); | |||
sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0)); | |||
*y = lp_build_select(&bld32, sel, tmp, tmp2); | |||
} else | |||
#endif | |||
{ | |||
LLVMValueRef shift; | |||
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); | |||
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); | |||
*y = LLVMBuildLShr(builder, packed, shift, ""); | |||
} | |||
*u = packed; | |||
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); | |||
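The select-based path above is equivalent to the variable shift it replaces, which is easy to confirm in plain scalar C. A minimal standalone check, not part of the patch (the packed test value and helper names are made up):

#include <assert.h>
#include <stdint.h>

/* Reference: extract Y with a per-element shift count. */
static uint32_t y_by_shift(uint32_t uyvy, uint32_t i)
{
   return (uyvy >> (16 * i + 8)) & 0xff;
}

/* Trick used in the patch: pre-shift by 8 and by 8+16, then select on i == 0. */
static uint32_t y_by_select(uint32_t uyvy, uint32_t i)
{
   uint32_t tmp  = uyvy >> 8;          /* Y0 in the low byte */
   uint32_t tmp2 = tmp >> 16;          /* Y1 in the low byte */
   return (i == 0 ? tmp : tmp2) & 0xff;
}

int main(void)
{
   uint32_t uyvy = 0x44332211;         /* U=0x11 Y0=0x22 V=0x33 Y1=0x44 */
   for (uint32_t i = 0; i < 2; i++)
      assert(y_by_shift(uyvy, i) == y_by_select(uyvy, i));
   return 0;
}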
@@ -103,7 +127,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, | |||
LLVMValueRef *v) | |||
{ | |||
struct lp_type type; | |||
LLVMValueRef shift, mask; | |||
LLVMValueRef mask; | |||
memset(&type, 0, sizeof type); | |||
type.width = 32; | |||
@@ -118,8 +142,30 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, | |||
* v = (yuyv >> 24 ) & 0xff | |||
*/ | |||
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); | |||
*y = LLVMBuildLShr(builder, packed, shift, ""); | |||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) | |||
/*
* Avoid a shift with a per-element count: x86 has no support for it, so
* it gets translated to roughly 5 instructions per element. Performance
* was not measured, but this cuts shader size by quite a bit (less of a
* difference if the cpu has no sse4.1 support).
*/
if (util_cpu_caps.has_sse2 && n == 4) { | |||
LLVMValueRef sel, tmp; | |||
struct lp_build_context bld32; | |||
lp_build_context_init(&bld32, builder, type); | |||
tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); | |||
sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0)); | |||
*y = lp_build_select(&bld32, sel, packed, tmp); | |||
} else | |||
#endif | |||
{ | |||
LLVMValueRef shift; | |||
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); | |||
*y = LLVMBuildLShr(builder, packed, shift, ""); | |||
} | |||
*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); | |||
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), ""); | |||
@@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { | |||
{ "asm", GALLIVM_DEBUG_ASM, NULL }, | |||
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, | |||
{ "perf", GALLIVM_DEBUG_PERF, NULL }, | |||
{ "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL }, | |||
DEBUG_NAMED_VALUE_END | |||
}; | |||
@@ -47,4 +47,10 @@ lp_build_init(void); | |||
extern void | |||
lp_func_delete_body(LLVMValueRef func); | |||
extern LLVMValueRef | |||
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, | |||
const char *Name); | |||
#endif /* !LP_BLD_INIT_H */ |
@@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder, | |||
if(func == PIPE_FUNC_ALWAYS) | |||
return ones; | |||
/* TODO: optimize the constant case */ | |||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) | |||
/* | |||
* There are no unsigned integer comparison instructions in SSE. | |||
*/ | |||
/* XXX: It is not clear if we should use the ordered or unordered operators */ | |||
if (!type.floating && !type.sign && | |||
type.width * type.length == 128 && | |||
util_cpu_caps.has_sse2 && | |||
(func == PIPE_FUNC_LESS || | |||
func == PIPE_FUNC_LEQUAL || | |||
func == PIPE_FUNC_GREATER || | |||
func == PIPE_FUNC_GEQUAL) && | |||
(gallivm_debug & GALLIVM_DEBUG_PERF)) { | |||
debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", | |||
__FUNCTION__, type.length, type.width); | |||
} | |||
#endif | |||
#if HAVE_LLVM < 0x0207 | |||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) | |||
@@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder, | |||
#endif | |||
#endif /* HAVE_LLVM < 0x0207 */ | |||
/* XXX: It is not clear if we should use the ordered or unordered operators */ | |||
if(type.floating) { | |||
LLVMRealPredicate op; | |||
switch(func) { | |||
@@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld, | |||
LLVMTypeRef arg_type; | |||
LLVMValueRef args[3]; | |||
if (type.width == 64) { | |||
if (type.floating && | |||
type.width == 64) { | |||
intrinsic = "llvm.x86.sse41.blendvpd"; | |||
arg_type = LLVMVectorType(LLVMDoubleType(), 2); | |||
} else if (type.width == 32) { | |||
} else if (type.floating && | |||
type.width == 32) { | |||
intrinsic = "llvm.x86.sse41.blendvps"; | |||
arg_type = LLVMVectorType(LLVMFloatType(), 4); | |||
} else { |
@@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF) | |||
llvm::Function *func = llvm::unwrap<llvm::Function>(FF); | |||
func->deleteBody(); | |||
} | |||
extern "C" | |||
LLVMValueRef | |||
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, | |||
const char *Name) | |||
{ | |||
return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name)); | |||
} | |||
@@ -29,6 +29,8 @@ | |||
#include "util/u_debug.h" | |||
#include "util/u_memory.h" | |||
#include "util/u_string.h" | |||
#include "lp_bld_const.h" | |||
#include "lp_bld_printf.h" | |||
@@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) | |||
return LLVMBuildCall(builder, func_printf, params, argcount + 1, ""); | |||
} | |||
/** | |||
* Print a float[4] vector. | |||
*/ | |||
LLVMValueRef | |||
lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec) | |||
{ | |||
char format[1000]; | |||
LLVMValueRef x, y, z, w; | |||
x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), ""); | |||
y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), ""); | |||
z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), ""); | |||
w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), ""); | |||
util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg); | |||
return lp_build_printf(builder, format, x, y, z, w); | |||
} |
@@ -35,5 +35,9 @@ | |||
LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len); | |||
LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...); | |||
LLVMValueRef | |||
lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec); | |||
#endif | |||
@@ -81,11 +81,15 @@ LLVMValueRef | |||
lp_build_scalar_ddx(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
{ | |||
LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); | |||
LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); | |||
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); | |||
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); | |||
return lp_build_sub(bld, a_right, a_left); | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); | |||
LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0); | |||
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left"); | |||
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right"); | |||
if (bld->type.floating) | |||
return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx"); | |||
else | |||
return LLVMBuildSub(bld->builder, a_right, a_left, "ddx"); | |||
} | |||
@@ -93,9 +97,13 @@ LLVMValueRef | |||
lp_build_scalar_ddy(struct lp_build_context *bld, | |||
LLVMValueRef a) | |||
{ | |||
LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); | |||
LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); | |||
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); | |||
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); | |||
return lp_build_sub(bld, a_bottom, a_top); | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); | |||
LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0); | |||
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top"); | |||
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom"); | |||
if (bld->type.floating) | |||
return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy"); | |||
else | |||
return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy"); | |||
} |
@@ -39,12 +39,52 @@ | |||
#include "lp_bld_arit.h" | |||
#include "lp_bld_const.h" | |||
#include "lp_bld_debug.h" | |||
#include "lp_bld_printf.h" | |||
#include "lp_bld_flow.h" | |||
#include "lp_bld_sample.h" | |||
#include "lp_bld_swizzle.h" | |||
#include "lp_bld_type.h" | |||
/* | |||
* Bri-linear factor. Should be greater than one. | |||
*/ | |||
#define BRILINEAR_FACTOR 2 | |||
/** | |||
* Does the given texture wrap mode allow sampling the texture border color? | |||
* XXX maybe move this into gallium util code. | |||
*/ | |||
boolean | |||
lp_sampler_wrap_mode_uses_border_color(unsigned mode, | |||
unsigned min_img_filter, | |||
unsigned mag_img_filter) | |||
{ | |||
switch (mode) { | |||
case PIPE_TEX_WRAP_REPEAT: | |||
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: | |||
case PIPE_TEX_WRAP_MIRROR_REPEAT: | |||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: | |||
return FALSE; | |||
case PIPE_TEX_WRAP_CLAMP: | |||
case PIPE_TEX_WRAP_MIRROR_CLAMP: | |||
if (min_img_filter == PIPE_TEX_FILTER_NEAREST && | |||
mag_img_filter == PIPE_TEX_FILTER_NEAREST) { | |||
return FALSE; | |||
} else { | |||
return TRUE; | |||
} | |||
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: | |||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: | |||
return TRUE; | |||
default: | |||
assert(0 && "unexpected wrap mode"); | |||
return FALSE; | |||
} | |||
} | |||
/** | |||
* Initialize lp_sampler_static_state object with the gallium sampler | |||
* and texture state. | |||
@@ -93,31 +133,40 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, | |||
state->wrap_r = sampler->wrap_r; | |||
state->min_img_filter = sampler->min_img_filter; | |||
state->mag_img_filter = sampler->mag_img_filter; | |||
if (view->last_level) { | |||
if (view->last_level && sampler->max_lod > 0.0f) { | |||
state->min_mip_filter = sampler->min_mip_filter; | |||
} else { | |||
state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; | |||
} | |||
if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { | |||
if (sampler->lod_bias != 0.0f) { | |||
state->lod_bias_non_zero = 1; | |||
} | |||
/* If min_lod == max_lod we can greatly simplify mipmap selection. | |||
* This is a case that occurs during automatic mipmap generation. | |||
*/ | |||
if (sampler->min_lod == sampler->max_lod) { | |||
state->min_max_lod_equal = 1; | |||
} else { | |||
if (sampler->min_lod > 0.0f) { | |||
state->apply_min_lod = 1; | |||
} | |||
if (sampler->max_lod < (float)view->last_level) { | |||
state->apply_max_lod = 1; | |||
} | |||
} | |||
} | |||
state->compare_mode = sampler->compare_mode; | |||
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { | |||
state->compare_func = sampler->compare_func; | |||
} | |||
state->normalized_coords = sampler->normalized_coords; | |||
state->lod_bias = sampler->lod_bias; | |||
if (!view->last_level && | |||
sampler->min_img_filter == sampler->mag_img_filter) { | |||
state->min_lod = 0.0f; | |||
state->max_lod = 0.0f; | |||
} else { | |||
state->min_lod = MAX2(sampler->min_lod, 0.0f); | |||
state->max_lod = sampler->max_lod; | |||
} | |||
state->border_color[0] = sampler->border_color[0]; | |||
state->border_color[1] = sampler->border_color[1]; | |||
state->border_color[2] = sampler->border_color[2]; | |||
state->border_color[3] = sampler->border_color[3]; | |||
/* | |||
* FIXME: Handle the remainder of pipe_sampler_view. | |||
@@ -125,6 +174,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, | |||
} | |||
/** | |||
* Generate code to compute coordinate gradient (rho). | |||
* \param ddx partial derivatives of (s, t, r, q) with respect to X | |||
* \param ddy partial derivatives of (s, t, r, q) with respect to Y | |||
* | |||
* XXX: The resulting rho is scalar, so we ignore all but the first element of | |||
* derivatives that are passed by the shader. | |||
*/ | |||
static LLVMValueRef | |||
lp_build_rho(struct lp_build_sample_context *bld, | |||
const LLVMValueRef ddx[4], | |||
const LLVMValueRef ddy[4]) | |||
{ | |||
struct lp_build_context *float_size_bld = &bld->float_size_bld; | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
const unsigned dims = bld->dims; | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); | |||
LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); | |||
LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); | |||
LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; | |||
LLVMValueRef rho_x, rho_y; | |||
LLVMValueRef rho_vec; | |||
LLVMValueRef float_size; | |||
LLVMValueRef rho; | |||
dsdx = ddx[0]; | |||
dsdy = ddy[0]; | |||
if (dims <= 1) { | |||
rho_x = dsdx; | |||
rho_y = dsdy; | |||
} | |||
else { | |||
rho_x = float_size_bld->undef; | |||
rho_y = float_size_bld->undef; | |||
rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, ""); | |||
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, ""); | |||
dtdx = ddx[1]; | |||
dtdy = ddy[1]; | |||
rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, ""); | |||
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, ""); | |||
if (dims >= 3) { | |||
drdx = ddx[2]; | |||
drdy = ddy[2]; | |||
rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, ""); | |||
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, ""); | |||
} | |||
} | |||
rho_x = lp_build_abs(float_size_bld, rho_x); | |||
rho_y = lp_build_abs(float_size_bld, rho_y); | |||
rho_vec = lp_build_max(float_size_bld, rho_x, rho_y); | |||
float_size = lp_build_int_to_float(float_size_bld, bld->int_size); | |||
rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); | |||
if (dims <= 1) { | |||
rho = rho_vec; | |||
} | |||
else { | |||
if (dims >= 2) { | |||
LLVMValueRef rho_s, rho_t, rho_r; | |||
rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); | |||
rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, ""); | |||
rho = lp_build_max(float_bld, rho_s, rho_t); | |||
if (dims >= 3) { | |||
rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index2, "");
rho = lp_build_max(float_bld, rho, rho_r); | |||
} | |||
} | |||
} | |||
return rho; | |||
} | |||
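For readers following the vector code, lp_build_rho boils down to a small piece of scalar math: the maximum, over the active dimensions, of the absolute partial derivatives scaled by the texture size. A standalone sketch of that reading, with made-up derivatives and sizes:

#include <math.h>
#include <stdio.h>

/* rho = max over dims of max(|ddx[d]|, |ddy[d]|) * size[d] */
static float compute_rho(const float ddx[3], const float ddy[3],
                         const int size[3], int dims)
{
   float rho = 0.0f;
   for (int d = 0; d < dims; d++) {
      float r = fmaxf(fabsf(ddx[d]), fabsf(ddy[d])) * (float)size[d];
      rho = d == 0 ? r : fmaxf(rho, r);
   }
   return rho;
}

int main(void)
{
   const float ddx[3] = { 1.0f / 256.0f, 0.0f, 0.0f };
   const float ddy[3] = { 0.0f, 1.0f / 128.0f, 0.0f };
   const int size[3] = { 256, 256, 1 };
   /* 2D texture, one texel per pixel in x, two texels per pixel in y -> rho = 2 */
   printf("rho = %f\n", compute_rho(ddx, ddy, size, 2));
   return 0;
}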
/* | |||
* Bri-linear lod computation | |||
* | |||
* Use a piece-wise linear approximation of log2 such that: | |||
* - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc. | |||
* - linear approximation for values in the neighborhood of 0.5, 1.5, etc.,
* with the steepness specified in 'factor' | |||
* - exact result for 0.5, 1.5, etc. | |||
* | |||
* | |||
* 1.0 - /----* | |||
* / | |||
* / | |||
* / | |||
* 0.5 - * | |||
* / | |||
* / | |||
* / | |||
* 0.0 - *----/ | |||
* | |||
* | | | |||
* 2^0 2^1 | |||
* | |||
* This is a technique also commonly used in hardware: | |||
* - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html | |||
* | |||
* TODO: For correctness, this should only be applied when the texture is known to
* have regular mipmaps, i.e., mipmaps derived from the base level. | |||
* | |||
* TODO: This could be done in fixed point, where applicable. | |||
*/ | |||
static void | |||
lp_build_brilinear_lod(struct lp_build_context *bld, | |||
LLVMValueRef lod, | |||
double factor, | |||
LLVMValueRef *out_lod_ipart, | |||
LLVMValueRef *out_lod_fpart) | |||
{ | |||
LLVMValueRef lod_fpart; | |||
double pre_offset = (factor - 0.5)/factor - 0.5; | |||
double post_offset = 1 - factor; | |||
if (0) { | |||
lp_build_printf(bld->builder, "lod = %f\n", lod); | |||
} | |||
lod = lp_build_add(bld, lod, | |||
lp_build_const_vec(bld->type, pre_offset)); | |||
lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); | |||
lod_fpart = lp_build_mul(bld, lod_fpart, | |||
lp_build_const_vec(bld->type, factor)); | |||
lod_fpart = lp_build_add(bld, lod_fpart, | |||
lp_build_const_vec(bld->type, post_offset)); | |||
/* | |||
* It's not necessary to clamp lod_fpart since: | |||
* - the above expression will never produce numbers greater than one. | |||
* - the mip filtering branch is only taken if lod_fpart is positive | |||
*/ | |||
*out_lod_fpart = lod_fpart; | |||
if (0) { | |||
lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart); | |||
lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart); | |||
} | |||
} | |||
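With factor = 2 the pre offset works out to 0.25 and the post offset to -1, so lod_fpart lands in [-1, 1). A scalar rendition of the same arithmetic, offered only as a worked example of the constants above (not the gallivm code):

#include <math.h>
#include <stdio.h>

#define BRILINEAR_FACTOR 2.0

/* Scalar version of the lp_build_brilinear_lod() arithmetic. */
static void brilinear_lod(double lod, double factor,
                          int *out_ipart, double *out_fpart)
{
   double pre_offset  = (factor - 0.5) / factor - 0.5;   /* 0.25 for factor 2 */
   double post_offset = 1.0 - factor;                    /* -1   for factor 2 */
   double biased = lod + pre_offset;
   double ipart  = floor(biased);
   double fpart  = biased - ipart;                       /* fract() */

   *out_ipart = (int)ipart;
   *out_fpart = fpart * factor + post_offset;            /* always in [-1, 1) */
}

int main(void)
{
   for (double lod = 0.0; lod <= 2.0; lod += 0.25) {
      int ipart;
      double fpart;
      brilinear_lod(lod, BRILINEAR_FACTOR, &ipart, &fpart);
      printf("lod %.2f -> ipart %d fpart %+.2f\n", lod, ipart, fpart);
   }
   return 0;
}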
/* | |||
* Combined log2 and brilinear lod computation. | |||
* | |||
* It is essentially identical to calling lp_build_fast_log2() and
* lp_build_brilinear_lod() above, but by combining them we can compute the
* integer and fractional parts independently.
*/ | |||
static void | |||
lp_build_brilinear_rho(struct lp_build_context *bld, | |||
LLVMValueRef rho, | |||
double factor, | |||
LLVMValueRef *out_lod_ipart, | |||
LLVMValueRef *out_lod_fpart) | |||
{ | |||
LLVMValueRef lod_ipart; | |||
LLVMValueRef lod_fpart; | |||
const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); | |||
const double post_offset = 1 - 2*factor; | |||
assert(bld->type.floating); | |||
assert(lp_check_value(bld->type, rho)); | |||
/* | |||
* The pre factor will make the intersections with the exact powers of two | |||
* happen precisely where we want them to be, which means that the integer
* part will not need any post adjustments. | |||
*/ | |||
rho = lp_build_mul(bld, rho, | |||
lp_build_const_vec(bld->type, pre_factor)); | |||
/* ipart = ifloor(log2(rho)) */ | |||
lod_ipart = lp_build_extract_exponent(bld, rho, 0); | |||
/* fpart = rho / 2**ipart */ | |||
lod_fpart = lp_build_extract_mantissa(bld, rho); | |||
lod_fpart = lp_build_mul(bld, lod_fpart, | |||
lp_build_const_vec(bld->type, factor)); | |||
lod_fpart = lp_build_add(bld, lod_fpart, | |||
lp_build_const_vec(bld->type, post_offset)); | |||
/* | |||
* Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: | |||
* - the above expression will never produce numbers greater than one. | |||
* - the mip filtering branch is only taken if lod_fpart is positive | |||
*/ | |||
*out_lod_ipart = lod_ipart; | |||
*out_lod_fpart = lod_fpart; | |||
} | |||
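The combined variant can be checked the same way in scalar form; frexp() plays the role of lp_build_extract_exponent/lp_build_extract_mantissa here. Illustrative sketch only, with an arbitrary test value:

#include <math.h>
#include <stdio.h>

/* Scalar version of the lp_build_brilinear_rho() arithmetic. */
static void brilinear_rho(double rho, double factor,
                          int *out_ipart, double *out_fpart)
{
   double pre_factor  = (2.0 * factor - 0.5) / (M_SQRT2 * factor);
   double post_offset = 1.0 - 2.0 * factor;
   int e;
   double m;

   rho *= pre_factor;
   m = frexp(rho, &e);          /* rho = m * 2^e, with m in [0.5, 1) */
   *out_ipart = e - 1;          /* floor(log2(rho)) */
   *out_fpart = (2.0 * m) * factor + post_offset;   /* 2*m is the [1, 2) mantissa */
}

int main(void)
{
   int ipart;
   double fpart;
   brilinear_rho(1.0, 2.0, &ipart, &fpart);
   printf("rho 1.0 -> ipart %d fpart %+.3f\n", ipart, fpart);
   return 0;
}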
/** | |||
* Generate code to compute texture level of detail (lambda). | |||
* \param ddx partial derivatives of (s, t, r, q) with respect to X | |||
@@ -138,83 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, | |||
* XXX: The resulting lod is scalar, so ignore all but the first element of | |||
* derivatives, lod_bias, etc that are passed by the shader. | |||
*/ | |||
LLVMValueRef | |||
void | |||
lp_build_lod_selector(struct lp_build_sample_context *bld, | |||
unsigned unit, | |||
const LLVMValueRef ddx[4], | |||
const LLVMValueRef ddy[4], | |||
LLVMValueRef lod_bias, /* optional */ | |||
LLVMValueRef explicit_lod, /* optional */ | |||
LLVMValueRef width, | |||
LLVMValueRef height, | |||
LLVMValueRef depth) | |||
unsigned mip_filter, | |||
LLVMValueRef *out_lod_ipart, | |||
LLVMValueRef *out_lod_fpart) | |||
{ | |||
if (bld->static_state->min_lod == bld->static_state->max_lod) { | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
LLVMValueRef lod; | |||
*out_lod_ipart = bld->int_bld.zero; | |||
*out_lod_fpart = bld->float_bld.zero; | |||
if (bld->static_state->min_max_lod_equal) { | |||
/* User is forcing sampling from a particular mipmap level. | |||
* This is hit during mipmap generation. | |||
*/ | |||
return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); | |||
LLVMValueRef min_lod = | |||
bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); | |||
lod = min_lod; | |||
} | |||
else { | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), | |||
bld->static_state->lod_bias); | |||
LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), | |||
bld->static_state->min_lod); | |||
LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), | |||
bld->static_state->max_lod); | |||
LLVMValueRef sampler_lod_bias = | |||
bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit); | |||
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); | |||
LLVMValueRef lod; | |||
if (explicit_lod) { | |||
lod = LLVMBuildExtractElement(bld->builder, explicit_lod, | |||
index0, ""); | |||
} | |||
else { | |||
const int dims = texture_dims(bld->static_state->target); | |||
LLVMValueRef dsdx, dsdy; | |||
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; | |||
LLVMValueRef rho; | |||
dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); | |||
dsdx = lp_build_abs(float_bld, dsdx); | |||
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); | |||
dsdy = lp_build_abs(float_bld, dsdy); | |||
if (dims > 1) { | |||
dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); | |||
dtdx = lp_build_abs(float_bld, dtdx); | |||
dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); | |||
dtdy = lp_build_abs(float_bld, dtdy); | |||
if (dims > 2) { | |||
drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); | |||
drdx = lp_build_abs(float_bld, drdx); | |||
drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); | |||
drdy = lp_build_abs(float_bld, drdy); | |||
} | |||
} | |||
rho = lp_build_rho(bld, ddx, ddy); | |||
/* Compute rho = max of all partial derivatives scaled by texture size. | |||
* XXX this could be vectorized somewhat | |||
/* | |||
* Compute lod = log2(rho) | |||
*/ | |||
rho = LLVMBuildFMul(bld->builder, | |||
lp_build_max(float_bld, dsdx, dsdy), | |||
lp_build_int_to_float(float_bld, width), ""); | |||
if (dims > 1) { | |||
LLVMValueRef max; | |||
max = LLVMBuildFMul(bld->builder, | |||
lp_build_max(float_bld, dtdx, dtdy), | |||
lp_build_int_to_float(float_bld, height), ""); | |||
rho = lp_build_max(float_bld, rho, max); | |||
if (dims > 2) { | |||
max = LLVMBuildFMul(bld->builder, | |||
lp_build_max(float_bld, drdx, drdy), | |||
lp_build_int_to_float(float_bld, depth), ""); | |||
rho = lp_build_max(float_bld, rho, max); | |||
if (!lod_bias && | |||
!bld->static_state->lod_bias_non_zero && | |||
!bld->static_state->apply_max_lod && | |||
!bld->static_state->apply_min_lod) { | |||
/* | |||
* Special case when there are no post-log2 adjustments, which | |||
* saves instructions by keeping the integer and fractional lod
* computations separate from the start. | |||
*/ | |||
if (mip_filter == PIPE_TEX_MIPFILTER_NONE || | |||
mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { | |||
*out_lod_ipart = lp_build_ilog2(float_bld, rho); | |||
*out_lod_fpart = bld->float_bld.zero; | |||
return; | |||
} | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && | |||
!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { | |||
lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, | |||
out_lod_ipart, out_lod_fpart); | |||
return; | |||
} | |||
} | |||
/* compute lod = log2(rho) */ | |||
lod = lp_build_log2(float_bld, rho); | |||
if (0) { | |||
lod = lp_build_log2(float_bld, rho); | |||
} | |||
else { | |||
lod = lp_build_fast_log2(float_bld, rho); | |||
} | |||
/* add shader lod bias */ | |||
if (lod_bias) { | |||
@@ -225,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, | |||
} | |||
/* add sampler lod bias */ | |||
lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); | |||
if (bld->static_state->lod_bias_non_zero) | |||
lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); | |||
/* clamp lod */ | |||
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); | |||
if (bld->static_state->apply_max_lod) { | |||
LLVMValueRef max_lod = | |||
bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit); | |||
return lod; | |||
lod = lp_build_min(float_bld, lod, max_lod); | |||
} | |||
if (bld->static_state->apply_min_lod) { | |||
LLVMValueRef min_lod = | |||
bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); | |||
lod = lp_build_max(float_bld, lod, min_lod); | |||
} | |||
} | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { | |||
if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { | |||
lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, | |||
out_lod_ipart, out_lod_fpart); | |||
} | |||
else { | |||
lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); | |||
} | |||
lp_build_name(*out_lod_fpart, "lod_fpart"); | |||
} | |||
else { | |||
*out_lod_ipart = lp_build_iround(float_bld, lod); | |||
} | |||
lp_build_name(*out_lod_ipart, "lod_ipart"); | |||
return; | |||
} | |||
@@ -245,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, | |||
void | |||
lp_build_nearest_mip_level(struct lp_build_sample_context *bld, | |||
unsigned unit, | |||
LLVMValueRef lod, | |||
LLVMValueRef lod_ipart, | |||
LLVMValueRef *level_out) | |||
{ | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
struct lp_build_context *int_bld = &bld->int_bld; | |||
LLVMValueRef last_level, level; | |||
@@ -258,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, | |||
bld->builder, unit); | |||
/* convert float lod to integer */ | |||
level = lp_build_iround(float_bld, lod); | |||
level = lod_ipart; | |||
/* clamp level to legal range of levels */ | |||
*level_out = lp_build_clamp(int_bld, level, zero, last_level); | |||
@@ -273,43 +563,77 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, | |||
void | |||
lp_build_linear_mip_levels(struct lp_build_sample_context *bld, | |||
unsigned unit, | |||
LLVMValueRef lod, | |||
LLVMValueRef lod_ipart, | |||
LLVMValueRef *lod_fpart_inout, | |||
LLVMValueRef *level0_out, | |||
LLVMValueRef *level1_out, | |||
LLVMValueRef *weight_out) | |||
LLVMValueRef *level1_out) | |||
{ | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
LLVMBuilderRef builder = bld->builder; | |||
struct lp_build_context *int_bld = &bld->int_bld; | |||
LLVMValueRef last_level, level; | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
LLVMValueRef last_level; | |||
LLVMValueRef clamp_min; | |||
LLVMValueRef clamp_max; | |||
*level0_out = lod_ipart; | |||
*level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one); | |||
last_level = bld->dynamic_state->last_level(bld->dynamic_state, | |||
bld->builder, unit); | |||
/* convert float lod to integer */ | |||
level = lp_build_ifloor(float_bld, lod); | |||
/* compute level 0 and clamp to legal range of levels */ | |||
*level0_out = lp_build_clamp(int_bld, level, | |||
int_bld->zero, | |||
last_level); | |||
/* compute level 1 and clamp to legal range of levels */ | |||
level = lp_build_add(int_bld, level, int_bld->one); | |||
*level1_out = lp_build_clamp(int_bld, level, | |||
int_bld->zero, | |||
last_level); | |||
*weight_out = lp_build_fract(float_bld, lod); | |||
/* | |||
* Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the | |||
* minimum number of comparisons, zeroing lod_fpart at the extremes
* in the process.
*/ | |||
/* lod_ipart < 0 */ | |||
clamp_min = LLVMBuildICmp(builder, LLVMIntSLT, | |||
lod_ipart, int_bld->zero, | |||
"clamp_lod_to_zero"); | |||
*level0_out = LLVMBuildSelect(builder, clamp_min, | |||
int_bld->zero, *level0_out, ""); | |||
*level1_out = LLVMBuildSelect(builder, clamp_min, | |||
int_bld->zero, *level1_out, ""); | |||
*lod_fpart_inout = LLVMBuildSelect(builder, clamp_min, | |||
float_bld->zero, *lod_fpart_inout, ""); | |||
/* lod_ipart >= last_level */ | |||
clamp_max = LLVMBuildICmp(builder, LLVMIntSGE, | |||
lod_ipart, last_level, | |||
"clamp_lod_to_last"); | |||
*level0_out = LLVMBuildSelect(builder, clamp_max, | |||
last_level, *level0_out, ""); | |||
*level1_out = LLVMBuildSelect(builder, clamp_max, | |||
last_level, *level1_out, ""); | |||
*lod_fpart_inout = LLVMBuildSelect(builder, clamp_max, | |||
float_bld->zero, *lod_fpart_inout, ""); | |||
lp_build_name(*level0_out, "sampler%u_miplevel0", unit); | |||
lp_build_name(*level1_out, "sampler%u_miplevel1", unit); | |||
lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit); | |||
} | |||
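The two LLVMBuildSelect pairs above implement a clamp that is easier to see in scalar form: one comparison per out-of-range direction, zeroing the interpolation weight as a side effect. A standalone equivalent (the example values are made up):

#include <stdio.h>

/* Scalar equivalent of the two-select clamp in lp_build_linear_mip_levels(). */
static void clamp_mip_levels(int lod_ipart, int last_level,
                             float *lod_fpart, int *level0, int *level1)
{
   *level0 = lod_ipart;
   *level1 = lod_ipart + 1;

   if (lod_ipart < 0) {                 /* clamp_lod_to_zero */
      *level0 = 0;
      *level1 = 0;
      *lod_fpart = 0.0f;
   }
   if (lod_ipart >= last_level) {       /* clamp_lod_to_last */
      *level0 = last_level;
      *level1 = last_level;
      *lod_fpart = 0.0f;
   }
}

int main(void)
{
   float fpart = 0.75f;
   int l0, l1;
   clamp_mip_levels(9, 7, &fpart, &l0, &l1);   /* lod beyond the last level */
   printf("level0 %d level1 %d weight %f\n", l0, l1, fpart);
   return 0;
}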
/** | |||
* Return pointer to a single mipmap level. | |||
* \param data_array array of pointers to mipmap levels | |||
* \param level integer mipmap level | |||
*/ | |||
LLVMValueRef | |||
lp_build_get_mipmap_level(struct lp_build_sample_context *bld, | |||
LLVMValueRef data_array, LLVMValueRef level) | |||
LLVMValueRef level) | |||
{ | |||
LLVMValueRef indexes[2], data_ptr; | |||
indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); | |||
indexes[1] = level; | |||
data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); | |||
data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, ""); | |||
data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); | |||
return data_ptr; | |||
} | |||
@@ -317,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld, | |||
LLVMValueRef | |||
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, | |||
LLVMValueRef data_array, int level) | |||
int level) | |||
{ | |||
LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); | |||
return lp_build_get_mipmap_level(bld, data_array, lvl); | |||
return lp_build_get_mipmap_level(bld, lvl); | |||
} | |||
@@ -329,13 +653,24 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, | |||
* Return max(1, base_size >> level); | |||
*/ | |||
static LLVMValueRef | |||
lp_build_minify(struct lp_build_sample_context *bld, | |||
lp_build_minify(struct lp_build_context *bld, | |||
LLVMValueRef base_size, | |||
LLVMValueRef level) | |||
{ | |||
LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); | |||
size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); | |||
return size; | |||
assert(lp_check_value(bld->type, base_size)); | |||
assert(lp_check_value(bld->type, level)); | |||
if (level == bld->zero) { | |||
/* if we're using mipmap level zero, no minification is needed */ | |||
return base_size; | |||
} | |||
else { | |||
LLVMValueRef size = | |||
LLVMBuildLShr(bld->builder, base_size, level, "minify"); | |||
assert(bld->type.sign); | |||
size = lp_build_max(bld, size, bld->one); | |||
return size; | |||
} | |||
} | |||
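In scalar terms lp_build_minify() is just a clamped shift; a tiny standalone illustration with arbitrary sizes:

#include <stdio.h>

/* max(1, base_size >> level), scalar version of lp_build_minify() */
static int minify(int base_size, int level)
{
   if (level == 0)
      return base_size;           /* no minification at the base level */
   int size = base_size >> level;
   return size > 0 ? size : 1;    /* never let a dimension reach zero */
}

int main(void)
{
   for (int level = 0; level <= 5; level++)
      printf("level %d: %d x %d\n", level, minify(16, level), minify(4, level));
   return 0;
}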
@@ -364,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, | |||
*/ | |||
void | |||
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, | |||
unsigned dims, | |||
LLVMValueRef width_vec, | |||
LLVMValueRef height_vec, | |||
LLVMValueRef depth_vec, | |||
LLVMValueRef ilevel0, | |||
LLVMValueRef ilevel1, | |||
LLVMValueRef row_stride_array, | |||
LLVMValueRef img_stride_array, | |||
LLVMValueRef *width0_vec, | |||
LLVMValueRef *width1_vec, | |||
LLVMValueRef *height0_vec, | |||
LLVMValueRef *height1_vec, | |||
LLVMValueRef *depth0_vec, | |||
LLVMValueRef *depth1_vec, | |||
LLVMValueRef *row_stride0_vec, | |||
LLVMValueRef *row_stride1_vec, | |||
LLVMValueRef *img_stride0_vec, | |||
LLVMValueRef *img_stride1_vec) | |||
LLVMValueRef ilevel, | |||
LLVMValueRef *out_size, | |||
LLVMValueRef *row_stride_vec, | |||
LLVMValueRef *img_stride_vec) | |||
{ | |||
const unsigned mip_filter = bld->static_state->min_mip_filter; | |||
LLVMValueRef ilevel0_vec, ilevel1_vec; | |||
const unsigned dims = bld->dims; | |||
LLVMValueRef ilevel_vec; | |||
ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) | |||
ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); | |||
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); | |||
/* | |||
* Compute width, height, depth at mipmap level 'ilevel0' | |||
* Compute width, height, depth at mipmap level 'ilevel' | |||
*/ | |||
*width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); | |||
*out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); | |||
if (dims >= 2) { | |||
*height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); | |||
*row_stride0_vec = lp_build_get_level_stride_vec(bld, | |||
row_stride_array, | |||
ilevel0); | |||
*row_stride_vec = lp_build_get_level_stride_vec(bld, | |||
bld->row_stride_array, | |||
ilevel); | |||
if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { | |||
*img_stride0_vec = lp_build_get_level_stride_vec(bld, | |||
img_stride_array, | |||
ilevel0); | |||
if (dims == 3) { | |||
*depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); | |||
} | |||
*img_stride_vec = lp_build_get_level_stride_vec(bld, | |||
bld->img_stride_array, | |||
ilevel); | |||
} | |||
} | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { | |||
/* compute width, height, depth for second mipmap level at 'ilevel1' */ | |||
*width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); | |||
if (dims >= 2) { | |||
*height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); | |||
*row_stride1_vec = lp_build_get_level_stride_vec(bld, | |||
row_stride_array, | |||
ilevel1); | |||
if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { | |||
*img_stride1_vec = lp_build_get_level_stride_vec(bld, | |||
img_stride_array, | |||
ilevel1); | |||
if (dims == 3) { | |||
*depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); | |||
} | |||
} | |||
} | |||
/** | |||
* Extract and broadcast texture size. | |||
* | |||
* @param size_type type of the texture size vector (either | |||
* bld->int_size_type or bld->float_size_type) | |||
* @param coord_type type of the returned per-dimension values (either
* bld->int_coord_type or bld->coord_type)
* @param size vector with the texture size (width, height,
* depth)
*/ | |||
void | |||
lp_build_extract_image_sizes(struct lp_build_sample_context *bld, | |||
struct lp_type size_type, | |||
struct lp_type coord_type, | |||
LLVMValueRef size, | |||
LLVMValueRef *out_width, | |||
LLVMValueRef *out_height, | |||
LLVMValueRef *out_depth) | |||
{ | |||
const unsigned dims = bld->dims; | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
*out_width = lp_build_extract_broadcast(bld->builder, | |||
size_type, | |||
coord_type, | |||
size, | |||
LLVMConstInt(i32t, 0, 0)); | |||
if (dims >= 2) { | |||
*out_height = lp_build_extract_broadcast(bld->builder, | |||
size_type, | |||
coord_type, | |||
size, | |||
LLVMConstInt(i32t, 1, 0)); | |||
if (dims == 3) { | |||
*out_depth = lp_build_extract_broadcast(bld->builder, | |||
size_type, | |||
coord_type, | |||
size, | |||
LLVMConstInt(i32t, 2, 0)); | |||
} | |||
} | |||
} | |||
/** | |||
* Unnormalize coords. | |||
* | |||
* @param flt_size vector with the float texture size (width, height, depth)
*/ | |||
void | |||
lp_build_unnormalized_coords(struct lp_build_sample_context *bld, | |||
LLVMValueRef flt_size, | |||
LLVMValueRef *s, | |||
LLVMValueRef *t, | |||
LLVMValueRef *r) | |||
{ | |||
const unsigned dims = bld->dims; | |||
LLVMValueRef width; | |||
LLVMValueRef height; | |||
LLVMValueRef depth; | |||
lp_build_extract_image_sizes(bld, | |||
bld->float_size_type, | |||
bld->coord_type, | |||
flt_size, | |||
&width, | |||
&height, | |||
&depth); | |||
/* s = s * width, t = t * height */ | |||
*s = lp_build_mul(&bld->coord_bld, *s, width); | |||
if (dims >= 2) { | |||
*t = lp_build_mul(&bld->coord_bld, *t, height); | |||
if (dims >= 3) { | |||
*r = lp_build_mul(&bld->coord_bld, *r, depth); | |||
} | |||
} | |||
} | |||
/** Helper used by lp_build_cube_lookup() */ | |||
static LLVMValueRef | |||
@@ -547,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, | |||
rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); | |||
{ | |||
struct lp_build_flow_context *flow_ctx; | |||
struct lp_build_if_state if_ctx; | |||
LLVMValueRef face_s_var; | |||
LLVMValueRef face_t_var; | |||
LLVMValueRef face_var; | |||
flow_ctx = lp_build_flow_create(bld->builder); | |||
lp_build_flow_scope_begin(flow_ctx); | |||
*face_s = bld->coord_bld.undef; | |||
*face_t = bld->coord_bld.undef; | |||
*face = bld->int_bld.undef; | |||
lp_build_name(*face_s, "face_s"); | |||
lp_build_name(*face_t, "face_t"); | |||
lp_build_name(*face, "face"); | |||
face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var"); | |||
face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var"); | |||
face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var"); | |||
lp_build_flow_scope_declare(flow_ctx, face_s); | |||
lp_build_flow_scope_declare(flow_ctx, face_t); | |||
lp_build_flow_scope_declare(flow_ctx, face); | |||
lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); | |||
lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz); | |||
{ | |||
/* +/- X face */ | |||
LLVMValueRef sign = lp_build_sgn(float_bld, rx); | |||
@@ -575,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, | |||
*face = lp_build_cube_face(bld, rx, | |||
PIPE_TEX_FACE_POS_X, | |||
PIPE_TEX_FACE_NEG_X); | |||
LLVMBuildStore(bld->builder, *face_s, face_s_var); | |||
LLVMBuildStore(bld->builder, *face_t, face_t_var); | |||
LLVMBuildStore(bld->builder, *face, face_var); | |||
} | |||
lp_build_else(&if_ctx); | |||
{ | |||
struct lp_build_flow_context *flow_ctx2; | |||
struct lp_build_if_state if_ctx2; | |||
LLVMValueRef face_s2 = bld->coord_bld.undef; | |||
LLVMValueRef face_t2 = bld->coord_bld.undef; | |||
LLVMValueRef face2 = bld->int_bld.undef; | |||
flow_ctx2 = lp_build_flow_create(bld->builder); | |||
lp_build_flow_scope_begin(flow_ctx2); | |||
lp_build_flow_scope_declare(flow_ctx2, &face_s2); | |||
lp_build_flow_scope_declare(flow_ctx2, &face_t2); | |||
lp_build_flow_scope_declare(flow_ctx2, &face2); | |||
ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); | |||
lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); | |||
lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz); | |||
{ | |||
/* +/- Y face */ | |||
LLVMValueRef sign = lp_build_sgn(float_bld, ry); | |||
LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); | |||
face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); | |||
face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); | |||
face2 = lp_build_cube_face(bld, ry, | |||
*face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); | |||
*face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); | |||
*face = lp_build_cube_face(bld, ry, | |||
PIPE_TEX_FACE_POS_Y, | |||
PIPE_TEX_FACE_NEG_Y); | |||
LLVMBuildStore(bld->builder, *face_s, face_s_var); | |||
LLVMBuildStore(bld->builder, *face_t, face_t_var); | |||
LLVMBuildStore(bld->builder, *face, face_var); | |||
} | |||
lp_build_else(&if_ctx2); | |||
{ | |||
/* +/- Z face */ | |||
LLVMValueRef sign = lp_build_sgn(float_bld, rz); | |||
LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); | |||
face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); | |||
face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); | |||
face2 = lp_build_cube_face(bld, rz, | |||
*face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); | |||
*face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); | |||
*face = lp_build_cube_face(bld, rz, | |||
PIPE_TEX_FACE_POS_Z, | |||
PIPE_TEX_FACE_NEG_Z); | |||
LLVMBuildStore(bld->builder, *face_s, face_s_var); | |||
LLVMBuildStore(bld->builder, *face_t, face_t_var); | |||
LLVMBuildStore(bld->builder, *face, face_var); | |||
} | |||
lp_build_endif(&if_ctx2); | |||
lp_build_flow_scope_end(flow_ctx2); | |||
lp_build_flow_destroy(flow_ctx2); | |||
*face_s = face_s2; | |||
*face_t = face_t2; | |||
*face = face2; | |||
} | |||
lp_build_endif(&if_ctx); | |||
lp_build_flow_scope_end(flow_ctx); | |||
lp_build_flow_destroy(flow_ctx); | |||
*face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s"); | |||
*face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t"); | |||
*face = LLVMBuildLoad(bld->builder, face_var, "face"); | |||
} | |||
} | |||
@@ -659,11 +1022,21 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, | |||
* Pixel blocks have power of two dimensions. LLVM should convert the | |||
* rem/div to bit arithmetic. | |||
* TODO: Verify this. | |||
* It does indeed, BUT it transforms the operation to scalar (and back) when
* doing so (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
* The generated code is seriously awkward and quite expensive.
*/ | |||
#if 0 | |||
LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); | |||
subcoord = LLVMBuildURem(bld->builder, coord, block_width, ""); | |||
coord = LLVMBuildUDiv(bld->builder, coord, block_width, ""); | |||
#else | |||
unsigned logbase2 = util_unsigned_logbase2(block_length); | |||
LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2); | |||
LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1); | |||
subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, ""); | |||
coord = LLVMBuildLShr(bld->builder, coord, block_shift, ""); | |||
#endif | |||
} | |||
offset = lp_build_mul(bld, coord, stride); |
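The replacement relies on the standard power-of-two identities: for unsigned x, x % 2^k == x & (2^k - 1) and x / 2^k == x >> k, so the URem/UDiv pair can be swapped for an And/LShr pair without changing results. A quick standalone check (the block length is chosen arbitrarily):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   const uint32_t block_length = 4;                 /* power of two */
   const uint32_t shift = 2;                        /* log2(block_length) */
   const uint32_t mask = block_length - 1;

   for (uint32_t coord = 0; coord < 1000; coord++) {
      /* rem/div form */
      uint32_t sub_a = coord % block_length;
      uint32_t blk_a = coord / block_length;
      /* and/shift form used by the patch */
      uint32_t sub_b = coord & mask;
      uint32_t blk_b = coord >> shift;
      assert(sub_a == sub_b && blk_a == blk_b);
   }
   return 0;
}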
@@ -82,12 +82,10 @@ struct lp_sampler_static_state | |||
unsigned compare_mode:1; | |||
unsigned compare_func:3; | |||
unsigned normalized_coords:1; | |||
float lod_bias, min_lod, max_lod; | |||
float border_color[4]; | |||
/* Aero hacks */ | |||
unsigned force_nearest_s:1; | |||
unsigned force_nearest_t:1; | |||
unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */ | |||
unsigned lod_bias_non_zero:1; | |||
unsigned apply_min_lod:1; /**< min_lod > 0 ? */ | |||
unsigned apply_max_lod:1; /**< max_lod < last_level ? */ | |||
}; | |||
@@ -104,45 +102,67 @@ struct lp_sampler_static_state | |||
struct lp_sampler_dynamic_state | |||
{ | |||
/** Obtain the base texture width. */ | |||
/** Obtain the base texture width (returns int32) */ | |||
LLVMValueRef | |||
(*width)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain the base texture height. */ | |||
/** Obtain the base texture height (returns int32) */ | |||
LLVMValueRef | |||
(*height)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain the base texture depth. */ | |||
/** Obtain the base texture depth (returns int32) */ | |||
LLVMValueRef | |||
(*depth)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain the number of mipmap levels (minus one). */ | |||
/** Obtain the number of mipmap levels minus one (returns int32) */ | |||
LLVMValueRef | |||
(*last_level)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain stride in bytes between image rows/blocks (returns int32) */ | |||
LLVMValueRef | |||
(*row_stride)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain stride in bytes between image slices (returns int32) */ | |||
LLVMValueRef | |||
(*img_stride)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain pointer to array of pointers to mipmap levels */
LLVMValueRef | |||
(*data_ptr)( const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, | |||
unsigned unit); | |||
/** Obtain texture min lod (returns float) */ | |||
LLVMValueRef | |||
(*min_lod)(const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, unsigned unit); | |||
/** Obtain texture max lod (returns float) */ | |||
LLVMValueRef | |||
(*max_lod)(const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, unsigned unit); | |||
/** Obtain texture lod bias (returns float) */ | |||
LLVMValueRef | |||
(*lod_bias)(const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, unsigned unit); | |||
/** Obtain texture border color (returns ptr to float[4]) */ | |||
LLVMValueRef | |||
(*border_color)(const struct lp_sampler_dynamic_state *state, | |||
LLVMBuilderRef builder, unsigned unit); | |||
}; | |||
@@ -159,10 +179,16 @@ struct lp_build_sample_context | |||
const struct util_format_description *format_desc; | |||
/* See texture_dims() */ | |||
unsigned dims; | |||
/** regular scalar float type */ | |||
struct lp_type float_type; | |||
struct lp_build_context float_bld; | |||
/** float vector type */ | |||
struct lp_build_context float_vec_bld; | |||
/** regular scalar int type */
struct lp_type int_type; | |||
struct lp_build_context int_bld; | |||
@@ -171,17 +197,32 @@ struct lp_build_sample_context | |||
struct lp_type coord_type; | |||
struct lp_build_context coord_bld; | |||
/** Unsigned integer coordinates */ | |||
struct lp_type uint_coord_type; | |||
struct lp_build_context uint_coord_bld; | |||
/** Signed integer coordinates */ | |||
struct lp_type int_coord_type; | |||
struct lp_build_context int_coord_bld; | |||
/** Unsigned integer texture size */ | |||
struct lp_type int_size_type; | |||
struct lp_build_context int_size_bld; | |||
/** Float texture size */
struct lp_type float_size_type; | |||
struct lp_build_context float_size_bld; | |||
/** Output texels type and build context */ | |||
struct lp_type texel_type; | |||
struct lp_build_context texel_bld; | |||
/* Common dynamic state values */ | |||
LLVMValueRef width; | |||
LLVMValueRef height; | |||
LLVMValueRef depth; | |||
LLVMValueRef row_stride_array; | |||
LLVMValueRef img_stride_array; | |||
LLVMValueRef data_array; | |||
/** Integer vector with texture width, height, depth */ | |||
LLVMValueRef int_size; | |||
}; | |||
@@ -218,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld, | |||
} | |||
static INLINE int | |||
static INLINE unsigned | |||
texture_dims(enum pipe_texture_target tex) | |||
{ | |||
switch (tex) { | |||
@@ -237,6 +278,11 @@ texture_dims(enum pipe_texture_target tex) | |||
} | |||
boolean | |||
lp_sampler_wrap_mode_uses_border_color(unsigned mode, | |||
unsigned min_img_filter, | |||
unsigned mag_img_filter); | |||
/** | |||
* Derive the sampler static state. | |||
*/ | |||
@@ -246,15 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, | |||
const struct pipe_sampler_state *sampler); | |||
LLVMValueRef | |||
void | |||
lp_build_lod_selector(struct lp_build_sample_context *bld, | |||
unsigned unit, | |||
const LLVMValueRef ddx[4], | |||
const LLVMValueRef ddy[4], | |||
LLVMValueRef lod_bias, /* optional */ | |||
LLVMValueRef explicit_lod, /* optional */ | |||
LLVMValueRef width, | |||
LLVMValueRef height, | |||
LLVMValueRef depth); | |||
unsigned mip_filter, | |||
LLVMValueRef *out_lod_ipart, | |||
LLVMValueRef *out_lod_fpart); | |||
void | |||
lp_build_nearest_mip_level(struct lp_build_sample_context *bld, | |||
@@ -265,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, | |||
void | |||
lp_build_linear_mip_levels(struct lp_build_sample_context *bld, | |||
unsigned unit, | |||
LLVMValueRef lod, | |||
LLVMValueRef lod_ipart, | |||
LLVMValueRef *lod_fpart_inout, | |||
LLVMValueRef *level0_out, | |||
LLVMValueRef *level1_out, | |||
LLVMValueRef *weight_out); | |||
LLVMValueRef *level1_out); | |||
LLVMValueRef | |||
lp_build_get_mipmap_level(struct lp_build_sample_context *bld, | |||
LLVMValueRef data_array, LLVMValueRef level); | |||
LLVMValueRef level); | |||
LLVMValueRef | |||
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, | |||
LLVMValueRef data_array, int level); | |||
int level); | |||
void | |||
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, | |||
unsigned dims, | |||
LLVMValueRef width_vec, | |||
LLVMValueRef height_vec, | |||
LLVMValueRef depth_vec, | |||
LLVMValueRef ilevel0, | |||
LLVMValueRef ilevel1, | |||
LLVMValueRef row_stride_array, | |||
LLVMValueRef img_stride_array, | |||
LLVMValueRef *width0_vec, | |||
LLVMValueRef *width1_vec, | |||
LLVMValueRef *height0_vec, | |||
LLVMValueRef *height1_vec, | |||
LLVMValueRef *depth0_vec, | |||
LLVMValueRef *depth1_vec, | |||
LLVMValueRef *row_stride0_vec, | |||
LLVMValueRef *row_stride1_vec, | |||
LLVMValueRef *img_stride0_vec, | |||
LLVMValueRef *img_stride1_vec); | |||
LLVMValueRef ilevel, | |||
LLVMValueRef *out_size_vec, | |||
LLVMValueRef *row_stride_vec, | |||
LLVMValueRef *img_stride_vec); | |||
void | |||
lp_build_extract_image_sizes(struct lp_build_sample_context *bld, | |||
struct lp_type size_type, | |||
struct lp_type coord_type, | |||
LLVMValueRef size, | |||
LLVMValueRef *out_width, | |||
LLVMValueRef *out_height, | |||
LLVMValueRef *out_depth); | |||
void | |||
lp_build_unnormalized_coords(struct lp_build_sample_context *bld, | |||
LLVMValueRef flt_size, | |||
LLVMValueRef *s, | |||
LLVMValueRef *t, | |||
LLVMValueRef *r); | |||
void |
@@ -45,6 +45,7 @@ | |||
#include "lp_bld_const.h" | |||
#include "lp_bld_conv.h" | |||
#include "lp_bld_arit.h" | |||
#include "lp_bld_bitarit.h" | |||
#include "lp_bld_logic.h" | |||
#include "lp_bld_swizzle.h" | |||
#include "lp_bld_pack.h" | |||
@@ -80,20 +81,21 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, | |||
LLVMValueRef *out_offset, | |||
LLVMValueRef *out_i) | |||
{ | |||
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; | |||
struct lp_build_context *int_coord_bld = &bld->int_coord_bld; | |||
LLVMValueRef length_minus_one; | |||
length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); | |||
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); | |||
switch(wrap_mode) { | |||
case PIPE_TEX_WRAP_REPEAT: | |||
if(is_pot) | |||
coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); | |||
else | |||
/* Signed remainder won't give the right results for negative | |||
* dividends but unsigned remainder does.*/ | |||
else { | |||
/* Add a bias to the texcoord to handle negative coords */ | |||
LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); | |||
coord = LLVMBuildAdd(bld->builder, coord, bias, ""); | |||
coord = LLVMBuildURem(bld->builder, coord, length, ""); | |||
} | |||
break; | |||
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: | |||
@@ -111,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, | |||
assert(0); | |||
} | |||
lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, | |||
lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, | |||
out_offset, out_i); | |||
} | |||
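The bias added before the unsigned remainder in the REPEAT case above makes moderately negative coordinates positive without changing their value modulo the texture length. A scalar check of that claim (the 1024 multiplier matches the patch; the length is arbitrary):

#include <assert.h>
#include <stdint.h>

/* Mathematical mod for reference (result always in [0, length)). */
static int32_t true_mod(int32_t x, int32_t length)
{
   int32_t r = x % length;
   return r < 0 ? r + length : r;
}

int main(void)
{
   const int32_t length = 64;
   const int32_t bias = length * 1024;   /* same bias the patch adds */

   for (int32_t coord = -5000; coord <= 5000; coord++) {
      uint32_t wrapped = (uint32_t)(coord + bias) % (uint32_t)length;
      assert((int32_t)wrapped == true_mod(coord, length));
   }
   return 0;
}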
@@ -144,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, | |||
LLVMValueRef *i0, | |||
LLVMValueRef *i1) | |||
{ | |||
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; | |||
struct lp_build_context *int_coord_bld = &bld->int_coord_bld; | |||
LLVMValueRef length_minus_one; | |||
LLVMValueRef lmask, umask, mask; | |||
@@ -186,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, | |||
* multiplication. | |||
*/ | |||
*i0 = uint_coord_bld->zero; | |||
*i1 = uint_coord_bld->zero; | |||
*i0 = int_coord_bld->zero; | |||
*i1 = int_coord_bld->zero; | |||
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); | |||
@@ -197,17 +198,18 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, | |||
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); | |||
} | |||
else { | |||
/* Signed remainder won't give the right results for negative | |||
* dividends but unsigned remainder does.*/ | |||
/* Add a bias to the texcoord to handle negative coords */ | |||
LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); | |||
coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); | |||
coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); | |||
} | |||
mask = lp_build_compare(bld->builder, int_coord_bld->type, | |||
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); | |||
*offset0 = lp_build_mul(uint_coord_bld, coord0, stride); | |||
*offset0 = lp_build_mul(int_coord_bld, coord0, stride); | |||
*offset1 = LLVMBuildAnd(bld->builder, | |||
lp_build_add(uint_coord_bld, *offset0, stride), | |||
lp_build_add(int_coord_bld, *offset0, stride), | |||
mask, ""); | |||
break; | |||
@@ -222,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, | |||
mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); | |||
*offset0 = lp_build_mul(uint_coord_bld, coord0, stride); | |||
*offset1 = lp_build_add(uint_coord_bld, | |||
*offset0 = lp_build_mul(int_coord_bld, coord0, stride); | |||
*offset1 = lp_build_add(int_coord_bld, | |||
*offset0, | |||
LLVMBuildAnd(bld->builder, stride, mask, "")); | |||
break; | |||
@@ -236,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, | |||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: | |||
default: | |||
assert(0); | |||
*offset0 = uint_coord_bld->zero; | |||
*offset1 = uint_coord_bld->zero; | |||
*offset0 = int_coord_bld->zero; | |||
*offset1 = int_coord_bld->zero; | |||
break; | |||
} | |||
} | |||
@@ -250,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, | |||
*/ | |||
static void | |||
lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
LLVMValueRef width_vec, | |||
LLVMValueRef height_vec, | |||
LLVMValueRef depth_vec, | |||
LLVMValueRef int_size, | |||
LLVMValueRef row_stride_vec, | |||
LLVMValueRef img_stride_vec, | |||
LLVMValueRef data_ptr, | |||
@@ -262,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
LLVMValueRef *colors_lo, | |||
LLVMValueRef *colors_hi) | |||
{ | |||
const int dims = texture_dims(bld->static_state->target); | |||
const unsigned dims = bld->dims; | |||
LLVMBuilderRef builder = bld->builder; | |||
struct lp_build_context i32, h16, u8n; | |||
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; | |||
LLVMValueRef i32_c8; | |||
LLVMValueRef width_vec, height_vec, depth_vec; | |||
LLVMValueRef s_ipart, t_ipart, r_ipart; | |||
LLVMValueRef x_stride; | |||
LLVMValueRef x_offset, offset; | |||
@@ -280,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
h16_vec_type = lp_build_vec_type(h16.type); | |||
u8n_vec_type = lp_build_vec_type(u8n.type); | |||
lp_build_extract_image_sizes(bld, | |||
bld->int_size_type, | |||
bld->int_coord_type, | |||
int_size, | |||
&width_vec, | |||
&height_vec, | |||
&depth_vec); | |||
if (bld->static_state->normalized_coords) { | |||
/* s = s * width, t = t * height */ | |||
LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); | |||
LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, | |||
coord_vec_type, ""); | |||
s = lp_build_mul(&bld->coord_bld, s, fp_width); | |||
if (dims >= 2) { | |||
LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, | |||
coord_vec_type, ""); | |||
t = lp_build_mul(&bld->coord_bld, t, fp_height); | |||
if (dims >= 3) { | |||
LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, | |||
coord_vec_type, ""); | |||
r = lp_build_mul(&bld->coord_bld, r, fp_depth); | |||
} | |||
} | |||
} | |||
LLVMValueRef scaled_size; | |||
LLVMValueRef flt_size; | |||
/* scale coords by 256 (8 fractional bits) */ | |||
s = lp_build_mul_imm(&bld->coord_bld, s, 256); | |||
if (dims >= 2) | |||
t = lp_build_mul_imm(&bld->coord_bld, t, 256); | |||
if (dims >= 3) | |||
r = lp_build_mul_imm(&bld->coord_bld, r, 256); | |||
/* scale size by 256 (8 fractional bits) */ | |||
scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); | |||
flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); | |||
lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); | |||
} | |||
else { | |||
/* scale coords by 256 (8 fractional bits) */ | |||
s = lp_build_mul_imm(&bld->coord_bld, s, 256); | |||
if (dims >= 2) | |||
t = lp_build_mul_imm(&bld->coord_bld, t, 256); | |||
if (dims >= 3) | |||
r = lp_build_mul_imm(&bld->coord_bld, r, 256); | |||
} | |||
/* convert float to int */ | |||
s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); | |||
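The normalized-coordinate path above no longer converts each size component to float and multiplies per axis; instead the whole integer size vector is shifted left by 8 (the 8.8 fixed-point scale), converted to float once, and lp_build_unnormalized_coords folds the unnormalize and the 256x scale into a single multiply. A rough scalar model, with hypothetical names:

/* s_norm in [0,1] -> 8.8 fixed-point texel coordinate. */
static int
norm_coord_to_fixed88(float s_norm, int width)
{
   float flt_size = (float)(width << 8);   /* size pre-scaled by 256 */
   return (int)(s_norm * flt_size);        /* same as (int)(s_norm * width * 256) */
}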
@@ -321,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); | |||
/* get pixel, row, image strides */ | |||
x_stride = lp_build_const_vec(bld->uint_coord_bld.type, | |||
x_stride = lp_build_const_vec(bld->int_coord_bld.type, | |||
bld->format_desc->block.bits/8); | |||
/* Do texcoord wrapping, compute texel offset */ | |||
@@ -340,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
bld->static_state->pot_height, | |||
bld->static_state->wrap_t, | |||
&y_offset, &y_subcoord); | |||
offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); | |||
offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); | |||
if (dims >= 3) { | |||
LLVMValueRef z_offset; | |||
lp_build_sample_wrap_nearest_int(bld, | |||
@@ -349,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
bld->static_state->pot_height, | |||
bld->static_state->wrap_r, | |||
&z_offset, &z_subcoord); | |||
offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); | |||
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); | |||
} | |||
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { | |||
LLVMValueRef z_offset; | |||
/* The r coord is the cube face in [0,5] */ | |||
z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); | |||
offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); | |||
z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); | |||
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); | |||
} | |||
} | |||
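For cube maps the six faces are laid out as consecutive image slices, so the extra offset added above is just the face index times the per-image stride, roughly:

/* face is the cube face in [0,5]; img_stride is one face image in bytes. */
static unsigned
cube_face_offset(unsigned face, unsigned img_stride)
{
   return face * img_stride;
}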
@@ -414,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, | |||
*/ | |||
static void | |||
lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
LLVMValueRef width_vec, | |||
LLVMValueRef height_vec, | |||
LLVMValueRef depth_vec, | |||
LLVMValueRef int_size, | |||
LLVMValueRef row_stride_vec, | |||
LLVMValueRef img_stride_vec, | |||
LLVMValueRef data_ptr, | |||
@@ -426,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
LLVMValueRef *colors_lo, | |||
LLVMValueRef *colors_hi) | |||
{ | |||
const int dims = texture_dims(bld->static_state->target); | |||
const unsigned dims = bld->dims; | |||
LLVMBuilderRef builder = bld->builder; | |||
struct lp_build_context i32, h16, u8n; | |||
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; | |||
LLVMValueRef i32_c8, i32_c128, i32_c255; | |||
LLVMValueRef width_vec, height_vec, depth_vec; | |||
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; | |||
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; | |||
LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; | |||
@@ -455,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
h16_vec_type = lp_build_vec_type(h16.type); | |||
u8n_vec_type = lp_build_vec_type(u8n.type); | |||
lp_build_extract_image_sizes(bld, | |||
bld->int_size_type, | |||
bld->int_coord_type, | |||
int_size, | |||
&width_vec, | |||
&height_vec, | |||
&depth_vec); | |||
if (bld->static_state->normalized_coords) { | |||
/* s = s * width, t = t * height */ | |||
LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); | |||
LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, | |||
coord_vec_type, ""); | |||
s = lp_build_mul(&bld->coord_bld, s, fp_width); | |||
if (dims >= 2) { | |||
LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, | |||
coord_vec_type, ""); | |||
t = lp_build_mul(&bld->coord_bld, t, fp_height); | |||
} | |||
if (dims >= 3) { | |||
LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, | |||
coord_vec_type, ""); | |||
r = lp_build_mul(&bld->coord_bld, r, fp_depth); | |||
} | |||
} | |||
LLVMValueRef scaled_size; | |||
LLVMValueRef flt_size; | |||
/* scale coords by 256 (8 fractional bits) */ | |||
s = lp_build_mul_imm(&bld->coord_bld, s, 256); | |||
if (dims >= 2) | |||
t = lp_build_mul_imm(&bld->coord_bld, t, 256); | |||
if (dims >= 3) | |||
r = lp_build_mul_imm(&bld->coord_bld, r, 256); | |||
/* scale size by 256 (8 fractional bits) */ | |||
scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); | |||
flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); | |||
lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); | |||
} | |||
else { | |||
/* scale coords by 256 (8 fractional bits) */ | |||
s = lp_build_mul_imm(&bld->coord_bld, s, 256); | |||
if (dims >= 2) | |||
t = lp_build_mul_imm(&bld->coord_bld, t, 256); | |||
if (dims >= 3) | |||
r = lp_build_mul_imm(&bld->coord_bld, r, 256); | |||
} | |||
/* convert float to int */ | |||
s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); | |||
@@ -489,10 +495,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
/* subtract 0.5 (add -128) */ | |||
i32_c128 = lp_build_const_int_vec(i32.type, -128); | |||
if (!bld->static_state->force_nearest_s) { | |||
s = LLVMBuildAdd(builder, s, i32_c128, ""); | |||
} | |||
if (dims >= 2 && !bld->static_state->force_nearest_t) { | |||
s = LLVMBuildAdd(builder, s, i32_c128, ""); | |||
if (dims >= 2) { | |||
t = LLVMBuildAdd(builder, t, i32_c128, ""); | |||
} | |||
if (dims >= 3) { | |||
@@ -516,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); | |||
/* get pixel, row and image strides */ | |||
x_stride = lp_build_const_vec(bld->uint_coord_bld.type, | |||
x_stride = lp_build_const_vec(bld->int_coord_bld.type, | |||
bld->format_desc->block.bits/8); | |||
y_stride = row_stride_vec; | |||
z_stride = img_stride_vec; | |||
@@ -547,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
for (z = 0; z < 2; z++) { | |||
for (x = 0; x < 2; x++) { | |||
offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, | |||
offset[z][0][x] = lp_build_add(&bld->int_coord_bld, | |||
offset[z][0][x], y_offset0); | |||
offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, | |||
offset[z][1][x] = lp_build_add(&bld->int_coord_bld, | |||
offset[z][1][x], y_offset1); | |||
} | |||
} | |||
@@ -565,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
&z_subcoord[0], &z_subcoord[1]); | |||
for (y = 0; y < 2; y++) { | |||
for (x = 0; x < 2; x++) { | |||
offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, | |||
offset[0][y][x] = lp_build_add(&bld->int_coord_bld, | |||
offset[0][y][x], z_offset0); | |||
offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, | |||
offset[1][y][x] = lp_build_add(&bld->int_coord_bld, | |||
offset[1][y][x], z_offset1); | |||
} | |||
} | |||
} | |||
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { | |||
LLVMValueRef z_offset; | |||
z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); | |||
z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); | |||
for (y = 0; y < 2; y++) { | |||
for (x = 0; x < 2; x++) { | |||
/* The r coord is the cube face in [0,5] */ | |||
offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, | |||
offset[0][y][x] = lp_build_add(&bld->int_coord_bld, | |||
offset[0][y][x], z_offset); | |||
} | |||
} | |||
@@ -709,82 +713,56 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, | |||
/* | |||
* Linear interpolation with 8.8 fixed point. | |||
*/ | |||
if (bld->static_state->force_nearest_s) { | |||
/* special case 1-D lerp */ | |||
packed_lo = lp_build_lerp(&h16, | |||
t_fpart_lo, | |||
neighbors_lo[0][0][0], | |||
neighbors_lo[0][0][1]); | |||
packed_hi = lp_build_lerp(&h16, | |||
t_fpart_hi, | |||
neighbors_hi[0][1][0], | |||
neighbors_hi[0][1][0]); | |||
} | |||
else if (bld->static_state->force_nearest_t) { | |||
/* special case 1-D lerp */ | |||
if (dims == 1) { | |||
/* 1-D lerp */ | |||
packed_lo = lp_build_lerp(&h16, | |||
s_fpart_lo, | |||
neighbors_lo[0][0][0], | |||
neighbors_lo[0][0][1]); | |||
s_fpart_lo, | |||
neighbors_lo[0][0][0], | |||
neighbors_lo[0][0][1]); | |||
packed_hi = lp_build_lerp(&h16, | |||
s_fpart_hi, | |||
neighbors_hi[0][0][0], | |||
neighbors_hi[0][0][1]); | |||
s_fpart_hi, | |||
neighbors_hi[0][0][0], | |||
neighbors_hi[0][0][1]); | |||
} | |||
else { | |||
/* general 1/2/3-D lerping */ | |||
if (dims == 1) { | |||
packed_lo = lp_build_lerp(&h16, | |||
s_fpart_lo, | |||
neighbors_lo[0][0][0], | |||
neighbors_lo[0][0][1]); | |||
packed_hi = lp_build_lerp(&h16, | |||
s_fpart_hi, | |||
neighbors_hi[0][0][0], | |||
neighbors_hi[0][0][1]); | |||
} | |||
else { | |||
/* 2-D lerp */ | |||
packed_lo = lp_build_lerp_2d(&h16, | |||
s_fpart_lo, t_fpart_lo, | |||
neighbors_lo[0][0][0], | |||
neighbors_lo[0][0][1], | |||
neighbors_lo[0][1][0], | |||
neighbors_lo[0][1][1]); | |||
packed_hi = lp_build_lerp_2d(&h16, | |||
s_fpart_hi, t_fpart_hi, | |||
neighbors_hi[0][0][0], | |||
neighbors_hi[0][0][1], | |||
neighbors_hi[0][1][0], | |||
neighbors_hi[0][1][1]); | |||
if (dims >= 3) { | |||
LLVMValueRef packed_lo2, packed_hi2; | |||
/* lerp in the second z slice */ | |||
packed_lo2 = lp_build_lerp_2d(&h16, | |||
s_fpart_lo, t_fpart_lo, | |||
neighbors_lo[1][0][0], | |||
neighbors_lo[1][0][1], | |||
neighbors_lo[1][1][0], | |||
neighbors_lo[1][1][1]); | |||
packed_hi2 = lp_build_lerp_2d(&h16, | |||
s_fpart_hi, t_fpart_hi, | |||
neighbors_hi[1][0][0], | |||
neighbors_hi[1][0][1], | |||
neighbors_hi[1][1][0], | |||
neighbors_hi[1][1][1]); | |||
/* interp between two z slices */ | |||
packed_lo = lp_build_lerp(&h16, r_fpart_lo, | |||
packed_lo, packed_lo2); | |||
packed_hi = lp_build_lerp(&h16, r_fpart_hi, | |||
packed_hi, packed_hi2); | |||
} | |||
/* 2-D lerp */ | |||
packed_lo = lp_build_lerp_2d(&h16, | |||
s_fpart_lo, t_fpart_lo, | |||
neighbors_lo[0][0][0], | |||
neighbors_lo[0][0][1], | |||
neighbors_lo[0][1][0], | |||
neighbors_lo[0][1][1]); | |||
packed_hi = lp_build_lerp_2d(&h16, | |||
s_fpart_hi, t_fpart_hi, | |||
neighbors_hi[0][0][0], | |||
neighbors_hi[0][0][1], | |||
neighbors_hi[0][1][0], | |||
neighbors_hi[0][1][1]); | |||
if (dims >= 3) { | |||
LLVMValueRef packed_lo2, packed_hi2; | |||
/* lerp in the second z slice */ | |||
packed_lo2 = lp_build_lerp_2d(&h16, | |||
s_fpart_lo, t_fpart_lo, | |||
neighbors_lo[1][0][0], | |||
neighbors_lo[1][0][1], | |||
neighbors_lo[1][1][0], | |||
neighbors_lo[1][1][1]); | |||
packed_hi2 = lp_build_lerp_2d(&h16, | |||
s_fpart_hi, t_fpart_hi, | |||
neighbors_hi[1][0][0], | |||
neighbors_hi[1][0][1], | |||
neighbors_hi[1][1][0], | |||
neighbors_hi[1][1][1]); | |||
/* interp between two z slices */ | |||
packed_lo = lp_build_lerp(&h16, r_fpart_lo, | |||
packed_lo, packed_lo2); | |||
packed_hi = lp_build_lerp(&h16, r_fpart_hi, | |||
packed_hi, packed_hi2); | |||
} | |||
} | |||
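The rewritten interpolation drops the force_nearest_s/force_nearest_t special cases and selects purely on dims. For reference, a scalar model of the 8.8 fixed-point lerps that the packed 16-bit vector code performs via lp_build_lerp and lp_build_lerp_2d (assumes an arithmetic right shift and small non-negative texel channel values):

/* frac is an 8-bit fraction in [0,256). */
static int
lerp_fixed88(int frac, int a, int b)
{
   return a + ((frac * (b - a)) >> 8);
}

static int
lerp_2d_fixed88(int s_frac, int t_frac,
                int t00, int t01, int t10, int t11)
{
   int top = lerp_fixed88(s_frac, t00, t01);   /* along s, row 0 */
   int bot = lerp_fixed88(s_frac, t10, t11);   /* along s, row 1 */
   return lerp_fixed88(t_frac, top, bot);      /* along t */
}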
@@ -806,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, | |||
LLVMValueRef s, | |||
LLVMValueRef t, | |||
LLVMValueRef r, | |||
LLVMValueRef ilevel0, | |||
LLVMValueRef ilevel1, | |||
LLVMValueRef lod_fpart, | |||
LLVMValueRef width0_vec, | |||
LLVMValueRef width1_vec, | |||
LLVMValueRef height0_vec, | |||
LLVMValueRef height1_vec, | |||
LLVMValueRef depth0_vec, | |||
LLVMValueRef depth1_vec, | |||
LLVMValueRef row_stride0_vec, | |||
LLVMValueRef row_stride1_vec, | |||
LLVMValueRef img_stride0_vec, | |||
LLVMValueRef img_stride1_vec, | |||
LLVMValueRef data_ptr0, | |||
LLVMValueRef data_ptr1, | |||
LLVMValueRef *colors_lo, | |||
LLVMValueRef *colors_hi) | |||
LLVMValueRef colors_lo_var, | |||
LLVMValueRef colors_hi_var) | |||
{ | |||
LLVMBuilderRef builder = bld->builder; | |||
LLVMValueRef size0; | |||
LLVMValueRef size1; | |||
LLVMValueRef row_stride0_vec; | |||
LLVMValueRef row_stride1_vec; | |||
LLVMValueRef img_stride0_vec; | |||
LLVMValueRef img_stride1_vec; | |||
LLVMValueRef data_ptr0; | |||
LLVMValueRef data_ptr1; | |||
LLVMValueRef colors0_lo, colors0_hi; | |||
LLVMValueRef colors1_lo, colors1_hi; | |||
/* sample the first mipmap level */ | |||
lp_build_mipmap_level_sizes(bld, ilevel0, | |||
&size0, | |||
&row_stride0_vec, &img_stride0_vec); | |||
data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); | |||
if (img_filter == PIPE_TEX_FILTER_NEAREST) { | |||
/* sample the first mipmap level */ | |||
lp_build_sample_image_nearest(bld, | |||
width0_vec, height0_vec, depth0_vec, | |||
size0, | |||
row_stride0_vec, img_stride0_vec, | |||
data_ptr0, s, t, r, | |||
&colors0_lo, &colors0_hi); | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { | |||
/* sample the second mipmap level */ | |||
lp_build_sample_image_nearest(bld, | |||
width1_vec, height1_vec, depth1_vec, | |||
row_stride1_vec, img_stride1_vec, | |||
data_ptr1, s, t, r, | |||
&colors1_lo, &colors1_hi); | |||
} | |||
} | |||
else { | |||
assert(img_filter == PIPE_TEX_FILTER_LINEAR); | |||
/* sample the first mipmap level */ | |||
lp_build_sample_image_linear(bld, | |||
width0_vec, height0_vec, depth0_vec, | |||
size0, | |||
row_stride0_vec, img_stride0_vec, | |||
data_ptr0, s, t, r, | |||
&colors0_lo, &colors0_hi); | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { | |||
/* sample the second mipmap level */ | |||
lp_build_sample_image_linear(bld, | |||
width1_vec, height1_vec, depth1_vec, | |||
row_stride1_vec, img_stride1_vec, | |||
data_ptr1, s, t, r, | |||
&colors1_lo, &colors1_hi); | |||
} | |||
} | |||
/* Store the first level's colors in the output variables */ | |||
LLVMBuildStore(builder, colors0_lo, colors_lo_var); | |||
LLVMBuildStore(builder, colors0_hi, colors_hi_var); | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { | |||
/* interpolate samples from the two mipmap levels */ | |||
struct lp_build_context h16; | |||
lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16)); | |||
*colors_lo = lp_build_lerp(&h16, lod_fpart, | |||
colors0_lo, colors1_lo); | |||
*colors_hi = lp_build_lerp(&h16, lod_fpart, | |||
colors0_hi, colors1_hi); | |||
} | |||
else { | |||
/* use first/only level's colors */ | |||
*colors_lo = colors0_lo; | |||
*colors_hi = colors0_hi; | |||
LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0); | |||
LLVMTypeRef i32_type = LLVMIntType(32); | |||
struct lp_build_if_state if_ctx; | |||
LLVMValueRef need_lerp; | |||
lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); | |||
lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); | |||
/* need_lerp = lod_fpart > 0 */ | |||
need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, | |||
lod_fpart, LLVMConstNull(i32_type), | |||
"need_lerp"); | |||
lp_build_if(&if_ctx, builder, need_lerp); | |||
{ | |||
struct lp_build_context h16_bld; | |||
lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); | |||
/* sample the second mipmap level */ | |||
lp_build_mipmap_level_sizes(bld, ilevel1, | |||
&size1, | |||
&row_stride1_vec, &img_stride1_vec); | |||
data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); | |||
if (img_filter == PIPE_TEX_FILTER_NEAREST) { | |||
lp_build_sample_image_nearest(bld, | |||
size1, | |||
row_stride1_vec, img_stride1_vec, | |||
data_ptr1, s, t, r, | |||
&colors1_lo, &colors1_hi); | |||
} | |||
else { | |||
lp_build_sample_image_linear(bld, | |||
size1, | |||
row_stride1_vec, img_stride1_vec, | |||
data_ptr1, s, t, r, | |||
&colors1_lo, &colors1_hi); | |||
} | |||
/* interpolate samples from the two mipmap levels */ | |||
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); | |||
lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); | |||
#if HAVE_LLVM == 0x208 | |||
/* This is a work-around for a bug in LLVM 2.8. | |||
* Evidently, something goes wrong in the construction of the | |||
* lod_fpart short[8] vector. Adding this no-effect shuffle seems | |||
* to force the vector to be properly constructed. | |||
* Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). | |||
*/ | |||
{ | |||
LLVMValueRef shuffles[8], shuffle; | |||
int i; | |||
assert(h16_bld.type.length <= Elements(shuffles)); | |||
for (i = 0; i < h16_bld.type.length; i++) | |||
shuffles[i] = lp_build_const_int32(2 * (i & 1)); | |||
shuffle = LLVMConstVector(shuffles, h16_bld.type.length); | |||
lod_fpart = LLVMBuildShuffleVector(builder, | |||
lod_fpart, lod_fpart, | |||
shuffle, ""); | |||
} | |||
#endif | |||
colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, | |||
colors0_lo, colors1_lo); | |||
colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, | |||
colors0_hi, colors1_hi); | |||
LLVMBuildStore(builder, colors0_lo, colors_lo_var); | |||
LLVMBuildStore(builder, colors0_hi, colors_hi_var); | |||
} | |||
lp_build_endif(&if_ctx); | |||
} | |||
} | |||
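The key change to lp_build_sample_mipmap is that the second mipmap level is now handled lazily: its sizes, strides and texels are only computed inside an if-block guarded by a non-zero 8.8 fixed-point lod fraction, and the results are passed back through the colors_lo_var/colors_hi_var allocas instead of return pointers. A hypothetical scalar outline of that control flow (sample_level stands in for the nearest/linear image samplers):

typedef int (*sample_level_fn)(int level);

static int
sample_mip_lazy(sample_level_fn sample_level,
                int level0, int level1, int lod_fpart /* 8.8 fixed point */)
{
   int color = sample_level(level0);     /* level 0 is always sampled */
   if (lod_fpart > 0) {                  /* the "need_lerp" branch */
      int color1 = sample_level(level1); /* level 1 only when it matters */
      color += (lod_fpart * (color1 - color)) >> 8;
   }
   return color;
}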
@@ -896,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, | |||
const LLVMValueRef *ddy, | |||
LLVMValueRef lod_bias, /* optional */ | |||
LLVMValueRef explicit_lod, /* optional */ | |||
LLVMValueRef width, | |||
LLVMValueRef height, | |||
LLVMValueRef depth, | |||
LLVMValueRef width_vec, | |||
LLVMValueRef height_vec, | |||
LLVMValueRef depth_vec, | |||
LLVMValueRef row_stride_array, | |||
LLVMValueRef img_stride_array, | |||
LLVMValueRef data_array, | |||
LLVMValueRef texel_out[4]) | |||
{ | |||
struct lp_build_context *float_bld = &bld->float_bld; | |||
struct lp_build_context *int_bld = &bld->int_bld; | |||
LLVMBuilderRef builder = bld->builder; | |||
const unsigned mip_filter = bld->static_state->min_mip_filter; | |||
const unsigned min_filter = bld->static_state->min_img_filter; | |||
const unsigned mag_filter = bld->static_state->mag_img_filter; | |||
const int dims = texture_dims(bld->static_state->target); | |||
LLVMValueRef lod = NULL, lod_fpart = NULL; | |||
const unsigned dims = bld->dims; | |||
LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; | |||
LLVMValueRef ilevel0, ilevel1 = NULL; | |||
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; | |||
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; | |||
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; | |||
LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; | |||
LLVMValueRef data_ptr0, data_ptr1 = NULL; | |||
LLVMValueRef packed, packed_lo, packed_hi; | |||
LLVMValueRef unswizzled[4]; | |||
LLVMValueRef face_ddx[4], face_ddy[4]; | |||
struct lp_build_context h16; | |||
LLVMTypeRef h16_vec_type; | |||
struct lp_build_context h16_bld; | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); | |||
/* we only support the common/simple wrap modes at this time */ | |||
assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); | |||
@@ -935,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, | |||
/* make 16-bit fixed-pt builder context */ | |||
lp_build_context_init(&h16, builder, lp_type_ufixed(16)); | |||
h16_vec_type = lp_build_vec_type(h16.type); | |||
lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); | |||
/* cube face selection, compute pre-face coords, etc. */ | |||
if (bld->static_state->target == PIPE_TEXTURE_CUBE) { | |||
@@ -949,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, | |||
r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ | |||
/* recompute ddx, ddy using the new (s,t) face texcoords */ | |||
face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); | |||
face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); | |||
face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); | |||
face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); | |||
face_ddx[2] = NULL; | |||
face_ddx[3] = NULL; | |||
face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); | |||
face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); | |||
face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); | |||
face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); | |||
face_ddy[2] = NULL; | |||
face_ddy[3] = NULL; | |||
ddx = face_ddx; | |||
ddy = face_ddy; | |||
} | |||
/* | |||
* Compute the level of detail (float). | |||
*/ | |||
@@ -970,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, | |||
/* Need to compute lod either to choose mipmap levels or to | |||
* distinguish between minification/magnification with one mipmap level. | |||
*/ | |||
lod = lp_build_lod_selector(bld, ddx, ddy, | |||
lod_bias, explicit_lod, | |||
width, height, depth); | |||
lp_build_lod_selector(bld, unit, ddx, ddy, | |||
lod_bias, explicit_lod, | |||
mip_filter, | |||
&lod_ipart, &lod_fpart); | |||
} else { | |||
lod_ipart = i32t_zero; | |||
} | |||
/* | |||
* Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 | |||
* If mipfilter=linear, also compute the weight between the two | |||
* mipmap levels: lod_fpart | |||
*/ | |||
switch (mip_filter) { | |||
default: | |||
@@ -991,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, | |||
* We should be able to set ilevel0 = const(0) but that causes | |||
* bad x86 code to be emitted. | |||
*/ | |||
lod = lp_build_const_elem(bld->coord_bld.type, 0.0); | |||
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); | |||
assert(lod_ipart); | |||
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); | |||
} | |||
else { | |||
ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); | |||
ilevel0 = i32t_zero; | |||
} | |||
break; | |||
case PIPE_TEX_MIPFILTER_NEAREST: | |||
assert(lod); | |||
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); | |||
assert(lod_ipart); | |||
lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); | |||
break; | |||
case PIPE_TEX_MIPFILTER_LINEAR: | |||
{ | |||
LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); | |||
LLVMValueRef i255 = lp_build_const_int32(255); | |||
LLVMTypeRef i16_type = LLVMIntType(16); | |||
assert(lod); | |||
lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, | |||
&lod_fpart); | |||
lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); | |||
lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); | |||
lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); | |||
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); | |||
lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); | |||
/* the lod_fpart values will be fixed pt values in [0,1) */ | |||
} | |||
assert(lod_ipart); | |||
assert(lod_fpart); | |||
lp_build_linear_mip_levels(bld, unit, | |||
lod_ipart, &lod_fpart, | |||
&ilevel0, &ilevel1); | |||
break; | |||
} | |||
/* compute image size(s) of source mipmap level(s) */ | |||
lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, | |||
ilevel0, ilevel1, | |||
row_stride_array, img_stride_array, | |||
&width0_vec, &width1_vec, | |||
&height0_vec, &height1_vec, | |||
&depth0_vec, &depth1_vec, | |||
&row_stride0_vec, &row_stride1_vec, | |||
&img_stride0_vec, &img_stride1_vec); | |||
/* | |||
* Get pointer(s) to image data for mipmap level(s). | |||
* Get/interpolate texture colors. | |||
*/ | |||
data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); | |||
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { | |||
data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); | |||
} | |||
packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo"); | |||
packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi"); | |||
/* | |||
* Get/interpolate texture colors. | |||
*/ | |||
if (min_filter == mag_filter) { | |||
/* no need to distinquish between minification and magnification */ | |||
lp_build_sample_mipmap(bld, min_filter, mip_filter, | |||
s, t, r, lod_fpart, | |||
width0_vec, width1_vec, | |||
height0_vec, height1_vec, | |||
depth0_vec, depth1_vec, | |||
row_stride0_vec, row_stride1_vec, | |||
img_stride0_vec, img_stride1_vec, | |||
data_ptr0, data_ptr1, | |||
&packed_lo, &packed_hi); | |||
lp_build_sample_mipmap(bld, | |||
min_filter, mip_filter, | |||
s, t, r, | |||
ilevel0, ilevel1, lod_fpart, | |||
packed_lo, packed_hi); | |||
} | |||
else { | |||
/* Emit conditional to choose min image filter or mag image filter | |||
* depending on the lod being >= 0 or < 0, respectively. | |||
*/ | |||
struct lp_build_flow_context *flow_ctx; | |||
struct lp_build_if_state if_ctx; | |||
LLVMValueRef minify; | |||
flow_ctx = lp_build_flow_create(builder); | |||
lp_build_flow_scope_begin(flow_ctx); | |||
packed_lo = LLVMGetUndef(h16_vec_type); | |||
packed_hi = LLVMGetUndef(h16_vec_type); | |||
/* minify = lod >= 0.0 */ | |||
minify = LLVMBuildICmp(builder, LLVMIntSGE, | |||
lod_ipart, int_bld->zero, ""); | |||
lp_build_flow_scope_declare(flow_ctx, &packed_lo); | |||
lp_build_flow_scope_declare(flow_ctx, &packed_hi); | |||
/* minify = lod > 0.0 */ | |||
minify = LLVMBuildFCmp(builder, LLVMRealUGE, | |||
lod, float_bld->zero, ""); | |||
lp_build_if(&if_ctx, flow_ctx, builder, minify); | |||
lp_build_if(&if_ctx, builder, minify); | |||
{ | |||
/* Use the minification filter */ | |||
lp_build_sample_mipmap(bld, min_filter, mip_filter, | |||
s, t, r, lod_fpart, | |||
width0_vec, width1_vec, | |||
height0_vec, height1_vec, | |||
depth0_vec, depth1_vec, | |||
row_stride0_vec, row_stride1_vec, | |||
img_stride0_vec, img_stride1_vec, | |||
data_ptr0, data_ptr1, | |||
&packed_lo, &packed_hi); | |||
lp_build_sample_mipmap(bld, | |||
min_filter, mip_filter, | |||
s, t, r, | |||
ilevel0, ilevel1, lod_fpart, | |||
packed_lo, packed_hi); | |||
} | |||
lp_build_else(&if_ctx); | |||
{ | |||
/* Use the magnification filter */ | |||
lp_build_sample_mipmap(bld, mag_filter, mip_filter, | |||
s, t, r, lod_fpart, | |||
width0_vec, width1_vec, | |||
height0_vec, height1_vec, | |||
depth0_vec, depth1_vec, | |||
row_stride0_vec, row_stride1_vec, | |||
img_stride0_vec, img_stride1_vec, | |||
data_ptr0, data_ptr1, | |||
&packed_lo, &packed_hi); | |||
lp_build_sample_mipmap(bld, | |||
mag_filter, PIPE_TEX_MIPFILTER_NONE, | |||
s, t, r, | |||
i32t_zero, NULL, NULL, | |||
packed_lo, packed_hi); | |||
} | |||
lp_build_endif(&if_ctx); | |||
lp_build_flow_scope_end(flow_ctx); | |||
lp_build_flow_destroy(flow_ctx); | |||
} | |||
/* combine 'packed_lo', 'packed_hi' into 'packed' */ | |||
{ | |||
struct lp_build_context h16, u8n; | |||
lp_build_context_init(&h16, builder, lp_type_ufixed(16)); | |||
lp_build_context_init(&u8n, builder, lp_type_unorm(8)); | |||
packed = lp_build_pack2(builder, h16.type, u8n.type, | |||
packed_lo, packed_hi); | |||
} | |||
/* | |||
* combine the values stored in 'packed_lo' and 'packed_hi' variables | |||
* into 'packed' | |||
*/ | |||
packed = lp_build_pack2(builder, | |||
h16_bld.type, lp_type_unorm(8), | |||
LLVMBuildLoad(builder, packed_lo, ""), | |||
LLVMBuildLoad(builder, packed_hi, "")); | |||
/* | |||
* Convert to SoA and swizzle. |
@@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, | |||
const LLVMValueRef *ddy, | |||
LLVMValueRef lod_bias, /* optional */ | |||
LLVMValueRef explicit_lod, /* optional */ | |||
LLVMValueRef width, | |||
LLVMValueRef height, | |||
LLVMValueRef depth, | |||
LLVMValueRef width_vec, | |||
LLVMValueRef height_vec, | |||
LLVMValueRef depth_vec, | |||
LLVMValueRef row_stride_array, | |||
LLVMValueRef img_stride_array, | |||
LLVMValueRef data_array, | |||
LLVMValueRef texel_out[4]); | |||
@@ -100,6 +100,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, | |||
} | |||
/** | |||
* Combined extract and broadcast (or a mere shuffle when the two types match) | |||
*/ | |||
LLVMValueRef | |||
lp_build_extract_broadcast(LLVMBuilderRef builder, | |||
struct lp_type src_type, | |||
struct lp_type dst_type, | |||
LLVMValueRef vector, | |||
LLVMValueRef index) | |||
{ | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMValueRef res; | |||
assert(src_type.floating == dst_type.floating); | |||
assert(src_type.width == dst_type.width); | |||
assert(lp_check_value(src_type, vector)); | |||
assert(LLVMTypeOf(index) == i32t); | |||
if (src_type.length == 1) { | |||
if (dst_type.length == 1) { | |||
/* | |||
* Trivial scalar -> scalar. | |||
*/ | |||
res = vector; | |||
} | |||
else { | |||
/* | |||
* Broadcast scalar -> vector. | |||
*/ | |||
res = lp_build_broadcast(builder, | |||
lp_build_vec_type(dst_type), | |||
vector); | |||
} | |||
} | |||
else { | |||
if (dst_type.length == src_type.length) { | |||
/* | |||
* Special shuffle of the same size. | |||
*/ | |||
LLVMValueRef shuffle; | |||
shuffle = lp_build_broadcast(builder, | |||
LLVMVectorType(i32t, dst_type.length), | |||
index); | |||
res = LLVMBuildShuffleVector(builder, vector, | |||
LLVMGetUndef(lp_build_vec_type(dst_type)), | |||
shuffle, ""); | |||
} | |||
else { | |||
LLVMValueRef scalar; | |||
scalar = LLVMBuildExtractElement(builder, vector, index, ""); | |||
if (dst_type.length == 1) { | |||
/* | |||
* Trivial extract scalar from vector. | |||
*/ | |||
res = scalar; | |||
} | |||
else { | |||
/* | |||
* General case of different sized vectors. | |||
*/ | |||
res = lp_build_broadcast(builder, | |||
lp_build_vec_type(dst_type), | |||
scalar); | |||
} | |||
} | |||
} | |||
return res; | |||
} | |||
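A plain-C model of what the new helper is meant to produce in the general case (ignoring the scalar and same-length shuffle fast paths): one element of the source vector replicated into every lane of the destination:

static void
extract_broadcast_model(const float *src, unsigned index,
                        float *dst, unsigned dst_len)
{
   unsigned i;
   for (i = 0; i < dst_len; i++)
      dst[i] = src[index];
}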
/** | |||
* Swizzle one channel into all other three channels. | |||
*/ |
@@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, | |||
LLVMValueRef scalar); | |||
LLVMValueRef | |||
lp_build_extract_broadcast(LLVMBuilderRef builder, | |||
struct lp_type src_type, | |||
struct lp_type dst_type, | |||
LLVMValueRef vector, | |||
LLVMValueRef index); | |||
/** | |||
* Broadcast one channel of a vector composed of arrays of XYZW structures into | |||
* all four channels.
@@ -36,6 +36,9 @@ | |||
#define LP_BLD_TGSI_H | |||
#include "gallivm/lp_bld.h" | |||
#include "pipe/p_compiler.h" | |||
#include "pipe/p_state.h" | |||
#include "tgsi/tgsi_scan.h" | |||
struct tgsi_token; | |||
@@ -54,6 +57,75 @@ enum lp_build_tex_modifier { | |||
}; | |||
/** | |||
* Describe a channel of a register. | |||
* | |||
* The value can be a: | |||
* - immediate value (i.e. derived from a IMM register) | |||
* - CONST[n].x/y/z/w | |||
* - IN[n].x/y/z/w | |||
* - undetermined (when .file == TGSI_FILE_NULL) | |||
* | |||
* This is one of the analysis results, and is used to describe | |||
* the output color in terms of inputs. | |||
*/ | |||
struct lp_tgsi_channel_info | |||
{ | |||
unsigned file:4; /* TGSI_FILE_* */ | |||
unsigned swizzle:3; /* PIPE_SWIZZLE_x */ | |||
union { | |||
uint32_t index; | |||
float value; /* for TGSI_FILE_IMMEDIATE */ | |||
} u; | |||
}; | |||
/** | |||
* Describe a texture sampler interpolator. | |||
* | |||
* The interpolation is described in terms of regular inputs. | |||
*/ | |||
struct lp_tgsi_texture_info | |||
{ | |||
struct lp_tgsi_channel_info coord[4]; | |||
unsigned target:8; /* TGSI_TEXTURE_* */ | |||
unsigned unit:8; /* Sampler unit */ | |||
unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */ | |||
}; | |||
struct lp_tgsi_info | |||
{ | |||
struct tgsi_shader_info base; | |||
/* | |||
* Whether any of the texture opcodes access a register file other than | |||
* TGSI_FILE_INPUT. | |||
* | |||
* We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little | |||
* benefit. | |||
*/ | |||
unsigned indirect_textures:1; | |||
/* | |||
* Texture opcode description. Aimed at detecting and describing direct | |||
* texture opcodes. | |||
*/ | |||
unsigned num_texs; | |||
struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS]; | |||
/* | |||
* Output description. Aimed at detecting and describing simple blit | |||
* shaders. | |||
*/ | |||
struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4]; | |||
/* | |||
* Shortcut pointers into the above (for fragment shaders). | |||
*/ | |||
const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS]; | |||
}; | |||
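A hypothetical consumer of this analysis (not part of the patch) showing how the fields above combine to recognize a single-texture, blit-style fragment shader:

static boolean
is_single_texture_blit(const struct lp_tgsi_info *info)
{
   const struct lp_tgsi_texture_info *tex;

   if (info->indirect_textures || info->num_texs != 1)
      return FALSE;

   tex = &info->tex[0];
   if (tex->modifier != LP_BLD_TEX_MODIFIER_NONE)
      return FALSE;

   /* Every tracked coordinate channel must come straight from an input. */
   return tex->coord[0].file == TGSI_FILE_INPUT &&
          tex->coord[1].file == TGSI_FILE_INPUT;
}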
/** | |||
* Sampler code generation interface. | |||
* | |||
@@ -96,6 +168,11 @@ struct lp_build_sampler_aos | |||
}; | |||
void | |||
lp_build_tgsi_info(const struct tgsi_token *tokens, | |||
struct lp_tgsi_info *info); | |||
void | |||
lp_build_tgsi_soa(LLVMBuilderRef builder, | |||
const struct tgsi_token *tokens, |
@@ -0,0 +1,479 @@ | |||
/************************************************************************** | |||
* | |||
* Copyright 2010 VMware, Inc. | |||
* All Rights Reserved. | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sub license, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | |||
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | |||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |||
* USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* The above copyright notice and this permission notice (including the | |||
* next paragraph) shall be included in all copies or substantial portions | |||
* of the Software. | |||
* | |||
**************************************************************************/ | |||
#include "util/u_memory.h" | |||
#include "util/u_math.h" | |||
#include "tgsi/tgsi_parse.h" | |||
#include "tgsi/tgsi_util.h" | |||
#include "tgsi/tgsi_dump.h" | |||
#include "lp_bld_debug.h" | |||
#include "lp_bld_tgsi.h" | |||
/** | |||
* Analysis context. | |||
* | |||
* This is where we store the value of each channel of the IMM/TEMP/OUT | |||
* registers, as we walk the shader. | |||
*/ | |||
struct analysis_context | |||
{ | |||
struct lp_tgsi_info *info; | |||
unsigned num_imms; | |||
float imm[32][4]; | |||
struct lp_tgsi_channel_info temp[32][4]; | |||
}; | |||
/** | |||
* Describe the specified channel of the src register. | |||
*/ | |||
static void | |||
analyse_src(struct analysis_context *ctx, | |||
struct lp_tgsi_channel_info *chan_info, | |||
const struct tgsi_src_register *src, | |||
unsigned chan) | |||
{ | |||
chan_info->file = TGSI_FILE_NULL; | |||
if (!src->Indirect && !src->Absolute && !src->Negate) { | |||
unsigned swizzle = tgsi_util_get_src_register_swizzle(src, chan); | |||
if (src->File == TGSI_FILE_TEMPORARY) { | |||
if (src->Index < Elements(ctx->temp)) { | |||
*chan_info = ctx->temp[src->Index][swizzle]; | |||
} | |||
} else { | |||
chan_info->file = src->File; | |||
if (src->File == TGSI_FILE_IMMEDIATE) { | |||
assert(src->Index < Elements(ctx->imm)); | |||
if (src->Index < Elements(ctx->imm)) { | |||
chan_info->u.value = ctx->imm[src->Index][swizzle]; | |||
} | |||
} else { | |||
chan_info->u.index = src->Index; | |||
chan_info->swizzle = swizzle; | |||
} | |||
} | |||
} | |||
} | |||
/** | |||
* Whether this register channel refers to a specific immediate value. | |||
*/ | |||
static boolean | |||
is_immediate(const struct lp_tgsi_channel_info *chan_info, float value) | |||
{ | |||
return chan_info->file == TGSI_FILE_IMMEDIATE && | |||
chan_info->u.value == value; | |||
} | |||
static void | |||
analyse_tex(struct analysis_context *ctx, | |||
const struct tgsi_full_instruction *inst, | |||
enum lp_build_tex_modifier modifier) | |||
{ | |||
struct lp_tgsi_info *info = ctx->info; | |||
unsigned chan; | |||
if (info->num_texs < Elements(info->tex)) { | |||
struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs]; | |||
bool indirect = FALSE; | |||
unsigned readmask = 0; | |||
tex_info->target = inst->Texture.Texture; | |||
switch (inst->Texture.Texture) { | |||
case TGSI_TEXTURE_1D: | |||
readmask = TGSI_WRITEMASK_X; | |||
break; | |||
case TGSI_TEXTURE_2D: | |||
case TGSI_TEXTURE_RECT: | |||
readmask = TGSI_WRITEMASK_XY; | |||
break; | |||
case TGSI_TEXTURE_SHADOW1D: | |||
case TGSI_TEXTURE_SHADOW2D: | |||
case TGSI_TEXTURE_SHADOWRECT: | |||
case TGSI_TEXTURE_3D: | |||
case TGSI_TEXTURE_CUBE: | |||
readmask = TGSI_WRITEMASK_XYZ; | |||
break; | |||
default: | |||
assert(0); | |||
return; | |||
} | |||
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { | |||
/* We don't track explicit derivatives, although we could */ | |||
indirect = TRUE; | |||
tex_info->unit = inst->Src[3].Register.Index; | |||
} else { | |||
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED || | |||
modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || | |||
modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { | |||
readmask |= TGSI_WRITEMASK_W; | |||
} | |||
tex_info->unit = inst->Src[1].Register.Index; | |||
} | |||
for (chan = 0; chan < 4; ++chan) { | |||
struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan]; | |||
if (readmask & (1 << chan)) { | |||
analyse_src(ctx, chan_info, &inst->Src[0].Register, chan); | |||
if (chan_info->file != TGSI_FILE_INPUT) { | |||
indirect = TRUE; | |||
} | |||
} else { | |||
memset(chan_info, 0, sizeof *chan_info); | |||
} | |||
} | |||
if (indirect) { | |||
info->indirect_textures = TRUE; | |||
} | |||
++info->num_texs; | |||
} else { | |||
info->indirect_textures = TRUE; | |||
} | |||
} | |||
/** | |||
* Process an instruction, and update the register values accordingly. | |||
*/ | |||
static void | |||
analyse_instruction(struct analysis_context *ctx, | |||
struct tgsi_full_instruction *inst) | |||
{ | |||
struct lp_tgsi_info *info = ctx->info; | |||
struct lp_tgsi_channel_info (*regs)[4]; | |||
unsigned max_regs; | |||
unsigned i; | |||
unsigned index; | |||
unsigned chan; | |||
for (i = 0; i < inst->Instruction.NumDstRegs; ++i) { | |||
const struct tgsi_dst_register *dst = &inst->Dst[i].Register; | |||
/* | |||
* Get the lp_tgsi_channel_info array corresponding to the destination | |||
* register file. | |||
*/ | |||
if (dst->File == TGSI_FILE_TEMPORARY) { | |||
regs = ctx->temp; | |||
max_regs = Elements(ctx->temp); | |||
} else if (dst->File == TGSI_FILE_OUTPUT) { | |||
regs = info->output; | |||
max_regs = Elements(info->output); | |||
} else if (dst->File == TGSI_FILE_ADDRESS || | |||
dst->File == TGSI_FILE_PREDICATE) { | |||
continue; | |||
} else { | |||
assert(0); | |||
continue; | |||
} | |||
/* | |||
* Detect direct TEX instructions | |||
*/ | |||
switch (inst->Instruction.Opcode) { | |||
case TGSI_OPCODE_TEX: | |||
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE); | |||
break; | |||
case TGSI_OPCODE_TXD: | |||
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); | |||
break; | |||
case TGSI_OPCODE_TXB: | |||
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); | |||
break; | |||
case TGSI_OPCODE_TXL: | |||
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); | |||
break; | |||
case TGSI_OPCODE_TXP: | |||
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED); | |||
break; | |||
default: | |||
break; | |||
} | |||
/* | |||
* Keep track of assignments and writes | |||
*/ | |||
if (dst->Indirect) { | |||
/* | |||
* It could be any register index so clear all register indices. | |||
*/ | |||
for (chan = 0; chan < 4; ++chan) { | |||
if (dst->WriteMask & (1 << chan)) { | |||
for (index = 0; index < max_regs; ++index) { | |||
regs[index][chan].file = TGSI_FILE_NULL; | |||
} | |||
} | |||
} | |||
} else if (dst->Index < max_regs) { | |||
/* | |||
* Update this destination register value. | |||
*/ | |||
struct lp_tgsi_channel_info res[4]; | |||
memset(res, 0, sizeof res); | |||
if (!inst->Instruction.Predicate && | |||
!inst->Instruction.Saturate) { | |||
for (chan = 0; chan < 4; ++chan) { | |||
if (dst->WriteMask & (1 << chan)) { | |||
if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) { | |||
analyse_src(ctx, &res[chan], | |||
&inst->Src[0].Register, chan); | |||
} else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { | |||
/* | |||
* Propagate values across 1.0 and 0.0 multiplications. | |||
*/ | |||
struct lp_tgsi_channel_info src0; | |||
struct lp_tgsi_channel_info src1; | |||
analyse_src(ctx, &src0, &inst->Src[0].Register, chan); | |||
analyse_src(ctx, &src1, &inst->Src[1].Register, chan); | |||
if (is_immediate(&src0, 0.0f)) { | |||
res[chan] = src0; | |||
} else if (is_immediate(&src1, 0.0f)) { | |||
res[chan] = src1; | |||
} else if (is_immediate(&src0, 1.0f)) { | |||
res[chan] = src1; | |||
} else if (is_immediate(&src1, 1.0f)) { | |||
res[chan] = src0; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
for (chan = 0; chan < 4; ++chan) { | |||
if (dst->WriteMask & (1 << chan)) { | |||
regs[dst->Index][chan] = res[chan]; | |||
} | |||
} | |||
} | |||
} | |||
/* | |||
* Clear all temporary and output register info in the presence of a control flow opcode. | |||
*/ | |||
switch (inst->Instruction.Opcode) { | |||
case TGSI_OPCODE_IF: | |||
case TGSI_OPCODE_IFC: | |||
case TGSI_OPCODE_ELSE: | |||
case TGSI_OPCODE_ENDIF: | |||
case TGSI_OPCODE_BGNLOOP: | |||
case TGSI_OPCODE_BRK: | |||
case TGSI_OPCODE_BREAKC: | |||
case TGSI_OPCODE_CONT: | |||
case TGSI_OPCODE_ENDLOOP: | |||
case TGSI_OPCODE_CALLNZ: | |||
case TGSI_OPCODE_CAL: | |||
case TGSI_OPCODE_BGNSUB: | |||
case TGSI_OPCODE_ENDSUB: | |||
case TGSI_OPCODE_SWITCH: | |||
case TGSI_OPCODE_CASE: | |||
case TGSI_OPCODE_DEFAULT: | |||
case TGSI_OPCODE_ENDSWITCH: | |||
case TGSI_OPCODE_RET: | |||
case TGSI_OPCODE_END: | |||
/* XXX: Are there more cases? */ | |||
memset(&ctx->temp, 0, sizeof ctx->temp); | |||
memset(&info->output, 0, sizeof info->output); | |||
default: | |||
break; | |||
} | |||
} | |||
static INLINE void | |||
dump_info(const struct tgsi_token *tokens, | |||
struct lp_tgsi_info *info) | |||
{ | |||
unsigned index; | |||
unsigned chan; | |||
tgsi_dump(tokens, 0); | |||
for (index = 0; index < info->num_texs; ++index) { | |||
const struct lp_tgsi_texture_info *tex_info = &info->tex[index]; | |||
debug_printf("TEX[%u] =", index); | |||
for (chan = 0; chan < 4; ++chan) { | |||
const struct lp_tgsi_channel_info *chan_info = | |||
&tex_info->coord[chan]; | |||
if (chan_info->file != TGSI_FILE_NULL) { | |||
debug_printf(" %s[%u].%c", | |||
tgsi_file_names[chan_info->file], | |||
chan_info->u.index, | |||
"xyzw01"[chan_info->swizzle]); | |||
} else { | |||
debug_printf(" _"); | |||
} | |||
} | |||
debug_printf(", SAMP[%u], %s\n", | |||
tex_info->unit, | |||
tgsi_texture_names[tex_info->target]); | |||
} | |||
for (index = 0; index < PIPE_MAX_SHADER_OUTPUTS; ++index) { | |||
for (chan = 0; chan < 4; ++chan) { | |||
const struct lp_tgsi_channel_info *chan_info = | |||
&info->output[index][chan]; | |||
if (chan_info->file != TGSI_FILE_NULL) { | |||
debug_printf("OUT[%u].%c = ", index, "xyzw"[chan]); | |||
if (chan_info->file == TGSI_FILE_IMMEDIATE) { | |||
debug_printf("%f", chan_info->u.value); | |||
} else { | |||
const char *file_name; | |||
switch (chan_info->file) { | |||
case TGSI_FILE_CONSTANT: | |||
file_name = "CONST"; | |||
break; | |||
case TGSI_FILE_INPUT: | |||
file_name = "IN"; | |||
break; | |||
default: | |||
file_name = "???"; | |||
break; | |||
} | |||
debug_printf("%s[%u].%c", | |||
file_name, | |||
chan_info->u.index, | |||
"xyzw01"[chan_info->swizzle]); | |||
} | |||
debug_printf("\n"); | |||
} | |||
} | |||
} | |||
} | |||
/** | |||
* Detect any direct relationship between the output colors and the shader inputs. | |||
*/ | |||
void | |||
lp_build_tgsi_info(const struct tgsi_token *tokens, | |||
struct lp_tgsi_info *info) | |||
{ | |||
struct tgsi_parse_context parse; | |||
struct analysis_context ctx; | |||
unsigned index; | |||
unsigned chan; | |||
memset(info, 0, sizeof *info); | |||
tgsi_scan_shader(tokens, &info->base); | |||
memset(&ctx, 0, sizeof ctx); | |||
ctx.info = info; | |||
tgsi_parse_init(&parse, tokens); | |||
while (!tgsi_parse_end_of_tokens(&parse)) { | |||
tgsi_parse_token(&parse); | |||
switch (parse.FullToken.Token.Type) { | |||
case TGSI_TOKEN_TYPE_DECLARATION: | |||
break; | |||
case TGSI_TOKEN_TYPE_INSTRUCTION: | |||
{ | |||
struct tgsi_full_instruction *inst = | |||
&parse.FullToken.FullInstruction; | |||
if (inst->Instruction.Opcode == TGSI_OPCODE_END || | |||
inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { | |||
/* We reached the end of main function body. */ | |||
goto finished; | |||
} | |||
analyse_instruction(&ctx, inst); | |||
} | |||
break; | |||
case TGSI_TOKEN_TYPE_IMMEDIATE: | |||
{ | |||
const unsigned size = | |||
parse.FullToken.FullImmediate.Immediate.NrTokens - 1; | |||
assert(size <= 4); | |||
if (ctx.num_imms < Elements(ctx.imm)) { | |||
for (chan = 0; chan < size; ++chan) { | |||
ctx.imm[ctx.num_imms][chan] = | |||
parse.FullToken.FullImmediate.u[chan].Float; | |||
} | |||
++ctx.num_imms; | |||
} | |||
} | |||
break; | |||
case TGSI_TOKEN_TYPE_PROPERTY: | |||
break; | |||
default: | |||
assert(0); | |||
} | |||
} | |||
finished: | |||
tgsi_parse_free(&parse); | |||
/* | |||
* Link the output color values. | |||
*/ | |||
for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) { | |||
static const struct lp_tgsi_channel_info null_output[4]; | |||
info->cbuf[index] = null_output; | |||
} | |||
for (index = 0; index < info->base.num_outputs; ++index) { | |||
unsigned semantic_name = info->base.output_semantic_name[index]; | |||
unsigned semantic_index = info->base.output_semantic_index[index]; | |||
if (semantic_name == TGSI_SEMANTIC_COLOR && | |||
semantic_index < PIPE_MAX_COLOR_BUFS) { | |||
info->cbuf[semantic_index] = info->output[index]; | |||
} | |||
} | |||
if (gallivm_debug & GALLIVM_DEBUG_TGSI) { | |||
dump_info(tokens, info); | |||
} | |||
} |
@@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, | |||
} | |||
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { | |||
LLVMTypeRef i32t = LLVMInt32Type(); | |||
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); | |||
for (i = 0; i < num_coords; i++) { | |||
ddx[i] = emit_fetch( bld, inst, 1, i ); | |||
ddy[i] = emit_fetch( bld, inst, 2, i ); | |||
LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); | |||
LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); | |||
ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); | |||
ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); | |||
} | |||
unit = inst->Src[3].Register.Index; | |||
} else { | |||
for (i = 0; i < num_coords; i++) { | |||
ddx[i] = lp_build_ddx( &bld->base, coords[i] ); | |||
ddy[i] = lp_build_ddy( &bld->base, coords[i] ); | |||
ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); | |||
ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); | |||
} | |||
unit = inst->Src[1].Register.Index; | |||
} | |||
for (i = num_coords; i < 3; i++) { | |||
ddx[i] = bld->base.undef; | |||
ddy[i] = bld->base.undef; | |||
ddx[i] = LLVMGetUndef(bld->base.elem_type); | |||
ddy[i] = LLVMGetUndef(bld->base.elem_type); | |||
} | |||
bld->sampler->emit_fetch_texel(bld->sampler, | |||
@@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, | |||
texel); | |||
} | |||
static boolean | |||
near_end_of_shader(struct lp_build_tgsi_soa_context *bld, | |||
int pc) | |||
{ | |||
int i; | |||
for (i = 0; i < 5; i++) { | |||
unsigned opcode; | |||
if (pc + i >= bld->info->num_instructions) | |||
return TRUE; | |||
opcode = bld->instructions[pc + i].Instruction.Opcode; | |||
if (opcode == TGSI_OPCODE_END) | |||
return TRUE; | |||
if (opcode == TGSI_OPCODE_TEX || | |||
opcode == TGSI_OPCODE_TXP || | |||
opcode == TGSI_OPCODE_TXD || | |||
opcode == TGSI_OPCODE_TXB || | |||
opcode == TGSI_OPCODE_TXL || | |||
opcode == TGSI_OPCODE_TXF || | |||
opcode == TGSI_OPCODE_TXQ || | |||
opcode == TGSI_OPCODE_CAL || | |||
opcode == TGSI_OPCODE_CALLNZ || | |||
opcode == TGSI_OPCODE_IF || | |||
opcode == TGSI_OPCODE_IFC || | |||
opcode == TGSI_OPCODE_BGNLOOP || | |||
opcode == TGSI_OPCODE_SWITCH) | |||
return FALSE; | |||
} | |||
return TRUE; | |||
} | |||
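A concrete reading of the heuristic, with an illustrative shader tail: it inspects the current instruction and at most the next four, returning TRUE (skip the mask check) when nothing expensive remains. For

   KIL  -TEMP[0]
   MOV  OUT[0].xyzw, TEMP[1]
   END

the lookahead reaches END after one cheap MOV, so near_end_of_shader() returns TRUE and the conditional early-exit branch emitted by lp_build_mask_check() is omitted; the mask update alone suffices that close to the end of the shader.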
/** | |||
* Kill fragment if any of the src register values are negative. | |||
@@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, | |||
static void | |||
emit_kil( | |||
struct lp_build_tgsi_soa_context *bld, | |||
const struct tgsi_full_instruction *inst ) | |||
const struct tgsi_full_instruction *inst, | |||
int pc) | |||
{ | |||
const struct tgsi_full_src_register *reg = &inst->Src[0]; | |||
LLVMValueRef terms[NUM_CHANNELS]; | |||
@@ -959,8 +1001,12 @@ emit_kil( | |||
} | |||
} | |||
if(mask) | |||
if(mask) { | |||
lp_build_mask_update(bld->mask, mask); | |||
if (!near_end_of_shader(bld, pc)) | |||
lp_build_mask_check(bld->mask); | |||
} | |||
} | |||
@@ -972,7 +1018,8 @@ emit_kil( | |||
*/ | |||
static void | |||
emit_kilp(struct lp_build_tgsi_soa_context *bld, | |||
const struct tgsi_full_instruction *inst) | |||
const struct tgsi_full_instruction *inst, | |||
int pc) | |||
{ | |||
LLVMValueRef mask; | |||
@@ -987,6 +1034,9 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, | |||
} | |||
lp_build_mask_update(bld->mask, mask); | |||
if (!near_end_of_shader(bld, pc)) | |||
lp_build_mask_check(bld->mask); | |||
} | |||
static void | |||
@@ -1535,12 +1585,12 @@ emit_instruction( | |||
case TGSI_OPCODE_KILP: | |||
/* predicated kill */ | |||
emit_kilp( bld, inst ); | |||
emit_kilp( bld, inst, (*pc)-1 ); | |||
break; | |||
case TGSI_OPCODE_KIL: | |||
/* conditional kill */ | |||
emit_kil( bld, inst ); | |||
emit_kil( bld, inst, (*pc)-1 ); | |||
break; | |||
case TGSI_OPCODE_PK2H: |
@@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = { | |||
}; | |||
static INLINE boolean | |||
static INLINE int | |||
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, | |||
pb_size size, | |||
const struct pb_desc *desc) | |||
@@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, | |||
void *map; | |||
if(buf->base.base.size < size) | |||
return FALSE; | |||
return 0; | |||
/* be lenient with size */ | |||
if(buf->base.base.size >= 2*size) | |||
return FALSE; | |||
return 0; | |||
if(!pb_check_alignment(desc->alignment, buf->base.base.alignment)) | |||
return FALSE; | |||
return 0; | |||
if(!pb_check_usage(desc->usage, buf->base.base.usage)) | |||
return FALSE; | |||
return 0; | |||
map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); | |||
if (!map) { | |||
return FALSE; | |||
return -1; | |||
} | |||
pb_unmap(buf->buffer); | |||
return TRUE; | |||
return 1; | |||
} | |||
@@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, | |||
struct pb_cache_buffer *curr_buf; | |||
struct list_head *curr, *next; | |||
int64_t now; | |||
int ret = 0; | |||
pipe_mutex_lock(mgr->mutex); | |||
buf = NULL; | |||
@@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, | |||
now = os_time_get(); | |||
while(curr != &mgr->delayed) { | |||
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); | |||
if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc)) | |||
buf = curr_buf; | |||
if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc)) > 0) | |||
buf = curr_buf; | |||
else if(os_time_timeout(curr_buf->start, curr_buf->end, now)) | |||
_pb_cache_buffer_destroy(curr_buf); | |||
_pb_cache_buffer_destroy(curr_buf); | |||
else | |||
/* This buffer (and all hereafter) are still hot in cache */ | |||
break; | |||
if (ret == -1) | |||
break; | |||
curr = next; | |||
next = curr->next; | |||
} | |||
/* keep searching in the hot buffers */ | |||
if(!buf) { | |||
if(!buf && ret != -1) { | |||
while(curr != &mgr->delayed) { | |||
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); | |||
if(pb_cache_is_buffer_compat(curr_buf, size, desc)) { | |||
ret = pb_cache_is_buffer_compat(curr_buf, size, desc); | |||
if (ret > 0) { | |||
buf = curr_buf; | |||
break; | |||
} | |||
if (ret == -1) | |||
break; | |||
/* no need to check the timeout here */ | |||
curr = next; | |||
next = curr->next; | |||
@@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, | |||
if(buf) { | |||
LIST_DEL(&buf->head); | |||
--mgr->numDelayed; | |||
pipe_mutex_unlock(mgr->mutex); | |||
/* Increase refcount */ | |||
pipe_reference_init(&buf->base.base.reference, 1); |
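pb_cache_is_buffer_compat now reports a tri-state instead of a boolean: 1 means the cached buffer can be reused, 0 means it cannot, and -1 means pb_map() would block, in which case scanning further buffers is pointless. A rough model of how the two loops above are meant to consume it (names are illustrative):

static int
scan_for_reusable(int (*compat)(unsigned idx), unsigned nbufs)
{
   unsigned i;
   for (i = 0; i < nbufs; i++) {
      int ret = compat(i);
      if (ret > 0)
         return (int)i;   /* reusable buffer found */
      if (ret < 0)
         break;           /* mapping would block, stop searching */
   }
   return -1;             /* nothing reusable */
}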
@@ -480,7 +480,7 @@ struct rbug_proto_context_list * rbug_demarshal_context_list(struct rbug_proto_h | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST) | |||
return NULL; | |||
pos = 0; | |||
@@ -506,7 +506,7 @@ struct rbug_proto_context_info * rbug_demarshal_context_info(struct rbug_proto_h | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO) | |||
return NULL; | |||
pos = 0; | |||
@@ -533,7 +533,7 @@ struct rbug_proto_context_draw_block * rbug_demarshal_context_draw_block(struct | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCK) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCK) | |||
return NULL; | |||
pos = 0; | |||
@@ -561,7 +561,7 @@ struct rbug_proto_context_draw_step * rbug_demarshal_context_draw_step(struct rb | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_STEP) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_STEP) | |||
return NULL; | |||
pos = 0; | |||
@@ -589,7 +589,7 @@ struct rbug_proto_context_draw_unblock * rbug_demarshal_context_draw_unblock(str | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK) | |||
return NULL; | |||
pos = 0; | |||
@@ -617,7 +617,7 @@ struct rbug_proto_context_draw_rule * rbug_demarshal_context_draw_rule(struct rb | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_RULE) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_RULE) | |||
return NULL; | |||
pos = 0; | |||
@@ -649,7 +649,7 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_FLUSH) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_FLUSH) | |||
return NULL; | |||
pos = 0; | |||
@@ -677,7 +677,7 @@ struct rbug_proto_context_list_reply * rbug_demarshal_context_list_reply(struct | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST_REPLY) | |||
return NULL; | |||
pos = 0; | |||
@@ -705,7 +705,7 @@ struct rbug_proto_context_info_reply * rbug_demarshal_context_info_reply(struct | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO_REPLY) | |||
return NULL; | |||
pos = 0; | |||
@@ -739,7 +739,7 @@ struct rbug_proto_context_draw_blocked * rbug_demarshal_context_draw_blocked(str | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCKED) | |||
if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCKED) | |||
return NULL; | |||
pos = 0; |
@@ -233,7 +233,7 @@ struct rbug_proto_noop * rbug_demarshal_noop(struct rbug_proto_header *header) | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_NOOP) | |||
if (header->opcode != (int32_t)RBUG_OP_NOOP) | |||
return NULL; | |||
pos = 0; | |||
@@ -259,7 +259,7 @@ struct rbug_proto_ping * rbug_demarshal_ping(struct rbug_proto_header *header) | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_PING) | |||
if (header->opcode != (int32_t)RBUG_OP_PING) | |||
return NULL; | |||
pos = 0; | |||
@@ -285,7 +285,7 @@ struct rbug_proto_error * rbug_demarshal_error(struct rbug_proto_header *header) | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_ERROR) | |||
if (header->opcode != (int32_t)RBUG_OP_ERROR) | |||
return NULL; | |||
pos = 0; | |||
@@ -312,7 +312,7 @@ struct rbug_proto_ping_reply * rbug_demarshal_ping_reply(struct rbug_proto_heade | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_PING_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_PING_REPLY) | |||
return NULL; | |||
pos = 0; | |||
@@ -339,7 +339,7 @@ struct rbug_proto_error_reply * rbug_demarshal_error_reply(struct rbug_proto_hea | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_ERROR_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_ERROR_REPLY) | |||
return NULL; | |||
pos = 0; |
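These demarshal guards now cast the expected opcode to int32_t, the same width as header->opcode, so the comparison is no longer made through a truncated 16-bit constant. A standalone illustration of the truncation pitfall (the opcode values here are made up, not the real rbug wire values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   int32_t wire_opcode = 3;          /* what arrived on the wire      */
   int32_t expected    = 0x10003;    /* hypothetical 32-bit enum value */

   /* (int16_t)0x10003 truncates to 3, so the 16-bit cast wrongly matches. */
   printf("16-bit cast matches: %d\n", wire_opcode == (int16_t)expected); /* 1 */
   printf("32-bit cast matches: %d\n", wire_opcode == (int32_t)expected); /* 0 */
   return 0;
}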
@@ -91,3 +91,67 @@ struct rbug_header * rbug_demarshal(struct rbug_proto_header *header) | |||
return NULL; | |||
} | |||
} | |||
const char* rbug_proto_get_name(enum rbug_opcode opcode) | |||
{ | |||
switch(opcode) { | |||
case RBUG_OP_NOOP: | |||
return "RBUG_OP_NOOP"; | |||
case RBUG_OP_PING: | |||
return "RBUG_OP_PING"; | |||
case RBUG_OP_ERROR: | |||
return "RBUG_OP_ERROR"; | |||
case RBUG_OP_PING_REPLY: | |||
return "RBUG_OP_PING_REPLY"; | |||
case RBUG_OP_ERROR_REPLY: | |||
return "RBUG_OP_ERROR_REPLY"; | |||
case RBUG_OP_TEXTURE_LIST: | |||
return "RBUG_OP_TEXTURE_LIST"; | |||
case RBUG_OP_TEXTURE_INFO: | |||
return "RBUG_OP_TEXTURE_INFO"; | |||
case RBUG_OP_TEXTURE_WRITE: | |||
return "RBUG_OP_TEXTURE_WRITE"; | |||
case RBUG_OP_TEXTURE_READ: | |||
return "RBUG_OP_TEXTURE_READ"; | |||
case RBUG_OP_TEXTURE_LIST_REPLY: | |||
return "RBUG_OP_TEXTURE_LIST_REPLY"; | |||
case RBUG_OP_TEXTURE_INFO_REPLY: | |||
return "RBUG_OP_TEXTURE_INFO_REPLY"; | |||
case RBUG_OP_TEXTURE_READ_REPLY: | |||
return "RBUG_OP_TEXTURE_READ_REPLY"; | |||
case RBUG_OP_CONTEXT_LIST: | |||
return "RBUG_OP_CONTEXT_LIST"; | |||
case RBUG_OP_CONTEXT_INFO: | |||
return "RBUG_OP_CONTEXT_INFO"; | |||
case RBUG_OP_CONTEXT_DRAW_BLOCK: | |||
return "RBUG_OP_CONTEXT_DRAW_BLOCK"; | |||
case RBUG_OP_CONTEXT_DRAW_STEP: | |||
return "RBUG_OP_CONTEXT_DRAW_STEP"; | |||
case RBUG_OP_CONTEXT_DRAW_UNBLOCK: | |||
return "RBUG_OP_CONTEXT_DRAW_UNBLOCK"; | |||
case RBUG_OP_CONTEXT_DRAW_RULE: | |||
return "RBUG_OP_CONTEXT_DRAW_RULE"; | |||
case RBUG_OP_CONTEXT_FLUSH: | |||
return "RBUG_OP_CONTEXT_FLUSH"; | |||
case RBUG_OP_CONTEXT_LIST_REPLY: | |||
return "RBUG_OP_CONTEXT_LIST_REPLY"; | |||
case RBUG_OP_CONTEXT_INFO_REPLY: | |||
return "RBUG_OP_CONTEXT_INFO_REPLY"; | |||
case RBUG_OP_CONTEXT_DRAW_BLOCKED: | |||
return "RBUG_OP_CONTEXT_DRAW_BLOCKED"; | |||
case RBUG_OP_SHADER_LIST: | |||
return "RBUG_OP_SHADER_LIST"; | |||
case RBUG_OP_SHADER_INFO: | |||
return "RBUG_OP_SHADER_INFO"; | |||
case RBUG_OP_SHADER_DISABLE: | |||
return "RBUG_OP_SHADER_DISABLE"; | |||
case RBUG_OP_SHADER_REPLACE: | |||
return "RBUG_OP_SHADER_REPLACE"; | |||
case RBUG_OP_SHADER_LIST_REPLY: | |||
return "RBUG_OP_SHADER_LIST_REPLY"; | |||
case RBUG_OP_SHADER_INFO_REPLY: | |||
return "RBUG_OP_SHADER_INFO_REPLY"; | |||
default: | |||
return NULL; | |||
} | |||
} |
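With rbug_proto_get_name() exported, callers can log packets by name instead of by raw opcode number. A hedged sketch of such a helper; the include path and the log_header() wrapper are assumptions, and only the function added above is taken from the patch:

#include <stdio.h>
#include "rbug/rbug_proto.h"   /* assumed include path */

static void log_header(const struct rbug_proto_header *header)
{
   const char *name = rbug_proto_get_name((enum rbug_opcode)header->opcode);
   /* rbug_proto_get_name() returns NULL for unknown opcodes. */
   printf("rbug: received %s (opcode %d)\n",
          name ? name : "<unknown opcode>", (int)header->opcode);
}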
@@ -91,4 +91,9 @@ struct rbug_proto_header | |||
*/ | |||
struct rbug_connection; | |||
/** | |||
* Get printable string for opcode. | |||
*/ | |||
const char* rbug_proto_get_name(enum rbug_opcode opcode); | |||
#endif |
@@ -305,7 +305,7 @@ struct rbug_proto_shader_list * rbug_demarshal_shader_list(struct rbug_proto_hea | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST) | |||
if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST) | |||
return NULL; | |||
pos = 0; | |||
@@ -332,7 +332,7 @@ struct rbug_proto_shader_info * rbug_demarshal_shader_info(struct rbug_proto_hea | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO) | |||
if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO) | |||
return NULL; | |||
pos = 0; | |||
@@ -360,7 +360,7 @@ struct rbug_proto_shader_disable * rbug_demarshal_shader_disable(struct rbug_pro | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_SHADER_DISABLE) | |||
if (header->opcode != (int32_t)RBUG_OP_SHADER_DISABLE) | |||
return NULL; | |||
pos = 0; | |||
@@ -389,7 +389,7 @@ struct rbug_proto_shader_replace * rbug_demarshal_shader_replace(struct rbug_pro | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_SHADER_REPLACE) | |||
if (header->opcode != (int32_t)RBUG_OP_SHADER_REPLACE) | |||
return NULL; | |||
pos = 0; | |||
@@ -418,7 +418,7 @@ struct rbug_proto_shader_list_reply * rbug_demarshal_shader_list_reply(struct rb | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST_REPLY) | |||
return NULL; | |||
pos = 0; | |||
@@ -446,7 +446,7 @@ struct rbug_proto_shader_info_reply * rbug_demarshal_shader_info_reply(struct rb | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO_REPLY) | |||
return NULL; | |||
pos = 0; |
@@ -417,7 +417,7 @@ struct rbug_proto_texture_list * rbug_demarshal_texture_list(struct rbug_proto_h | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST) | |||
return NULL; | |||
pos = 0; | |||
@@ -443,7 +443,7 @@ struct rbug_proto_texture_info * rbug_demarshal_texture_info(struct rbug_proto_h | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO) | |||
return NULL; | |||
pos = 0; | |||
@@ -470,7 +470,7 @@ struct rbug_proto_texture_write * rbug_demarshal_texture_write(struct rbug_proto | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_WRITE) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_WRITE) | |||
return NULL; | |||
pos = 0; | |||
@@ -506,7 +506,7 @@ struct rbug_proto_texture_read * rbug_demarshal_texture_read(struct rbug_proto_h | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ) | |||
return NULL; | |||
pos = 0; | |||
@@ -540,7 +540,7 @@ struct rbug_proto_texture_list_reply * rbug_demarshal_texture_list_reply(struct | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST_REPLY) | |||
return NULL; | |||
pos = 0; | |||
@@ -568,7 +568,7 @@ struct rbug_proto_texture_info_reply * rbug_demarshal_texture_info_reply(struct | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO_REPLY) | |||
return NULL; | |||
pos = 0; | |||
@@ -606,7 +606,7 @@ struct rbug_proto_texture_read_reply * rbug_demarshal_texture_read_reply(struct | |||
if (!header) | |||
return NULL; | |||
if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ_REPLY) | |||
if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ_REPLY) | |||
return NULL; | |||
pos = 0; |
@@ -58,7 +58,6 @@ | |||
#include <unistd.h> | |||
#include <sys/mman.h> | |||
#include "os/os_thread.h" | |||
#include "util/u_mm.h" | |||
#define EXEC_HEAP_SIZE (10*1024*1024) |
@@ -23,25 +23,12 @@ | |||
#include "cell/ppu/cell_public.h" | |||
#endif | |||
static INLINE struct pipe_screen * | |||
sw_screen_create(struct sw_winsys *winsys) | |||
sw_screen_create_named(struct sw_winsys *winsys, const char *driver) | |||
{ | |||
const char *default_driver; | |||
const char *driver; | |||
struct pipe_screen *screen = NULL; | |||
#if defined(GALLIUM_CELL) | |||
default_driver = "cell"; | |||
#elif defined(GALLIUM_LLVMPIPE) | |||
default_driver = "llvmpipe"; | |||
#elif defined(GALLIUM_SOFTPIPE) | |||
default_driver = "softpipe"; | |||
#else | |||
default_driver = ""; | |||
#endif | |||
driver = debug_get_option("GALLIUM_DRIVER", default_driver); | |||
#if defined(GALLIUM_CELL) | |||
if (screen == NULL && strcmp(driver, "cell") == 0) | |||
screen = cell_create_screen(winsys); | |||
@@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys) | |||
return screen; | |||
} | |||
static INLINE struct pipe_screen * | |||
sw_screen_create(struct sw_winsys *winsys) | |||
{ | |||
const char *default_driver; | |||
const char *driver; | |||
#if defined(GALLIUM_CELL) | |||
default_driver = "cell"; | |||
#elif defined(GALLIUM_LLVMPIPE) | |||
default_driver = "llvmpipe"; | |||
#elif defined(GALLIUM_SOFTPIPE) | |||
default_driver = "softpipe"; | |||
#else | |||
default_driver = ""; | |||
#endif | |||
driver = debug_get_option("GALLIUM_DRIVER", default_driver); | |||
return sw_screen_create_named(winsys, driver); | |||
} | |||
#endif |
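The refactor splits driver selection out of sw_screen_create(), so callers can either keep the GALLIUM_DRIVER/compile-time default or force a software rasterizer by name. A minimal sketch assuming only the two entry points above; the include path is a guess at where this helper lives in the tree:

#include "target-helpers/inline_sw_helper.h"   /* assumed include path */

static struct pipe_screen *
screen_from_winsys(struct sw_winsys *ws, const char *forced_driver)
{
   /* An explicit name wins; otherwise fall back to the GALLIUM_DRIVER /
    * compile-time default handled inside sw_screen_create(). */
   if (forced_driver)
      return sw_screen_create_named(ws, forced_driver);
   return sw_screen_create(ws);
}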
@@ -13,22 +13,28 @@ static INLINE struct pipe_screen * | |||
sw_screen_wrap(struct pipe_screen *screen) | |||
{ | |||
struct sw_winsys *sws; | |||
struct pipe_screen *sw_screen; | |||
struct pipe_screen *sw_screen = NULL; | |||
const char *driver; | |||
sws = wrapper_sw_winsys_warp_pipe_screen(screen); | |||
driver = debug_get_option("GALLIUM_DRIVER", "native"); | |||
if (strcmp(driver, "native") == 0) | |||
return screen; | |||
sws = wrapper_sw_winsys_wrap_pipe_screen(screen); | |||
if (!sws) | |||
goto err; | |||
sw_screen = sw_screen_create(sws); | |||
if (sw_screen == screen) | |||
sw_screen = sw_screen_create_named(sws, driver); | |||
if (!sw_screen) | |||
goto err_winsys; | |||
return sw_screen; | |||
err_winsys: | |||
sws->destroy(sws); | |||
return wrapper_sw_winsys_dewrap_pipe_screen(sws); | |||
err: | |||
return screen; | |||
return screen; | |||
} | |||
#endif |
@@ -90,7 +90,8 @@ static const char *processor_type_names[] = | |||
"GEOM" | |||
}; | |||
static const char *file_names[TGSI_FILE_COUNT] = | |||
const char * | |||
tgsi_file_names[TGSI_FILE_COUNT] = | |||
{ | |||
"NULL", | |||
"CONST", | |||
@@ -125,7 +126,8 @@ static const char *semantic_names[] = | |||
"FACE", | |||
"EDGEFLAG", | |||
"PRIM_ID", | |||
"INSTANCEID" | |||
"INSTANCEID", | |||
"STENCIL" | |||
}; | |||
static const char *immediate_type_names[] = | |||
@@ -135,7 +137,8 @@ static const char *immediate_type_names[] = | |||
"INT32" | |||
}; | |||
static const char *swizzle_names[] = | |||
const char * | |||
tgsi_swizzle_names[] = | |||
{ | |||
"x", | |||
"y", | |||
@@ -143,7 +146,8 @@ static const char *swizzle_names[] = | |||
"w" | |||
}; | |||
static const char *texture_names[] = | |||
const char * | |||
tgsi_texture_names[] = | |||
{ | |||
"UNKNOWN", | |||
"1D", | |||
@@ -201,15 +205,15 @@ _dump_register_src( | |||
struct dump_ctx *ctx, | |||
const struct tgsi_full_src_register *src ) | |||
{ | |||
ENM(src->Register.File, file_names); | |||
ENM(src->Register.File, tgsi_file_names); | |||
if (src->Register.Dimension) { | |||
if (src->Dimension.Indirect) { | |||
CHR( '[' ); | |||
ENM( src->DimIndirect.File, file_names ); | |||
ENM( src->DimIndirect.File, tgsi_file_names ); | |||
CHR( '[' ); | |||
SID( src->DimIndirect.Index ); | |||
TXT( "]." ); | |||
ENM( src->DimIndirect.SwizzleX, swizzle_names ); | |||
ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names ); | |||
if (src->Dimension.Index != 0) { | |||
if (src->Dimension.Index > 0) | |||
CHR( '+' ); | |||
@@ -224,11 +228,11 @@ _dump_register_src( | |||
} | |||
if (src->Register.Indirect) { | |||
CHR( '[' ); | |||
ENM( src->Indirect.File, file_names ); | |||
ENM( src->Indirect.File, tgsi_file_names ); | |||
CHR( '[' ); | |||
SID( src->Indirect.Index ); | |||
TXT( "]." ); | |||
ENM( src->Indirect.SwizzleX, swizzle_names ); | |||
ENM( src->Indirect.SwizzleX, tgsi_swizzle_names ); | |||
if (src->Register.Index != 0) { | |||
if (src->Register.Index > 0) | |||
CHR( '+' ); | |||
@@ -248,15 +252,15 @@ _dump_register_dst( | |||
struct dump_ctx *ctx, | |||
const struct tgsi_full_dst_register *dst ) | |||
{ | |||
ENM(dst->Register.File, file_names); | |||
ENM(dst->Register.File, tgsi_file_names); | |||
if (dst->Register.Dimension) { | |||
if (dst->Dimension.Indirect) { | |||
CHR( '[' ); | |||
ENM( dst->DimIndirect.File, file_names ); | |||
ENM( dst->DimIndirect.File, tgsi_file_names ); | |||
CHR( '[' ); | |||
SID( dst->DimIndirect.Index ); | |||
TXT( "]." ); | |||
ENM( dst->DimIndirect.SwizzleX, swizzle_names ); | |||
ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names ); | |||
if (dst->Dimension.Index != 0) { | |||
if (dst->Dimension.Index > 0) | |||
CHR( '+' ); | |||
@@ -271,11 +275,11 @@ _dump_register_dst( | |||
} | |||
if (dst->Register.Indirect) { | |||
CHR( '[' ); | |||
ENM( dst->Indirect.File, file_names ); | |||
ENM( dst->Indirect.File, tgsi_file_names ); | |||
CHR( '[' ); | |||
SID( dst->Indirect.Index ); | |||
TXT( "]." ); | |||
ENM( dst->Indirect.SwizzleX, swizzle_names ); | |||
ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names ); | |||
if (dst->Register.Index != 0) { | |||
if (dst->Register.Index > 0) | |||
CHR( '+' ); | |||
@@ -351,7 +355,7 @@ iter_declaration( | |||
TXT( "DCL " ); | |||
ENM(decl->Declaration.File, file_names); | |||
ENM(decl->Declaration.File, tgsi_file_names); | |||
/* all geometry shader inputs are two dimensional */ | |||
if (decl->Declaration.File == TGSI_FILE_INPUT && | |||
@@ -585,10 +589,10 @@ iter_instruction( | |||
inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || | |||
inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { | |||
CHR( '.' ); | |||
ENM( inst->Predicate.SwizzleX, swizzle_names ); | |||
ENM( inst->Predicate.SwizzleY, swizzle_names ); | |||
ENM( inst->Predicate.SwizzleZ, swizzle_names ); | |||
ENM( inst->Predicate.SwizzleW, swizzle_names ); | |||
ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names ); | |||
ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names ); | |||
ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names ); | |||
ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names ); | |||
} | |||
TXT( ") " ); | |||
@@ -641,10 +645,10 @@ iter_instruction( | |||
src->Register.SwizzleZ != TGSI_SWIZZLE_Z || | |||
src->Register.SwizzleW != TGSI_SWIZZLE_W) { | |||
CHR( '.' ); | |||
ENM( src->Register.SwizzleX, swizzle_names ); | |||
ENM( src->Register.SwizzleY, swizzle_names ); | |||
ENM( src->Register.SwizzleZ, swizzle_names ); | |||
ENM( src->Register.SwizzleW, swizzle_names ); | |||
ENM( src->Register.SwizzleX, tgsi_swizzle_names ); | |||
ENM( src->Register.SwizzleY, tgsi_swizzle_names ); | |||
ENM( src->Register.SwizzleZ, tgsi_swizzle_names ); | |||
ENM( src->Register.SwizzleW, tgsi_swizzle_names ); | |||
} | |||
if (src->Register.Absolute) | |||
@@ -655,7 +659,7 @@ iter_instruction( | |||
if (inst->Instruction.Texture) { | |||
TXT( ", " ); | |||
ENM( inst->Texture.Texture, texture_names ); | |||
ENM( inst->Texture.Texture, tgsi_texture_names ); | |||
} | |||
switch (inst->Instruction.Opcode) { |
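Exporting the name tables (declared in tgsi_dump.h in the next hunk) lets other modules print registers without duplicating the strings. A small sketch, assuming only the tgsi headers touched here:

#include <stdio.h>
#include "tgsi/tgsi_dump.h"

static void
print_src(unsigned file, unsigned index, unsigned swizzle_x)
{
   /* file indexes tgsi_file_names (e.g. "TEMP"), swizzle_x indexes
    * tgsi_swizzle_names ("x".."w"). */
   printf("%s[%u].%s\n",
          tgsi_file_names[file],
          index,
          tgsi_swizzle_names[swizzle_x]);
}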
@@ -35,6 +35,15 @@ | |||
extern "C" { | |||
#endif | |||
extern const char * | |||
tgsi_file_names[TGSI_FILE_COUNT]; | |||
extern const char * | |||
tgsi_swizzle_names[]; | |||
extern const char * | |||
tgsi_texture_names[]; | |||
void | |||
tgsi_dump_str( | |||
const struct tgsi_token *tokens, |
@@ -605,8 +605,10 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) | |||
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { | |||
if ((inst->Src[i].Register.File == | |||
inst->Dst[0].Register.File) && | |||
(inst->Src[i].Register.Index == | |||
inst->Dst[0].Register.Index)) { | |||
((inst->Src[i].Register.Index == | |||
inst->Dst[0].Register.Index) || | |||
inst->Src[i].Register.Indirect || | |||
inst->Dst[0].Register.Indirect)) { | |||
/* loop over dest channels */ | |||
uint channelsWritten = 0x0; | |||
FOR_EACH_ENABLED_CHANNEL(*inst, chan) { |
@@ -163,6 +163,10 @@ OP12(USGE) | |||
OP12(USHR) | |||
OP12(USLT) | |||
OP12(USNE) | |||
OP01(SWITCH) | |||
OP01(CASE) | |||
OP00(DEFAULT) | |||
OP00(ENDSWITCH) | |||
#undef OP00 |
@@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, | |||
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; | |||
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; | |||
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; | |||
info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid; | |||
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; | |||
info->num_inputs++; | |||
} | |||
@@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens, | |||
/* extra info for special outputs */ | |||
if (procType == TGSI_PROCESSOR_FRAGMENT && | |||
fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { | |||
info->writes_z = TRUE; | |||
} | |||
fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) | |||
info->writes_z = TRUE; | |||
if (procType == TGSI_PROCESSOR_FRAGMENT && | |||
fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL) | |||
info->writes_stencil = TRUE; | |||
if (procType == TGSI_PROCESSOR_VERTEX && | |||
fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) { | |||
info->writes_edgeflag = TRUE; |
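A driver picks up the new scan results after tgsi_scan_shader(); in this hedged sketch the two hooks are hypothetical placeholders for whatever hardware setup the driver actually does:

#include "tgsi/tgsi_scan.h"

/* Hypothetical driver hooks, stubbed out so the sketch stays self-contained. */
static void setup_stencil_export(void) { /* program hw stencil export */ }
static void want_centroid_interp(unsigned input) { (void)input; }

static void
configure_fs(const struct tgsi_token *tokens)
{
   struct tgsi_shader_info info;
   unsigned i;

   tgsi_scan_shader(tokens, &info);

   if (info.writes_stencil)
      setup_stencil_export();

   for (i = 0; i < info.num_inputs; i++) {
      if (info.input_centroid[i])
         want_centroid_interp(i);
   }
}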
@@ -45,6 +45,7 @@ struct tgsi_shader_info | |||
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ | |||
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; | |||
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; | |||
ubyte input_centroid[PIPE_MAX_SHADER_INPUTS]; | |||
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; | |||
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; | |||
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ | |||
@@ -60,6 +61,7 @@ struct tgsi_shader_info | |||
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ | |||
boolean writes_z; /**< does fragment shader write Z value? */ | |||
boolean writes_stencil; /**< does fragment shader write stencil value? */ | |||
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ | |||
boolean uses_kill; /**< KIL or KILP instruction used? */ | |||
@@ -2830,31 +2830,52 @@ static void soa_to_aos( struct x86_function *func, | |||
* Check if the instructions dst register is the same as any src | |||
* register and warn if there's a possible SOA dependency. | |||
*/ | |||
static void | |||
static boolean | |||
check_soa_dependencies(const struct tgsi_full_instruction *inst) | |||
{ | |||
switch (inst->Instruction.Opcode) { | |||
uint opcode = inst->Instruction.Opcode; | |||
/* XXX: we only handle src/dst aliasing in a few opcodes currently. | |||
* Need to use an additional temporary to hold the result in the | |||
* cases where the code is too opaque to fix. | |||
*/ | |||
switch (opcode) { | |||
case TGSI_OPCODE_ADD: | |||
case TGSI_OPCODE_MOV: | |||
case TGSI_OPCODE_MUL: | |||
case TGSI_OPCODE_RCP: | |||
case TGSI_OPCODE_RSQ: | |||
case TGSI_OPCODE_EXP: | |||
case TGSI_OPCODE_LOG: | |||
case TGSI_OPCODE_DP3: | |||
case TGSI_OPCODE_DP4: | |||
case TGSI_OPCODE_DP2A: | |||
case TGSI_OPCODE_EX2: | |||
case TGSI_OPCODE_LG2: | |||
case TGSI_OPCODE_POW: | |||
case TGSI_OPCODE_XPD: | |||
case TGSI_OPCODE_DPH: | |||
case TGSI_OPCODE_COS: | |||
case TGSI_OPCODE_SIN: | |||
case TGSI_OPCODE_TEX: | |||
case TGSI_OPCODE_TXB: | |||
case TGSI_OPCODE_TXP: | |||
case TGSI_OPCODE_NRM: | |||
case TGSI_OPCODE_NRM4: | |||
case TGSI_OPCODE_DP2: | |||
/* OK - these opcodes correctly handle SOA dependencies */ | |||
break; | |||
return TRUE; | |||
default: | |||
if (tgsi_check_soa_dependencies(inst)) { | |||
uint opcode = inst->Instruction.Opcode; | |||
if (!tgsi_check_soa_dependencies(inst)) | |||
return TRUE; | |||
/* XXX: we only handle src/dst aliasing in a few opcodes | |||
* currently. Need to use an additional temporary to hold | |||
* the result in the cases where the code is too opaque to | |||
* fix. | |||
*/ | |||
if (opcode != TGSI_OPCODE_MOV) { | |||
debug_printf("Warning: src/dst aliasing in instruction" | |||
" is not handled:\n"); | |||
tgsi_dump_instruction(inst, 1); | |||
} | |||
} | |||
debug_printf("Warning: src/dst aliasing in instruction" | |||
" is not handled:\n"); | |||
debug_printf("Warning: "); | |||
tgsi_dump_instruction(inst, 1); | |||
return FALSE; | |||
} | |||
} | |||
@@ -2954,7 +2975,8 @@ tgsi_emit_sse2( | |||
tgsi_get_processor_name(proc)); | |||
} | |||
check_soa_dependencies(&parse.FullToken.FullInstruction); | |||
if (ok) | |||
ok = check_soa_dependencies(&parse.FullToken.FullInstruction); | |||
break; | |||
case TGSI_TOKEN_TYPE_IMMEDIATE: |
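The earlier tgsi_exec.c change makes tgsi_check_soa_dependencies() conservative whenever indirect addressing is involved, since TEMP[ADDR[0].x] may alias any register in the file, and the SSE2 path above now fails instead of merely warning when it cannot handle aliasing. A plain-C illustration of the hazard being detected: when source and destination are the same register and channels are processed one at a time, a swizzled copy reads already-clobbered data.

#include <stdio.h>

int main(void)
{
   float reg[4] = { 1.0f, 2.0f, 3.0f, 4.0f };

   /* Intended: reg.xy = reg.yx (swap x and y), done channel-at-a-time. */
   reg[0] = reg[1];   /* write x from y -> x becomes 2 */
   reg[1] = reg[0];   /* write y from x -> reads the already-clobbered x */

   printf("%g %g\n", reg[0], reg[1]);  /* prints "2 2", not the intended "2 1" */
   return 0;
}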
@@ -96,7 +96,8 @@ struct ureg_program | |||
unsigned semantic_name; | |||
unsigned semantic_index; | |||
unsigned interp; | |||
unsigned cylindrical_wrap; | |||
unsigned char cylindrical_wrap; | |||
unsigned char centroid; | |||
} fs_input[UREG_MAX_INPUT]; | |||
unsigned nr_fs_inputs; | |||
@@ -286,11 +287,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, | |||
struct ureg_src | |||
ureg_DECL_fs_input_cyl(struct ureg_program *ureg, | |||
ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, | |||
unsigned semantic_name, | |||
unsigned semantic_index, | |||
unsigned interp_mode, | |||
unsigned cylindrical_wrap) | |||
unsigned cylindrical_wrap, | |||
unsigned centroid) | |||
{ | |||
unsigned i; | |||
@@ -306,6 +308,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg, | |||
ureg->fs_input[i].semantic_index = semantic_index; | |||
ureg->fs_input[i].interp = interp_mode; | |||
ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; | |||
ureg->fs_input[i].centroid = centroid; | |||
ureg->nr_fs_inputs++; | |||
} else { | |||
set_bad(ureg); | |||
@@ -1126,7 +1129,8 @@ emit_decl_fs(struct ureg_program *ureg, | |||
unsigned semantic_name, | |||
unsigned semantic_index, | |||
unsigned interpolate, | |||
unsigned cylindrical_wrap) | |||
unsigned cylindrical_wrap, | |||
unsigned centroid) | |||
{ | |||
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); | |||
@@ -1138,6 +1142,7 @@ emit_decl_fs(struct ureg_program *ureg, | |||
out[0].decl.Interpolate = interpolate; | |||
out[0].decl.Semantic = 1; | |||
out[0].decl.CylindricalWrap = cylindrical_wrap; | |||
out[0].decl.Centroid = centroid; | |||
out[1].value = 0; | |||
out[1].decl_range.First = index; | |||
@@ -1287,7 +1292,8 @@ static void emit_decls( struct ureg_program *ureg ) | |||
ureg->fs_input[i].semantic_name, | |||
ureg->fs_input[i].semantic_index, | |||
ureg->fs_input[i].interp, | |||
ureg->fs_input[i].cylindrical_wrap); | |||
ureg->fs_input[i].cylindrical_wrap, | |||
ureg->fs_input[i].centroid); | |||
} | |||
} else { | |||
for (i = 0; i < ureg->nr_gs_inputs; i++) { |
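A hedged usage sketch of the extended declaration: the semantic and interpolation values are ordinary TGSI enums, and only the ureg entry point added in this patch is assumed.

#include "tgsi/tgsi_ureg.h"
#include "pipe/p_shader_tokens.h"

static struct ureg_src
declare_centroid_color(struct ureg_program *ureg)
{
   /* Declare COLOR0 with perspective interpolation, no cylindrical
    * wrap, sampled at the centroid. */
   return ureg_DECL_fs_input_cyl_centroid(ureg,
                                          TGSI_SEMANTIC_COLOR, 0,
                                          TGSI_INTERPOLATE_PERSPECTIVE,
                                          0,   /* cylindrical_wrap */
                                          1);  /* centroid */
}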
@@ -158,11 +158,27 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, | |||
*/ | |||
struct ureg_src | |||
ureg_DECL_fs_input_cyl(struct ureg_program *, | |||
ureg_DECL_fs_input_cyl_centroid(struct ureg_program *, | |||
unsigned semantic_name, | |||
unsigned semantic_index, | |||
unsigned interp_mode, | |||
unsigned cylindrical_wrap); | |||
unsigned cylindrical_wrap, | |||
unsigned centroid); | |||
static INLINE struct ureg_src | |||
ureg_DECL_fs_input_cyl(struct ureg_program *ureg, | |||
unsigned semantic_name, | |||
unsigned semantic_index, | |||
unsigned interp_mode, | |||
unsigned cylindrical_wrap) | |||
{ | |||
return ureg_DECL_fs_input_cyl_centroid(ureg, | |||
semantic_name, | |||
semantic_index, | |||
interp_mode, | |||
cylindrical_wrap, | |||
0); | |||
} | |||
static INLINE struct ureg_src | |||
ureg_DECL_fs_input(struct ureg_program *ureg, | |||
@@ -170,11 +186,11 @@ ureg_DECL_fs_input(struct ureg_program *ureg, | |||
unsigned semantic_index, | |||
unsigned interp_mode) | |||
{ | |||
return ureg_DECL_fs_input_cyl(ureg, | |||
return ureg_DECL_fs_input_cyl_centroid(ureg, | |||
semantic_name, | |||
semantic_index, | |||
interp_mode, | |||
0); | |||
0, 0); | |||
} | |||
struct ureg_src |
@@ -29,6 +29,8 @@ | |||
#define PIPE_ATOMIC_ASM_MSVC_X86 | |||
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)) | |||
#define PIPE_ATOMIC_ASM_GCC_X86 | |||
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64)) | |||
#define PIPE_ATOMIC_ASM_GCC_X86_64 | |||
#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401) | |||
#define PIPE_ATOMIC_GCC_INTRINSIC | |||
#else | |||
@@ -36,6 +38,51 @@ | |||
#endif | |||
#if defined(PIPE_ATOMIC_ASM_GCC_X86_64) | |||
#define PIPE_ATOMIC "GCC x86_64 assembly" | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#define p_atomic_set(_v, _i) (*(_v) = (_i)) | |||
#define p_atomic_read(_v) (*(_v)) | |||
static INLINE boolean | |||
p_atomic_dec_zero(int32_t *v) | |||
{ | |||
unsigned char c; | |||
__asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c) | |||
::"memory"); | |||
return c != 0; | |||
} | |||
static INLINE void | |||
p_atomic_inc(int32_t *v) | |||
{ | |||
__asm__ __volatile__("lock; incl %0":"+m"(*v)); | |||
} | |||
static INLINE void | |||
p_atomic_dec(int32_t *v) | |||
{ | |||
__asm__ __volatile__("lock; decl %0":"+m"(*v)); | |||
} | |||
static INLINE int32_t | |||
p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new) | |||
{ | |||
return __sync_val_compare_and_swap(v, old, _new); | |||
} | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */ | |||
#if defined(PIPE_ATOMIC_ASM_GCC_X86) | |||
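A hedged usage sketch of the p_atomic_*() interface the new x86_64 block implements, here driving a reference count; the refcounted type and its release callback are illustrative only.

#include "util/u_atomic.h"

struct refcounted {
   int32_t refcount;
   void (*release)(struct refcounted *);
};

static void
ref(struct refcounted *obj)
{
   p_atomic_inc(&obj->refcount);
}

static void
unref(struct refcounted *obj)
{
   /* p_atomic_dec_zero() returns true only for the thread that drops
    * the count to zero, so exactly one caller releases the object. */
   if (p_atomic_dec_zero(&obj->refcount))
      obj->release(obj);
}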
@@ -268,7 +268,7 @@ void util_blitter_destroy(struct blitter_context *blitter) | |||
pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]); | |||
} | |||
for (i = 0; i <= PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++) | |||
for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++) | |||
if (ctx->fs_col[i]) | |||
pipe->delete_fs_state(pipe, ctx->fs_col[i]); | |||
@@ -964,16 +964,18 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, | |||
blitter_restore_CSOs(ctx); | |||
} | |||
/* Clear a region of a depth stencil surface. */ | |||
void util_blitter_flush_depth_stencil(struct blitter_context *blitter, | |||
struct pipe_surface *dstsurf) | |||
/* draw a rectangle across a region using a custom dsa stage - for r600g */ | |||
void util_blitter_custom_depth_stencil(struct blitter_context *blitter, | |||
struct pipe_surface *zsurf, | |||
struct pipe_surface *cbsurf, | |||
void *dsa_stage, float depth) | |||
{ | |||
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; | |||
struct pipe_context *pipe = ctx->base.pipe; | |||
struct pipe_framebuffer_state fb_state; | |||
assert(dstsurf->texture); | |||
if (!dstsurf->texture) | |||
assert(zsurf->texture); | |||
if (!zsurf->texture) | |||
return; | |||
/* check the saved state */ | |||
@@ -981,8 +983,8 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter, | |||
assert(blitter->saved_fb_state.nr_cbufs != ~0); | |||
/* bind CSOs */ | |||
pipe->bind_blend_state(pipe, ctx->blend_keep_color); | |||
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); | |||
pipe->bind_blend_state(pipe, ctx->blend_write_color); | |||
pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage); | |||
pipe->bind_rasterizer_state(pipe, ctx->rs_state); | |||
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); | |||
@@ -990,15 +992,30 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter, | |||
pipe->bind_vertex_elements_state(pipe, ctx->velem_state); | |||
/* set a framebuffer state */ | |||
fb_state.width = dstsurf->width; | |||
fb_state.height = dstsurf->height; | |||
fb_state.nr_cbufs = 0; | |||
fb_state.cbufs[0] = 0; | |||
fb_state.zsbuf = dstsurf; | |||
fb_state.width = zsurf->width; | |||
fb_state.height = zsurf->height; | |||
fb_state.nr_cbufs = 1; | |||
if (cbsurf) { | |||
fb_state.cbufs[0] = cbsurf; | |||
fb_state.nr_cbufs = 1; | |||
} else { | |||
fb_state.cbufs[0] = NULL; | |||
fb_state.nr_cbufs = 0; | |||
} | |||
fb_state.zsbuf = zsurf; | |||
pipe->set_framebuffer_state(pipe, &fb_state); | |||
blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); | |||
blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, | |||
blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height); | |||
blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth, | |||
UTIL_BLITTER_ATTRIB_NONE, NULL); | |||
blitter_restore_CSOs(ctx); | |||
} | |||
/* flush a region of a depth stencil surface for r300g */ | |||
void util_blitter_flush_depth_stencil(struct blitter_context *blitter, | |||
struct pipe_surface *dstsurf) | |||
{ | |||
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; | |||
util_blitter_custom_depth_stencil(blitter, dstsurf, NULL, | |||
ctx->dsa_flush_depth_stencil, 0.0f); | |||
} |
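util_blitter_flush_depth_stencil() is now a thin wrapper, and a driver such as r600g can call the custom entry point directly with its own DSA state. A hedged sketch; the surfaces and the dsa_decompress object are placeholders for driver-created state, not part of this patch.

#include "util/u_blitter.h"

static void
decompress_zbuffer(struct blitter_context *blitter,
                   struct pipe_surface *zsurf,
                   struct pipe_surface *cbsurf,
                   void *dsa_decompress)
{
   /* Draws a full-surface quad at depth 1.0 with the caller's DSA state
    * bound, writing color to cbsurf when one is provided. */
   util_blitter_custom_depth_stencil(blitter, zsurf, cbsurf,
                                     dsa_decompress, 1.0f);
}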
@@ -203,6 +203,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, | |||
void util_blitter_flush_depth_stencil(struct blitter_context *blitter, | |||
struct pipe_surface *dstsurf); | |||
void util_blitter_custom_depth_stencil(struct blitter_context *blitter, | |||
struct pipe_surface *zsurf, | |||
struct pipe_surface *cbsurf, | |||
void *dsa_stage, float depth); | |||
/* The functions below should be used to save currently bound constant state | |||
* objects inside a driver. The objects are automatically restored at the end | |||
* of the util_blitter_{clear, copy_region, fill_region} functions and then |
@@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, | |||
PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs | |||
PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs | |||
PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs | |||
PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs | |||
PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs | |||
PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs | |||
PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs | |||
PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs | |||
PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs | |||
# YUV formats | |||
# http://www.fourcc.org/yuv.php#UYVY |
@@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d | |||
} | |||
} | |||
void | |||
util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) | |||
{ | |||
util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride, | |||
src_row, src_stride, | |||
width, height); | |||
} | |||
void | |||
util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) | |||
{ | |||
util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride, | |||
src_row, src_stride, | |||
width, height); | |||
} | |||
void | |||
util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) | |||
{ | |||
util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride, | |||
src_row, src_stride, | |||
width, height); | |||
} | |||
void | |||
util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) | |||
{ | |||
util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride, | |||
src_row, src_stride, | |||
width, height); | |||
} | |||
void | |||
util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, | |||
const uint8_t *src_row, unsigned src_stride, | |||
unsigned width, unsigned height) | |||
{ | |||
util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride, | |||
src_row, src_stride, | |||
width, height); | |||
} | |||
void | |||
util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, | |||
const uint8_t *src_row, unsigned src_stride, | |||
unsigned width, unsigned height) | |||
{ | |||
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride, | |||
src_row, src_stride, | |||
width, height); | |||
} |
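The new X24S8/S8X24/X32_S8X24 helpers simply forward to the combined depth-stencil packers, so stencil-only transfers can reuse them. A hedged sketch that pulls the stencil plane out of a Z24_UNORM_S8_USCALED image; nothing beyond the declarations in u_format_zs.h is assumed.

#include <stdint.h>
#include "util/u_format_zs.h"

static void
extract_stencil(uint8_t *s8_out, unsigned s8_stride,
                const uint8_t *z24s8_in, unsigned z24s8_stride,
                unsigned width, unsigned height)
{
   /* Identical to calling the z24_unorm_s8_uscaled unpacker directly,
    * since the X24S8 helper just forwards to it. */
   util_format_x24s8_uscaled_unpack_s_8uscaled(s8_out, s8_stride,
                                               z24s8_in, z24s8_stride,
                                               width, height);
}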
@@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned | |||
void | |||
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
void | |||
util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
void | |||
util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
void | |||
util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
void | |||
util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
void | |||
util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
void | |||
util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); | |||
#endif /* U_FORMAT_ZS_H_ */ |
@@ -0,0 +1,127 @@ | |||
/* | |||
* Copyright 2010 Marek Olšák <maraeo@gmail.com> | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* on the rights to use, copy, modify, merge, publish, distribute, sub | |||
* license, and/or sell copies of the Software, and to permit persons to whom | |||
* the Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | |||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |||
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ | |||
#include "pipe/p_context.h" | |||
#include "util/u_index_modify.h" | |||
#include "util/u_inlines.h" | |||
void util_shorten_ubyte_elts(struct pipe_context *context, | |||
struct pipe_resource **elts, | |||
int index_bias, | |||
unsigned start, | |||
unsigned count) | |||
{ | |||
struct pipe_screen* screen = context->screen; | |||
struct pipe_resource* new_elts; | |||
unsigned char *in_map; | |||
unsigned short *out_map; | |||
struct pipe_transfer *src_transfer, *dst_transfer; | |||
unsigned i; | |||
new_elts = pipe_buffer_create(screen, | |||
PIPE_BIND_INDEX_BUFFER, | |||
2 * count); | |||
in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); | |||
out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); | |||
in_map += start; | |||
for (i = 0; i < count; i++) { | |||
*out_map = (unsigned short)(*in_map + index_bias); | |||
in_map++; | |||
out_map++; | |||
} | |||
pipe_buffer_unmap(context, *elts, src_transfer); | |||
pipe_buffer_unmap(context, new_elts, dst_transfer); | |||
*elts = new_elts; | |||
} | |||
void util_rebuild_ushort_elts(struct pipe_context *context, | |||
struct pipe_resource **elts, | |||
int index_bias, | |||
unsigned start, unsigned count) | |||
{ | |||
struct pipe_transfer *in_transfer = NULL; | |||
struct pipe_transfer *out_transfer = NULL; | |||
struct pipe_resource *new_elts; | |||
unsigned short *in_map; | |||
unsigned short *out_map; | |||
unsigned i; | |||
new_elts = pipe_buffer_create(context->screen, | |||
PIPE_BIND_INDEX_BUFFER, | |||
2 * count); | |||
in_map = pipe_buffer_map(context, *elts, | |||
PIPE_TRANSFER_READ, &in_transfer); | |||
out_map = pipe_buffer_map(context, new_elts, | |||
PIPE_TRANSFER_WRITE, &out_transfer); | |||
in_map += start; | |||
for (i = 0; i < count; i++) { | |||
*out_map = (unsigned short)(*in_map + index_bias); | |||
in_map++; | |||
out_map++; | |||
} | |||
pipe_buffer_unmap(context, *elts, in_transfer); | |||
pipe_buffer_unmap(context, new_elts, out_transfer); | |||
*elts = new_elts; | |||
} | |||
void util_rebuild_uint_elts(struct pipe_context *context, | |||
struct pipe_resource **elts, | |||
int index_bias, | |||
unsigned start, unsigned count) | |||
{ | |||
struct pipe_transfer *in_transfer = NULL; | |||
struct pipe_transfer *out_transfer = NULL; | |||
struct pipe_resource *new_elts; | |||
unsigned int *in_map; | |||
unsigned int *out_map; | |||
unsigned i; | |||
new_elts = pipe_buffer_create(context->screen, | |||
PIPE_BIND_INDEX_BUFFER, | |||
2 * count); | |||
in_map = pipe_buffer_map(context, *elts, | |||
PIPE_TRANSFER_READ, &in_transfer); | |||
out_map = pipe_buffer_map(context, new_elts, | |||
PIPE_TRANSFER_WRITE, &out_transfer); | |||
in_map += start; | |||
for (i = 0; i < count; i++) { | |||
*out_map = (unsigned int)(*in_map + index_bias); | |||
in_map++; | |||
out_map++; | |||
} | |||
pipe_buffer_unmap(context, *elts, in_transfer); | |||
pipe_buffer_unmap(context, new_elts, out_transfer); | |||
*elts = new_elts; | |||
} |
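A hedged sketch of a draw path using these helpers when the hardware cannot consume 8-bit indices (or needs an index bias folded in). The index_size/ib arguments stand in for the driver's own draw state, and the caller is assumed to keep its own reference to the original buffer, since the helpers simply overwrite *elts with the new one.

#include "pipe/p_context.h"
#include "util/u_index_modify.h"

static void
translate_index_buffer(struct pipe_context *pipe,
                       struct pipe_resource **ib,
                       unsigned *index_size,
                       int index_bias,
                       unsigned start, unsigned count)
{
   if (*index_size == 1) {
      /* Rewrites *ib to a new 16-bit buffer covering [start, start+count). */
      util_shorten_ubyte_elts(pipe, ib, index_bias, start, count);
      *index_size = 2;
   } else if (*index_size == 2 && index_bias) {
      util_rebuild_ushort_elts(pipe, ib, index_bias, start, count);
   } else if (*index_size == 4 && index_bias) {
      util_rebuild_uint_elts(pipe, ib, index_bias, start, count);
   }
}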
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org> | |||
* Copyright 2010 Marek Olšák <maraeo@gmail.com> | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
@@ -18,40 +18,24 @@ | |||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | |||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |||
* USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* Authors: | |||
* Jerome Glisse | |||
*/ | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <errno.h> | |||
#include "radeon_priv.h" | |||
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ | |||
/* | |||
* draw functions | |||
*/ | |||
int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon) | |||
{ | |||
draw->radeon = radeon; | |||
draw->state = calloc(radeon->max_states, sizeof(void*)); | |||
if (draw->state == NULL) | |||
return -ENOMEM; | |||
return 0; | |||
} | |||
#ifndef UTIL_INDEX_MODIFY_H | |||
#define UTIL_INDEX_MODIFY_H | |||
void util_shorten_ubyte_elts(struct pipe_context *context, | |||
struct pipe_resource **elts, | |||
int index_bias, | |||
unsigned start, | |||
unsigned count); | |||
void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state) | |||
{ | |||
if (state == NULL) | |||
return; | |||
draw->state[state->state_id] = state; | |||
} | |||
void util_rebuild_ushort_elts(struct pipe_context *context, | |||
struct pipe_resource **elts, | |||
int index_bias, | |||
unsigned start, unsigned count); | |||
void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state) | |||
{ | |||
if (state == NULL) | |||
return; | |||
if (draw->state[state->state_id] == state) { | |||
draw->state[state->state_id] = NULL; | |||
} | |||
} | |||
void util_rebuild_uint_elts(struct pipe_context *context, | |||
struct pipe_resource **elts, | |||
int index_bias, | |||
unsigned start, unsigned count); | |||
#endif |
@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val) | |||
#endif | |||
#ifndef M_SQRT2 | |||
#define M_SQRT2 1.41421356237309504880 | |||
#endif | |||
#if defined(_MSC_VER) | |||
#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) |