diff options
author | Brice Goglin <bgoglin@debian.org> | 2008-06-10 22:48:43 +0200 |
---|---|---|
committer | Brice Goglin <bgoglin@debian.org> | 2008-06-10 22:48:43 +0200 |
commit | 2d0ca23319ca1df4a9b099a9d3c13ae4bea9b37b (patch) | |
tree | 49bfd8344bdad21f8783ff76ff4b5906502c7d56 | |
parent | 59c9c40c010ac815668ee4c411003f93aeff857d (diff) | |
parent | 6f4c8b5b5047c6ff6273e3acc98c7ec504bb0e21 (diff) | |
download | mesa-2d0ca23319ca1df4a9b099a9d3c13ae4bea9b37b.tar.gz |
Merge branch 'mesa_7_0_branch' of git://git.freedesktop.org/git/mesa/mesa into debian-unstable
106 files changed, 3033 insertions, 494 deletions
@@ -75,10 +75,15 @@ darwin-fat-32bit \ darwin-fat-all \ darwin-static \ darwin-static-x86ppc \ +dragonfly \ +dragonfly-dri \ +dragonfly-dri-amd64 \ +dragonfly-dri-x86 \ freebsd \ freebsd-dri \ freebsd-dri-amd64 \ freebsd-dri-x86 \ +freebsd-static \ hpux10 \ hpux10-gcc \ hpux10-static \ diff --git a/bin/mklib b/bin/mklib index 90bf834b435..93eeb441509 100755 --- a/bin/mklib +++ b/bin/mklib @@ -111,6 +111,13 @@ do # this is a special case (see bugzilla 10876) DEPS="$DEPS $1" ;; + -Wl*) + # Another special case for DragonFly + DEPS="$DEPS $1" + ;; + -Wl*) + DEPS="$DEPS $1" + ;; '-pthread') DEPS="$DEPS -pthread" ;; @@ -198,7 +205,7 @@ fi # case $ARCH in - 'Linux' | 'OpenBSD' | 'GNU' | GNU/*) + 'Linux' | 'OpenBSD' | 'DragonFly' | 'GNU' | GNU/*) # we assume gcc if [ "x$LINK" = "x" ] ; then diff --git a/configs/dragonfly b/configs/dragonfly new file mode 100644 index 00000000000..97c10e3c90e --- /dev/null +++ b/configs/dragonfly @@ -0,0 +1,38 @@ +# Configuration for DragonFly + +include $(TOP)/configs/default + +CONFIG_NAME = DragonFly + +# Compiler and flags +CC = cc +CXX = c++ +MAKE = gmake + +OPT_FLAGS = -O2 +PIC_FLAGS = -fPIC + +DEFINES = -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_BSD_SOURCE -DUSE_XSHM \ + -DHZ=100 + +X11_INCLUDES = -I/usr/pkg/include + +CFLAGS += $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(DEFINES) $(X11_INCLUDES) \ + -ffast-math -pedantic + +CXXFLAGS += $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(DEFINES) $(X11_INCLUDES) + +GLUT_CFLAGS = -fexceptions + +# Work around aliasing bugs - developers should comment this out +CFLAGS += -fno-strict-aliasing +CXXFLAGS += -fno-strict-aliasing + +EXTRA_LIB_PATH = -L/usr/pkg/lib +APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) $(EXTRA_LIB_PATH) -l$(GLUT_LIB) \ + -l$(GLU_LIB) -l$(GL_LIB) -lXext -lXmu -lXi -lX11 -lm + +# Installation directories (for make install) +INSTALL_DIR = /usr/pkg +DRI_DRIVER_INSTALL_DIR = /usr/pkg/lib/modules/dri/ + diff --git a/configs/dragonfly-dri b/configs/dragonfly-dri new file mode 100644 index 00000000000..cb969718042 --- /dev/null +++ b/configs/dragonfly-dri @@ -0,0 +1,56 @@ +# -*-makefile-*- +# Configuration for dragonfly-dri: DragonFly DRI hardware drivers + +include $(TOP)/configs/dragonfly + +CONFIG_NAME = dragonfly-dri + +# Compiler and flags +CC = gcc +CXX = g++ +WARN_FLAGS = -Wall +OPT_FLAGS = -O -g + +EXPAT_INCLUDES = -I/usr/pkg/include +X11_INCLUDES = -I/usr/pkg/include +DEFINES = -DPTHREADS -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER \ + -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING \ + -DHAVE_ALIAS + +CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) -Wmissing-prototypes \ + -std=c99 -Wundef -ffast-math $(ASM_FLAGS) $(X11_INCLUDES) $(DEFINES) + +CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(DEFINES) -Wall -ansi \ + -pedantic $(ASM_FLAGS) $(X11_INCLUDES) + +# Work around aliasing bugs - developers should comment this out +CFLAGS += -fno-strict-aliasing +CXXFLAGS += -fno-strict-aliasing + +ASM_SOURCES = + +# Library/program dependencies +LIBDRM_CFLAGS = `pkg-config --cflags libdrm` +LIBDRM_LIB = `pkg-config --libs libdrm` +DRI_LIB_DEPS = -L/usr/pkg/lib -lm -lpthread -lexpat $(LIBDRM_LIB) +GL_LIB_DEPS = -L/usr/pkg/lib -lX11 -lXext -lXxf86vm -lXdamage -lXfixes \ + -lm -lpthread $(LIBDRM_LIB) + +GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/pkg/lib -lGLU -lGL -lX11 -lXmu \ + -lXt -lXi -lm +GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/pkg/lib -lGL -lXt -lX11 + + +# Directories +SRC_DIRS = glx/x11 mesa glu glut/glx glw +DRIVER_DIRS = dri +PROGRAM_DIRS = +WINDOW_SYSTEM = dri + +DRM_SOURCE_PATH = $(TOP)/../drm + +# ffb and gamma are missing because they have not been converted to use the new +# interface. +DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon tdfx \ + unichrome savage sis + diff --git a/configs/dragonfly-dri-amd64 b/configs/dragonfly-dri-amd64 new file mode 100644 index 00000000000..6c812da794d --- /dev/null +++ b/configs/dragonfly-dri-amd64 @@ -0,0 +1,10 @@ +# -*-makefile-*- +# Configuration for dragonfly-dri-amd64: DragonFly DRI hardware drivers + +include $(TOP)/configs/dragonfly-dri + +CONFIG_NAME = dragonfly-dri-x86-64 + +ASM_FLAGS = -DUSE_X86_64_ASM +ASM_SOURCES = $(X86-64_SOURCES) +ASM_API = $(X86-64_API) diff --git a/configs/dragonfly-dri-x86 b/configs/dragonfly-dri-x86 new file mode 100644 index 00000000000..1fefa80428c --- /dev/null +++ b/configs/dragonfly-dri-x86 @@ -0,0 +1,13 @@ +# -*-makefile-*- +# Configuration for dragonfly-dri-x86: DragonFly DRI hardware drivers + +include $(TOP)/configs/dragonfly-dri + +CONFIG_NAME = dragonfly-dri-x86 + +# Unnecessary on x86, generally. +PIC_FLAGS = + +ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM +ASM_SOURCES = $(X86_SOURCES) +ASM_API = $(X86_API) diff --git a/configs/freebsd-static b/configs/freebsd-static new file mode 100644 index 00000000000..f87b1b8e420 --- /dev/null +++ b/configs/freebsd-static @@ -0,0 +1,27 @@ +# Configuration for generic FreeBSD, making static libs +# Written by cy on 2008-04-23. + +include $(TOP)/configs/freebsd + +CONFIG_NAME = freebsd-static + +MKLIB_OPTIONS = -static +PIC_FLAGS = + +# Library names (actual file names) +GL_LIB_NAME = libGL.a +GLU_LIB_NAME = libGLU.a +GLUT_LIB_NAME = libglut.a +GLW_LIB_NAME = libGLw.a +OSMESA_LIB_NAME = libOSMesa.a + +# Library/program dependencies (static libs don't have dependencies) +GL_LIB_DEPS = +OSMESA_LIB_DEPS = +GLU_LIB_DEPS = +GLUT_LIB_DEPS = +GLW_LIB_DEPS = + +# Need to specify all libraries we may need +APP_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lXmu -lXt -lXi -lpthread \ + -lstdc++ -lm diff --git a/docs/relnotes-7.0.3.html b/docs/relnotes-7.0.3.html index 0b8a56f6dd7..5c8efc79d7c 100644 --- a/docs/relnotes-7.0.3.html +++ b/docs/relnotes-7.0.3.html @@ -17,6 +17,15 @@ Mesa 7.0.3 is a stable release with bug fixes since version 7.0.2. <h2>MD5 checksums</h2> <pre> +3fd1cb76531b2515ef7db92d9a93dbf8 MesaLib-7.0.3.tar.gz +e6e6379d7793af40a6bc3ce1bace572e MesaLib-7.0.3.tar.bz2 +97882bac195229ee0b78cab82e0e3be1 MesaLib-7.0.3.zip +8abf6bbcb1661e7dd4ce73b3fbb85898 MesaDemos-7.0.3.tar.gz +47fd6863621d3c9c7dbb870ab7f0c303 MesaDemos-7.0.3.tar.bz2 +99e442e14da1928f76a7297bb421a3af MesaDemos-7.0.3.zip +2b50fe9fadc4709b57c52adef09fce3c MesaGLUT-7.0.3.tar.gz +0ff23c4e91b238abae63a5fc9fa003e7 MesaGLUT-7.0.3.tar.bz2 +70e83554a4462dad28e0d6e20f79aada MesaGLUT-7.0.3.zip </pre> diff --git a/docs/relnotes-7.0.4.html b/docs/relnotes-7.0.4.html new file mode 100644 index 00000000000..27fbf17e86d --- /dev/null +++ b/docs/relnotes-7.0.4.html @@ -0,0 +1,65 @@ +<HTML> + +<TITLE>Mesa Release Notes</TITLE> + +<head><link rel="stylesheet" type="text/css" href="mesa.css"></head> + +<BODY> + +<body bgcolor="#eeeeee"> + +<H1>Mesa 7.0.4 Release Notes / (TBD 2008)</H1> + +<p> +Mesa 7.0.4 is a stable release with bug fixes since version 7.0.3. +</p> + + +<h2>MD5 checksums</h2> +<pre> +</pre> + + +<h2>Bug fixes</h2> +<ul> +<li>define #extension GL_ARB_texture_rectangle in shading language +<li>fixed WIN32 compile problem in libGLU +<li>Fixed a per-vertex glMaterial bug which could cause bad lighting +<li>Fixed potential crash in AA/smoothed triangle rendering when using a fragment shader +<li>Fixed glDrawElement + VBO segfault (bug 16156) +</ul> + +<h2>Changes</h2> +<ul> +<li>Added support for DragonFly OS +<li>Added a build config for FreeBSD static libs (Anatolij Shkodin) +<li>Enabled GL_EXT_multi_draw_arrays extension in R200/R300 drivers +<li>Enabled GL_ARB_point_sprite extension in I965 driver +<li>Enabled GL_EXT_texture_sRGB extension in I965 driver +<li>Added support for GL shading language in I965 driver +</ul> + + + + +<h2>Driver Status</h2> + +<pre> +Driver Status +---------------------- ---------------------- +DRI drivers varies with the driver +XMesa/GLX (on Xlib) implements OpenGL 2.1 +OSMesa (off-screen) implements OpenGL 2.1 +Windows/Win32 implements OpenGL 2.1 +Glide (3dfx Voodoo1/2) implements OpenGL 1.3 +SVGA unsupported +Wind River UGL unsupported +DJGPP unsupported +GGI unsupported +BeOS unsupported +Allegro unsupported +D3D unsupported +</pre> + +</body> +</html> diff --git a/docs/relnotes.html b/docs/relnotes.html index 8469c7f6709..79e7118e2c8 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -20,6 +20,7 @@ The release notes summarize what's new or changed in each Mesa release. </p> <UL> +<LI><A HREF="relnotes-7.0.4.html">7.0.4 release notes</A> <LI><A HREF="relnotes-7.0.3.html">7.0.3 release notes</A> <LI><A HREF="relnotes-7.0.2.html">7.0.2 release notes</A> <LI><A HREF="relnotes-7.0.1.html">7.0.1 release notes</A> diff --git a/src/glu/sgi/libutil/mipmap.c b/src/glu/sgi/libutil/mipmap.c index 44f519a4e21..d65b7f689f5 100644 --- a/src/glu/sgi/libutil/mipmap.c +++ b/src/glu/sgi/libutil/mipmap.c @@ -6627,7 +6627,7 @@ typedef void (GLAPIENTRY *TexImage3Dproc)( GLenum target, GLint level, static TexImage3Dproc pTexImage3D = 0; -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(__WIN32__) # include <dlfcn.h> # include <sys/types.h> #else @@ -6642,7 +6642,7 @@ static void gluTexImage3D( GLenum target, GLint level, const GLvoid *pixels ) { if (!pTexImage3D) { -#ifdef _WIN32 +#if defined(_WIN32) || defined(__WIN32__) pTexImage3D = (TexImage3Dproc) wglGetProcAddress("glTexImage3D"); if (!pTexImage3D) pTexImage3D = (TexImage3Dproc) wglGetProcAddress("glTexImage3DEXT"); diff --git a/src/glut/glx/Makefile b/src/glut/glx/Makefile index 9f995667b40..46185cebbc0 100644 --- a/src/glut/glx/Makefile +++ b/src/glut/glx/Makefile @@ -126,6 +126,6 @@ depend: $(SOURCES) @ echo "running $(MKDEP)" @ touch depend @ $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(SOURCES) \ - > /dev/null + $(X11_INCLUDES) > /dev/null include depend diff --git a/src/glw/Makefile b/src/glw/Makefile index 727c5838be5..8370e3bb6cd 100644 --- a/src/glw/Makefile +++ b/src/glw/Makefile @@ -61,7 +61,7 @@ $(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME): $(OBJECTS) depend: $(GLW_SOURCES) touch depend $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(GLW_SOURCES) \ - > /dev/null + $(X11_INCLUDES) > /dev/null include depend diff --git a/src/mesa/drivers/dri/common/vblank.c b/src/mesa/drivers/dri/common/vblank.c index e7ed545f13c..5ef80406589 100644 --- a/src/mesa/drivers/dri/common/vblank.c +++ b/src/mesa/drivers/dri/common/vblank.c @@ -233,8 +233,8 @@ static int do_wait( drmVBlank * vbl, GLuint * vbl_seq, int fd ) if ( first_time ) { fprintf(stderr, "%s: drmWaitVBlank returned %d, IRQs don't seem to be" - " working correctly.\nTry running with LIBGL_THROTTLE_REFRESH" - " and LIBL_SYNC_REFRESH unset.\n", __FUNCTION__, ret); + " working correctly.\nTry adjusting the vblank_mode" + " configuration parameter.\n", __FUNCTION__, ret); first_time = GL_FALSE; } diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index b635894fe53..9f6f3277daf 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -63,6 +63,9 @@ extern char *program_invocation_name, *program_invocation_short_name; #elif defined(__NetBSD__) && defined(__NetBSD_Version) && (__NetBSD_Version >= 106000100) # include <stdlib.h> # define GET_PROGRAM_NAME() getprogname() +#elif defined(__DragonFly__) +# include <stdlib.h> +# define GET_PROGRAM_NAME() getprogname() #endif #if !defined(GET_PROGRAM_NAME) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index a28c8bb6fc2..ffb6b31af32 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -42,7 +42,20 @@ #include "program.h" #include "programopt.h" - +static const GLfloat sin_quad_constants[2][4] = { + { + 2.0, + -1.0, + .5, + .75 + }, + { + 4.0, + -4.0, + 1.0 / (2.0 * M_PI), + .2225 + } +}; /* 1, -1/3!, 1/5!, -1/7! */ static const GLfloat sin_constants[4] = { 1.0, @@ -91,7 +104,7 @@ static GLuint src_vector( struct i915_fragment_program *p, break; case FRAG_ATTRIB_FOGC: src = i915_emit_decl( p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W ); - src = swizzle( src, W, W, W, W ); + src = swizzle(src, W, ZERO, ZERO, ONE); break; case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: @@ -269,7 +282,7 @@ static void upload_program( struct i915_fragment_program *p ) while (1) { GLuint src0, src1, src2, flags; - GLuint tmp = 0; + GLuint tmp = 0, consts0 = 0, consts1 = 0; switch (inst->Opcode) { case OPCODE_ABS: @@ -297,67 +310,87 @@ static void upload_program( struct i915_fragment_program *p ) break; case OPCODE_COS: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); - - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, - i915_emit_const1f(p, 1.0/(M_PI * 2)), - 0); - - i915_emit_arith( p, - A0_MOD, + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); + consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); + + /* Reduce range from repeating about [-pi,pi] to [-1,1] */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + src0, + swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ + swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */ + + i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + i915_emit_arith(p, + A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, - tmp, - 0, 0 ); + tmp, + swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ + swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ - /* By choosing different taylor constants, could get rid of this mul: + /* Compute COS with the same calculation used for SIN, but a + * different source range has been mapped to [-1,1] this time. */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - i915_emit_const1f(p, (M_PI * 2)), + + /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 - * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * result = DP4 t0, cos_constants - */ - i915_emit_arith( p, + /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ + i915_emit_arith(p, A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X,X,ONE,ONE), - swizzle(tmp, X,ONE,ONE,ONE), 0); + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + tmp, + 0); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X,Y,X,ONE), - swizzle(tmp, X,X,ONE,ONE), 0); + /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ + i915_emit_arith(p, + A0_DP3, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, + swizzle(consts1, X, Y, ZERO, ZERO), + 0); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X,X,Z,ONE), - swizzle(tmp, Z,ONE,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(tmp, ONE,Z,Y,X), - i915_emit_const4fv( p, cos_constants ), 0); + /* tmp.x now contains a first approximation (y). Now, weight it + * against tmp.y**2 to get closer. + */ + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), + 0); - break; + /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + swizzle(tmp, ZERO, Y, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); - case OPCODE_DP3: - EMIT_2ARG_ARITH( A0_DP3 ); - break; + /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(consts1, W, W, W, W), + swizzle(tmp, Y, Y, Y, Y), + swizzle(tmp, X, X, X, X)); + break; + + case OPCODE_DP3: + EMIT_2ARG_ARITH(A0_DP3); + break; case OPCODE_DP4: EMIT_2ARG_ARITH( A0_DP4 ); @@ -638,62 +671,86 @@ static void upload_program( struct i915_fragment_program *p ) break; case OPCODE_SIN: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); + consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); + + /* Reduce range from repeating about [-pi,pi] to [-1,1] */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + src0, + swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ + swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */ + + i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, + swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ + swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ - i915_emit_arith( p, + /* Compute sin using a quadratic and quartic. It gives continuity + * that repeating the Taylor series lacks every 2*pi, and has + * reduced error. + * + * The idea was described at: + * http://www.devmaster.net/forums/showthread.php?t=5784 + */ + /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), + 0); + + /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ + i915_emit_arith(p, A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, - i915_emit_const1f(p, 1.0/(M_PI * 2)), + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + tmp, 0); - i915_emit_arith( p, - A0_MOD, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - 0, 0 ); + /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ + i915_emit_arith(p, + A0_DP3, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, + swizzle(consts1, X, Y, ZERO, ZERO), + 0); - /* By choosing different taylor constants, could get rid of this mul: + /* tmp.x now contains a first approximation (y). Now, weight it + * against tmp.y**2 to get closer. */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - i915_emit_const1f(p, (M_PI * 2)), + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x - * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * result = DP4 t1.wzyx, sin_constants - */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X,X,ONE,ONE), - swizzle(tmp, X,ONE,ONE,ONE), 0); + /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + swizzle(tmp, ZERO, Y, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,X,Y), - swizzle(tmp, X,X,ONE,ONE), 0); + /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(consts1, W, W, W, W), + swizzle(tmp, Y, Y, Y, Y), + swizzle(tmp, X, X, X, X)); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,Y,W), - swizzle(tmp, X,Z,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(tmp, W, Z, Y, X ), - i915_emit_const4fv( p, sin_constants ), 0); - break; + break; case OPCODE_SLT: EMIT_2ARG_ARITH( A0_SLT ); diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index a19d4b65840..8a56fc334af 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -454,7 +454,12 @@ static void i915SetTexImages( i915ContextPtr i915, case MESA_FORMAT_Z16: t->intel.texelBytes = 2; - textureFormat = (MAPSURF_16BIT | MT_16BIT_L16); + if (tObj->DepthMode == GL_ALPHA) + textureFormat = (MAPSURF_16BIT | MT_16BIT_A16); + else if (tObj->DepthMode == GL_INTENSITY) + textureFormat = (MAPSURF_16BIT | MT_16BIT_I16); + else + textureFormat = (MAPSURF_16BIT | MT_16BIT_L16); break; case MESA_FORMAT_RGBA_DXT1: @@ -604,8 +609,13 @@ static void i915ImportTexObjState( struct gl_texture_object *texObj ) shadow = SS2_SHADOW_ENABLE; shadow |= intel_translate_compare_func( texObj->CompareFunc ); - minFilt = FILTER_4X4_FLAT; - magFilt = FILTER_4X4_FLAT; + if (texObj->Target == GL_TEXTURE_1D) { + minFilt = FILTER_NEAREST; + magFilt = FILTER_NEAREST; + } else { + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; + } } diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index bb5ce6473e1..9bca64ace81 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -117,6 +117,8 @@ const GLubyte *intelGetString( GLcontext *ctx, GLenum name ) chipset = "Intel(R) 865G"; break; case PCI_CHIP_I915_G: chipset = "Intel(R) 915G"; break; + case PCI_CHIP_E7221_G: + chipset = "Intel (R) E7221G (i915)"; break; case PCI_CHIP_I915_GM: chipset = "Intel(R) 915GM"; break; case PCI_CHIP_I945_G: diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index 50e61789002..634d5816276 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -361,6 +361,8 @@ do { \ #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) + #define INTEL_FIREVERTICES(intel) \ do { \ if ((intel)->prim.flush) \ @@ -451,6 +453,7 @@ extern int INTEL_DEBUG; #define PCI_CHIP_I855_GM 0x3582 #define PCI_CHIP_I865_G 0x2572 #define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A #define PCI_CHIP_I915_GM 0x2592 #define PCI_CHIP_I945_G 0x2772 #define PCI_CHIP_I945_GM 0x27A2 diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c index ca8610b4965..a66cfd62792 100644 --- a/src/mesa/drivers/dri/i915/intel_screen.c +++ b/src/mesa/drivers/dri/i915/intel_screen.c @@ -53,7 +53,7 @@ DRI_CONF_BEGIN DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_SECTION_END DRI_CONF_END; const GLuint __driNConfigOptions = 4; @@ -511,6 +511,7 @@ static GLboolean intelCreateContext( const __GLcontextModes *mesaVis, sharedContextPrivate ); case PCI_CHIP_I915_G: + case PCI_CHIP_E7221_G: case PCI_CHIP_I915_GM: case PCI_CHIP_I945_G: case PCI_CHIP_I945_GM: diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c index e5988a5ed6c..b333ec54bbd 100644 --- a/src/mesa/drivers/dri/i915/intel_state.c +++ b/src/mesa/drivers/dri/i915/intel_state.c @@ -189,12 +189,12 @@ static void intelDrawBuffer(GLcontext *ctx, GLenum mode ) if ( intel->sarea->pf_current_page == 1 ) front ^= 1; - intelSetFrontClipRects( intel ); - if (front) { + intelSetFrontClipRects( intel ); intel->drawRegion = &intel->intelScreen->front; intel->readRegion = &intel->intelScreen->front; } else { + intelSetBackClipRects( intel ); intel->drawRegion = &intel->intelScreen->back; intel->readRegion = &intel->intelScreen->back; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index b2787ee60ac..3c5ed4717f3 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -202,12 +202,19 @@ static void intel_wpos_triangle( intelContextPtr intel, { GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; - - __memcpy( ((char *)v0) + offset, v0, size ); - __memcpy( ((char *)v1) + offset, v1, size ); - __memcpy( ((char *)v2) + offset, v2, size ); + GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); + GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset); + GLfloat *v2_wpos = (GLfloat *)((char *)v2 + offset); + + __memcpy(v0_wpos, v0, size); + __memcpy(v1_wpos, v1, size); + __memcpy(v2_wpos, v2, size); - intel_draw_triangle( intel, v0, v1, v2 ); + v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h; + v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h; + v2_wpos[1] = -v2_wpos[1] + intel->driDrawable->h; + + intel_draw_triangle(intel, v0, v1, v2); } @@ -217,9 +224,14 @@ static void intel_wpos_line( intelContextPtr intel, { GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; + GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); + GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset); + + __memcpy(v0_wpos, v0, size); + __memcpy(v1_wpos, v1, size); - __memcpy( ((char *)v0) + offset, v0, size ); - __memcpy( ((char *)v1) + offset, v1, size ); + v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h; + v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h; intel_draw_line( intel, v0, v1 ); } @@ -230,8 +242,10 @@ static void intel_wpos_point( intelContextPtr intel, { GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; + GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); - __memcpy( ((char *)v0) + offset, v0, size ); + __memcpy(v0_wpos, v0, size); + v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h; intel_draw_point( intel, v0 ); } diff --git a/src/mesa/drivers/dri/i915tex/i915_fragprog.c b/src/mesa/drivers/dri/i915tex/i915_fragprog.c index cbea6092a81..6de605b2e88 100644 --- a/src/mesa/drivers/dri/i915tex/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915tex/i915_fragprog.c @@ -94,7 +94,7 @@ src_vector(struct i915_fragment_program *p, break; case FRAG_ATTRIB_FOGC: src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); - src = swizzle(src, W, W, W, W); + src = swizzle(src, W, ZERO, ZERO, ONE); break; case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: diff --git a/src/mesa/drivers/dri/i915tex/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915tex/intel_mipmap_tree.c index d33a7d409b2..4b9fda50d8c 100644 --- a/src/mesa/drivers/dri/i915tex/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i915tex/intel_mipmap_tree.c @@ -365,6 +365,7 @@ intel_miptree_image_data(struct intel_context *intel, } } +extern GLuint intel_compressed_alignment(GLenum); /* Copy mipmap image between trees */ void @@ -382,8 +383,12 @@ intel_miptree_image_copy(struct intel_context *intel, const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level); GLuint i; - if (dst->compressed) - height /= 4; + if (dst->compressed) { + GLuint alignment = intel_compressed_alignment(dst->internal_format); + height = (height + 3) / 4; + width = ((width + alignment - 1) & ~(alignment - 1)); + } + for (i = 0; i < depth; i++) { intel_region_copy(intel->intelScreen, dst->region, dst_offset + dst_depth_offset[i], diff --git a/src/mesa/drivers/dri/i915tex/intel_screen.c b/src/mesa/drivers/dri/i915tex/intel_screen.c index 2acdead63d5..181390f6215 100644 --- a/src/mesa/drivers/dri/i915tex/intel_screen.c +++ b/src/mesa/drivers/dri/i915tex/intel_screen.c @@ -56,7 +56,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_SECTION_END DRI_CONF_END; const GLuint __driNConfigOptions = 4; diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 9e4ff112dc3..66de6f583c3 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -70,6 +70,7 @@ DRIVER_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ + brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 8a1d1527db3..1d7a3cb74e6 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -76,8 +76,8 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0]; cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0]; - if (brw->attribs.Stencil->TestTwoSide) { - cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide; + if (brw->attribs.Stencil->_TestTwoSide) { + cc.cc0.bf_stencil_enable = brw->attribs.Stencil->_TestTwoSide; cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]); cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]); cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]); @@ -90,7 +90,8 @@ static void upload_cc_unit( struct brw_context *brw ) /* Not really sure about this: */ if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + (brw->attribs.Stencil->_TestTwoSide && + brw->attribs.Stencil->WriteMask[1])) cc.cc0.stencil_write_enable = 1; } diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index 49b2770a514..2a65697325b 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:16; + GLuint attrs:32; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -51,7 +51,7 @@ struct brw_clip_prog_key { GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:1; + GLuint pad0:17; GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 37a25a98fe3..5e7d644d4e0 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,7 +43,8 @@ static void upload_clip_unit( struct brw_context *brw ) memset(&clip, 0, sizeof(clip)); /* CACHE_NEW_CLIP_PROG */ - clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16; + clip.thread0.grf_reg_count = + ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1; clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index 918e0001870..57ebf388f58 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -220,8 +220,8 @@ static void apply_one_offset( struct brw_clip_compile *c, struct brw_indirect vert ) { struct brw_compile *p = &c->func; - struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); - struct brw_reg z = get_element(pos, 2); + struct brw_reg z = deref_1f(vert, c->header_position_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); brw_ADD(p, z, z, vec1(c->reg.offset)); } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 397a9bd3f5c..e19d36eb493 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -44,6 +44,8 @@ #include "api_noop.h" #include "vtxfmt.h" +#include "shader/shader_api.h" + /*************************************** * Mesa's Driver Functions ***************************************/ @@ -60,12 +62,21 @@ static const struct dri_extension brw_extensions[] = { NULL, NULL } }; +static void brwUseProgram(GLcontext *ctx, GLuint program) +{ + _mesa_use_program(ctx, program); +} +static void brwInitProgFuncs( struct dd_function_table *functions ) +{ + functions->UseProgram = brwUseProgram; +} static void brwInitDriverFunctions( struct dd_function_table *functions ) { intelInitDriverFunctions( functions ); brwInitTextureFuncs( functions ); brwInitFragProgFuncs( functions ); + brwInitProgFuncs( functions ); } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 5bf0ed536ba..3343bedde96 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -305,7 +305,7 @@ static void upload_constant_buffer(struct brw_context *brw) if (!brw_pool_alloc(pool, bufsz, - 6, + 1 << 6, &brw->curbe.gs_offset)) { _mesa_printf("out of GS memory for curbe\n"); assert(0); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 101828b8ce6..d60d6c2b5f2 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -240,6 +240,8 @@ #define BRW_FRONTWINDING_CW 0 #define BRW_FRONTWINDING_CCW 1 +#define BRW_SPRITE_POINT_ENABLE 16 + #define BRW_INDEX_BYTE 0 #define BRW_INDEX_WORD 1 #define BRW_INDEX_DWORD 2 diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 6150cac4aa3..133d8f44fb7 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -291,7 +291,7 @@ static void get_space( struct brw_context *brw, struct gl_buffer_object **vbo_return, GLuint *offset_return ) { - size = (size + 63) & ~63; + size = ALIGN(size, 64); if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE) wrap_buffers(brw, size); @@ -593,6 +593,31 @@ void brw_upload_indices( struct brw_context *brw, ib_size, index_buffer->ptr, bufferobj); + } else { + /* If the index buffer isn't aligned to its element size, we have to + * rebase it into a temporary. + */ + if ((get_size(index_buffer->type) - 1) & offset) { + struct gl_buffer_object *vbo; + GLuint voffset; + GLubyte *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + bufferobj); + map += offset; + get_space(brw, ib_size, &vbo, &voffset); + + ctx->Driver.BufferSubData(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + voffset, + ib_size, + map, + vbo); + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + + bufferobj = vbo; + offset = voffset; + } } /* Emit the indexbuffer packet: diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9d46aac264f..c138d15fe82 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -335,14 +335,14 @@ static __inline struct brw_reg brw_imm_ud( GLuint ud ) static __inline struct brw_reg brw_imm_uw( GLushort uw ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); - imm.dw1.ud = uw; + imm.dw1.ud = uw | (uw << 16); return imm; } static __inline struct brw_reg brw_imm_w( GLshort w ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); - imm.dw1.d = w; + imm.dw1.d = w | (w << 16); return imm; } @@ -649,6 +649,16 @@ static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); } +static __inline struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) { return brw_address_reg(ptr.addr_subnr); @@ -669,7 +679,10 @@ static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offse return ptr; } - +static __inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} void brw_pop_insn_state( struct brw_compile *p ); void brw_push_insn_state( struct brw_compile *p ); @@ -809,9 +822,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size); -void brw_WHILE(struct brw_compile *p, +struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, @@ -861,5 +876,6 @@ void brw_math_invert( struct brw_compile *p, struct brw_reg dst, struct brw_reg src); - +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 1c717e418a8..e80eec2e5bb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -164,7 +164,7 @@ static void brw_set_src0( struct brw_instruction *insn, } -static void brw_set_src1( struct brw_instruction *insn, +void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); @@ -186,7 +186,7 @@ static void brw_set_src1( struct brw_instruction *insn, * in the future: */ assert (reg.address_mode == BRW_ADDRESS_DIRECT); - assert (reg.file == BRW_GENERAL_REGISTER_FILE); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits3.da1.src1_subreg_nr = reg.subnr; @@ -608,6 +608,34 @@ void brw_ENDIF(struct brw_compile *p, } } +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + /* DO/WHILE loop: */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) @@ -619,13 +647,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ - brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; return insn; } @@ -633,7 +663,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) -void brw_WHILE(struct brw_compile *p, +struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -657,14 +687,16 @@ void brw_WHILE(struct brw_compile *p, insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.jump_count = do_insn - insn + 1; insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 29a4e80ce1b..18a4537c323 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -40,11 +40,11 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { + GLuint attrs:32; GLuint primitive:4; - GLuint attrs:16; GLuint hint_gs_always:1; GLuint need_gs_prog:1; - GLuint pad:10; + GLuint pad:26; }; struct brw_gs_compile { diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 5826c01d4f9..5db4dd4603b 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -46,7 +46,8 @@ static void upload_gs_unit( struct brw_context *brw ) /* CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { - gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16; + gs.thread0.grf_reg_count = + ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1; gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; } diff --git a/src/mesa/drivers/dri/i965/brw_metaops.c b/src/mesa/drivers/dri/i965/brw_metaops.c index 6e030f191ef..cd6d287aa06 100644 --- a/src/mesa/drivers/dri/i965/brw_metaops.c +++ b/src/mesa/drivers/dri/i965/brw_metaops.c @@ -195,7 +195,7 @@ static void init_metaops_state( struct brw_context *brw ) vp_prog, strlen(vp_prog), brw->metaops.vp); - brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp; + brw->metaops.attribs.VertexProgram->_Current = brw->metaops.vp; brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE; brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 752fe49bcbf..389fd89d0c9 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -125,6 +125,9 @@ static void brwProgramStringNotify( GLcontext *ctx, struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; if (p == vp) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + if (p->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &p->program); + } p->id = brw->program_id++; p->param_state = p->program.Base.Parameters->StateFlags; diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 6dcfa628620..83e2314a027 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -74,6 +74,11 @@ static void compile_sf_prog( struct brw_context *brw, if (c.key.attrs & (1<<i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; + if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { + c.point_attrs[i].CoordReplace = + brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0]; + } else + c.point_attrs[i].CoordReplace = GL_FALSE; idx++; } @@ -90,7 +95,10 @@ static void compile_sf_prog( struct brw_context *brw, break; case SF_POINTS: c.nr_verts = 1; - brw_emit_point_setup( &c, GL_TRUE ); + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, GL_TRUE ); + else + brw_emit_point_setup( &c, GL_TRUE ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; @@ -162,7 +170,8 @@ static void upload_sf_prog( struct brw_context *brw ) break; } - + key.do_point_sprite = brw->attribs.Point->PointSprite; + key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; /* _NEW_LIGHT */ key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT); key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); @@ -179,7 +188,7 @@ static void upload_sf_prog( struct brw_context *brw ) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON), + .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index b321cda8c51..1aadc71de86 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -45,14 +45,19 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { + GLuint attrs:32; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; - GLuint attrs:16; GLuint frontface_ccw:1; - GLuint pad:11; + GLuint do_point_sprite:1; + GLuint pad:10; + GLenum SpriteOrigin; }; +struct brw_sf_point_tex { + GLboolean CoordReplace; +}; struct brw_sf_compile { struct brw_compile func; @@ -94,12 +99,14 @@ struct brw_sf_compile { GLubyte attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; + struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_anyprim_setup( struct brw_sf_compile *c ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 94be8154964..2f06cc5ec2c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -503,6 +503,90 @@ void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) } } +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + if (!tex->CoordReplace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + } + + if (tex->CoordReplace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->dx0, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + } + + { + brw_set_predicate_control_flag_value(p, pc); + if (tex->CoordReplace) { + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } else { + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + /* Points setup - several simplifications as all attributes are * constant across the face of the point (point sprites excluded!) */ @@ -569,6 +653,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; struct brw_instruction *jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -623,6 +708,19 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) } brw_land_fwd_jump(p, jmp); + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_point_sprite_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + brw_emit_point_setup( c, GL_FALSE ); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 2fd75a0c571..7445d59f5d7 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -38,6 +38,8 @@ static void upload_sf_vp(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; memset(&sfv, 0, sizeof(sfv)); @@ -47,14 +49,14 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ if (!brw->metaops.active) { - const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m; + const GLfloat *v = ctx->Viewport._WindowMap.m; sfv.viewport.m00 = v[MAT_SX]; sfv.viewport.m11 = - v[MAT_SY]; - sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; sfv.viewport.m30 = v[MAT_TX]; sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h; - sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; } else { sfv.viewport.m00 = 1; @@ -118,7 +120,7 @@ static void upload_sf_unit( struct brw_context *brw ) memset(&sf, 0, sizeof(sf)); /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16; + sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; @@ -184,6 +186,7 @@ static void upload_sf_unit( struct brw_context *brw ) /* _NEW_POINT */ sf.sf6.point_rast_rule = 1; /* opengl conventions */ sf.sf7.point_size = brw->attribs.Point->_Size * (1<<3); + sf.sf7.sprite_point = brw->attribs.Point->PointSprite; sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated; sf.sf7.aa_line_distance_mode = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 71c6938f9a3..5739c5c45e6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -149,7 +149,7 @@ GLuint brw_upload_cache( struct brw_cache *cache, GLuint hash = hash_key(key, key_size); void *tmp = _mesa_malloc(key_size + cache->aux_size); - if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) { + if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { /* Should not be possible: */ _mesa_printf("brw_pool_alloc failed\n"); diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c index b9926f2a5d7..cf7cdd05a88 100644 --- a/src/mesa/drivers/dri/i965/brw_state_pool.c +++ b/src/mesa/drivers/dri/i965/brw_state_pool.c @@ -41,10 +41,9 @@ GLboolean brw_pool_alloc( struct brw_mem_pool *pool, GLuint align, GLuint *offset_return) { - GLuint align_mask = (1<<align)-1; - GLuint fixup = ((pool->offset + align_mask) & ~align_mask) - pool->offset; + GLuint fixup = ALIGN(pool->offset, align) - pool->offset; - size = (size + 3) & ~3; + size = ALIGN(size, 4); if (pool->offset + fixup + size >= pool->size) { _mesa_printf("%s failed\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 9d4b9867d24..ad29316a42a 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -154,13 +154,19 @@ brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_RGB_S3TC: case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + case GL_RGBA_S3TC: case GL_RGBA4_S3TC: case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */ + return &_mesa_texformat_rgba_dxt5; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: @@ -168,6 +174,25 @@ brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_DEPTH_COMPONENT32: return &_mesa_texformat_z16; + case GL_SRGB_EXT: + case GL_SRGB8_EXT: + case GL_SRGB_ALPHA_EXT: + case GL_SRGB8_ALPHA8_EXT: + case GL_SLUMINANCE_EXT: + case GL_SLUMINANCE8_EXT: + case GL_SLUMINANCE_ALPHA_EXT: + case GL_SLUMINANCE8_ALPHA8_EXT: + case GL_COMPRESSED_SRGB_EXT: + case GL_COMPRESSED_SRGB_ALPHA_EXT: + case GL_COMPRESSED_SLUMINANCE_EXT: + case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: + return &_mesa_texformat_srgba8; + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return &_mesa_texformat_srgb_dxt1; + default: fprintf(stderr, "unexpected texture format %s in %s\n", _mesa_lookup_enum_by_nr(internalFormat), diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index d4888a42fbf..427a1325dce 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -37,7 +37,6 @@ #include "intel_tex_layout.h" #include "macros.h" - GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) { /* XXX: these vary depending on image format: @@ -53,11 +52,20 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint pack_x_pitch, pack_x_nr; GLuint pack_y_pitch; GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; - mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; mt->total_height = 0; + + if (mt->compressed) { + align_w = intel_compressed_alignment(mt->internal_format); + mt->pitch = ALIGN(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + mt->pitch = ALIGN(mt->width0 * mt->cpp, 4) / mt->cpp; + pack_y_pitch = ALIGN(mt->height0, align_h); + } - pack_y_pitch = MAX2(mt->height0, 2); pack_x_pitch = mt->pitch; pack_x_nr = 1; @@ -83,20 +91,30 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t mt->total_height += y; - - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - } - width = minify(width); height = minify(height); depth = minify(depth); + + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 4ca6e99db0b..76d0c291be6 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -53,7 +53,7 @@ static const struct { GLuint min_entry_size; GLuint max_entry_size; } limits[CS+1] = { - { 8, 32, 1, 5 }, /* vs */ + { 16, 32, 1, 5 }, /* vs */ { 4, 8, 1, 5 }, /* gs */ { 6, 8, 1, 5 }, /* clp */ { 1, 8, 1, 12 }, /* sf */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index fdb5785d67d..36636b5ffd6 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -67,6 +67,12 @@ struct brw_vs_compile { struct brw_reg r1; struct brw_reg regs[PROGRAM_ADDRESS+1][128]; struct brw_reg tmp; + struct brw_reg stack; + + struct { + GLboolean used_in_src; + struct brw_reg reg; + } output_regs[128]; struct brw_reg userplane[6]; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index c38e998d48f..f8a4432fb7f 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -134,6 +134,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) WRITEMASK_X); reg++; } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; /* Some opcodes need an internal temporary: @@ -213,57 +223,65 @@ static void unalias2( struct brw_vs_compile *c, } } +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint cond) +{ + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control_flag_value(p, 0xff); +} +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} - +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} static void emit_slt( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - /* Could be done with an if/else/endif, but this method uses half - * the instructions. Note that we are careful to reference the - * arguments before writing the dest. That means we emit the - * instructions in an odd order and have to play with the flag - * values. - */ - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - - /* Write all values to 1: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(1.0)); + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} - /* Where the test succeeded, overwite with zero: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_pop_insn_state(p); +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); } +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} static void emit_sge( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - - /* Write all values to zero: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0)); - - /* Where the test succeeded, overwite with 1: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - brw_pop_insn_state(p); + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); } - static void emit_max( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, @@ -592,9 +610,13 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: - case PROGRAM_STATE_VAR: assert(c->regs[file][index].nr != 0); return c->regs[file][index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -668,28 +690,28 @@ static void emit_arl( struct brw_vs_compile *c, * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register src ) + struct prog_src_register *src ) { struct brw_reg reg; - if (src.File == PROGRAM_UNDEFINED) + if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src.RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); else - reg = get_reg(c, src.File, src.Index); + reg = get_reg(c, src->File, src->Index); /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0), - GET_SWZ(src.Swizzle, 1), - GET_SWZ(src.Swizzle, 2), - GET_SWZ(src.Swizzle, 3)); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); /* Note this is ok for non-swizzle instructions: */ - reg.negate = src.NegateBase ? 1 : 0; + reg.negate = src->NegateBase ? 1 : 0; return reg; } @@ -891,17 +913,50 @@ static void emit_vertex_write( struct brw_vs_compile *c) } - - +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, target_insn; + struct prog_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->vp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case OPCODE_CAL: + case OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } +} /* Emit the fragment program instructions here. */ -void brw_vs_emit( struct brw_vs_compile *c ) +void brw_vs_emit(struct brw_vs_compile *c ) { +#define MAX_IFSN 32 struct brw_compile *p = &c->func; GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn; + GLuint insn, if_insn = 0; + struct brw_instruction *end_inst; + struct brw_instruction *if_inst[MAX_IFSN]; + struct brw_indirect stack_index = brw_indirect(0, 0); + GLuint index; + GLuint file; if (INTEL_DEBUG & DEBUG_VS) { _mesa_printf("\n\n\nvs-emit:\n"); @@ -912,9 +967,24 @@ void brw_vs_emit( struct brw_vs_compile *c ) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { + GLuint i; + struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + GLuint index = src->Index; + GLuint file = src->File; + if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) + c->output_regs[index].used_in_src = GL_TRUE; + } + } + /* Static register allocation */ brw_vs_alloc_regs(c); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { @@ -924,17 +994,29 @@ void brw_vs_emit( struct brw_vs_compile *c ) /* Get argument regs. SWZ is special and does this itself. */ + inst->Data = &p->store[p->nr_insn]; if (inst->Opcode != OPCODE_SWZ) - for (i = 0; i < 3; i++) - args[i] = get_arg(c, inst->SrcReg[i]); + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, src); + } /* Get dest regs. Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So * care needs to be taken emitting multi-operation instructions. - */ - dst = get_dst(c, inst->DstReg); + */ + index = inst->DstReg.Index; + file = inst->DstReg.File; + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, inst->DstReg); - switch (inst->Opcode) { case OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); @@ -1003,12 +1085,25 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); break; + + case OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; case OPCODE_SGE: emit_sge(p, dst, args[0], args[1]); break; + case OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; case OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; + case OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; case OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; @@ -1021,21 +1116,60 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; + case OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case OPCODE_RET: + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); case OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; case OPCODE_PRINT: + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: break; default: + _mesa_printf("Unsupport opcode %d in vertex shader\n", inst->Opcode); break; } + if (inst->DstReg.File == PROGRAM_OUTPUT + &&inst->DstReg.Index != VERT_RESULT_HPOS + &&c->output_regs[inst->DstReg.Index].used_in_src) + brw_MOV(p, get_dst(c, inst->DstReg), dst); + release_tmps(c); } + end_inst = &p->store[p->nr_insn]; emit_vertex_write(c); - + post_vs_emit(c, end_inst); + for (insn = 0; insn < nr_insns; insn++) + c->vp->program.Base.Instructions[insn].Data = NULL; } - - - - - diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c225bf8f5c5..f561979138c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw ) /* CACHE_NEW_VS_PROG */ vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16; + vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs.thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index 14483b325a9..c06ef5c374b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -524,10 +524,13 @@ static void emit_op3fn(struct tnl_program *p, GLuint nr = p->program->Base.NumInstructions++; if (nr >= p->nr_instructions) { + int new_nr_instructions = p->nr_instructions * 2; + p->program->Base.Instructions = _mesa_realloc(p->program->Base.Instructions, sizeof(struct prog_instruction) * p->nr_instructions, - sizeof(struct prog_instruction) * (p->nr_instructions *= 2)); + sizeof(struct prog_instruction) * new_nr_instructions); + p->nr_instructions = new_nr_instructions; } { @@ -1167,6 +1170,11 @@ static void build_fog( struct tnl_program *p ) } else { input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); + if (p->state->fog_option && + p->state->tnl_do_vertex_fog) + input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); + else + input = register_input(p, VERT_ATTRIB_FOG); } if (p->state->fog_option && @@ -1575,7 +1583,7 @@ static void update_tnl_program( struct brw_context *brw ) struct gl_vertex_program *old = brw->tnl_program; /* _NEW_PROGRAM */ - if (brw->attribs.VertexProgram->_Enabled) + if (brw->attribs.VertexProgram->_Current) return; /* Grab all the relevent state and put it in a single structure: @@ -1622,7 +1630,8 @@ const struct brw_tracked_state brw_tnl_vertprog = { _NEW_FOG | _NEW_HINT | _NEW_POINT | - _NEW_TEXTURE), + _NEW_TEXTURE | + _NEW_TEXTURE_MATRIX), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_INPUT_VARYING), .cache = 0 @@ -1638,8 +1647,8 @@ static void update_active_vertprog( struct brw_context *brw ) const struct gl_vertex_program *prev = brw->vertex_program; /* NEW_PROGRAM */ - if (brw->attribs.VertexProgram->_Enabled) { - brw->vertex_program = brw->attribs.VertexProgram->Current; + if (brw->attribs.VertexProgram->_Current) { + brw->vertex_program = brw->attribs.VertexProgram->_Current; } else { /* BRW_NEW_TNL_PROGRAM */ diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index f80ba178039..b2ad0f7ba9d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -29,7 +29,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ - +#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" @@ -66,7 +66,11 @@ GLuint brw_wm_nr_args( GLuint opcode ) case OPCODE_POW: case OPCODE_SUB: case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SLE: case OPCODE_SLT: + case OPCODE_SEQ: + case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: @@ -150,46 +154,50 @@ static void do_wm_prog( struct brw_context *brw, c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); + if (brw_wm_is_glsl(&c->fp->program)) { + brw_wm_glsl_emit(brw, c); + } else { + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Hal optimization - */ - brw_wm_pass_hal (c); + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Hal optimization + */ + brw_wm_pass_hal (c); - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - /* This is where we start emitting gen4 code: - */ - brw_init_compile(brw, &c->func); - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; + /* Register allocation. + */ + c->grf_limit = BRW_WM_MAX_GRF/2; + + /* This is where we start emitting gen4 code: + */ + brw_init_compile(brw, &c->func); + + brw_wm_pass2(c); + + c->prog_data.total_grf = c->max_wm_grf; + if (c->last_scratch) { + c->prog_data.total_scratch = + c->last_scratch + 0x40; + } else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); } - /* Emit GEN4 code. - */ - brw_wm_emit(c); - /* get the program */ program = brw_get_program(&c->func, &program_size); @@ -242,7 +250,8 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + (brw->attribs.Stencil->_TestTwoSide && + brw->attribs.Stencil->WriteMask[1])) lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; } @@ -284,7 +293,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1]; + key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); /* _NEW_LIGHT */ key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT); @@ -301,11 +310,38 @@ static void brw_wm_populate_key( struct brw_context *brw, key->shadowtex_mask |= 1<<i; } - if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1<<i; + if (t->Image[0][t->BaseLevel]->TexFormat->MesaFormat == + MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1<< i; + } } } - + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * This pretty much becomes moot with DRI2 and redirected buffers anyway, + * as our origins will always be zero then. + */ + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } /* Extra info: */ @@ -344,6 +380,7 @@ const struct brw_tracked_state brw_wm_prog = { _NEW_POLYGON | _NEW_LINE | _NEW_LIGHT | + _NEW_BUFFERS | _NEW_TEXTURE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f5fddfdb68a..9fb231d8689 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -69,9 +69,12 @@ struct brw_wm_prog_key { GLuint runtime_check_aads_emit:1; GLuint yuvtex_mask:8; - GLuint pad1:24; + GLuint yuvtex_swap_mask:8; /* UV swaped */ + GLuint pad1:16; GLuint program_string_id:32; + GLuint origin_x, origin_y; + GLuint drawable_height; }; @@ -194,6 +197,7 @@ struct brw_wm_compile { GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; + GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; @@ -231,6 +235,15 @@ struct brw_wm_compile { GLuint grf_limit; GLuint max_wm_grf; GLuint last_scratch; + + struct { + GLboolean inited; + struct brw_reg reg; + } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; + struct brw_reg emit_mask_reg; + GLuint reg_index; + GLuint tmp_index; }; @@ -259,4 +272,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ); +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 80bd5763da0..fd66631d119 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -122,26 +122,30 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. + struct brw_compile *p = &c->func; + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. */ if (mask & WRITEMASK_X) { - brw_MOV(p, + /* X' = X - origin */ + brw_ADD(p, dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW)); + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); } if (mask & WRITEMASK_Y) { - /* TODO -- window_height - Y */ - brw_MOV(p, + /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + brw_ADD(p, dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_UW))); - + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); } } @@ -219,6 +223,10 @@ static void emit_pinterp( struct brw_compile *p, if (mask & (1<<i)) { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } + } + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { brw_MUL(p, dst[i], dst[i], w[3]); } } @@ -229,20 +237,20 @@ static void emit_cinterp( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ - } - } + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ + } + } } @@ -343,11 +351,10 @@ static void emit_lrp( struct brw_compile *p, } } } - - -static void emit_slt( struct brw_compile *p, +static void emit_sop( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, + GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { @@ -356,34 +363,66 @@ static void emit_slt( struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_MOV(p, dst[i], brw_imm_f(0)); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); brw_MOV(p, dst[i], brw_imm_f(1.0)); brw_set_predicate_control_flag_value(p, 0xff); } } } -/* Isn't this just the same as the above with the args swapped? - */ -static void emit_sge( struct brw_compile *p, +static void emit_slt( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - GLuint i; + emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); +} - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0)); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); - brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control_flag_value(p, 0xff); - } - } +static void emit_sle( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); +} + +static void emit_sgt( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); +} + +static void emit_sge( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); } +static void emit_seq( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); +} +static void emit_sne( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); +} static void emit_cmp( struct brw_compile *p, const struct brw_reg *dst, @@ -465,6 +504,9 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -482,6 +524,9 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -500,6 +545,9 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -543,8 +591,11 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - function == BRW_MATH_FUNCTION_SINCOS); + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + + //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || + // function == BRW_MATH_FUNCTION_SINCOS); brw_MOV(p, brw_message_reg(2), arg0[0]); @@ -567,6 +618,9 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_push_insn_state(p); @@ -670,7 +724,6 @@ static void emit_tex( struct brw_wm_compile *c, responseLength, msgLength, 0); - } @@ -1081,7 +1134,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_WPOSXY: - emit_wpos_xy(p, dst, dst_flags, args[0]); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_PIXELW: @@ -1209,9 +1262,21 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_slt(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SLE: + emit_sle(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SGT: + emit_sgt(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SEQ: + emit_seq(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SNE: + emit_sne(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_LIT: emit_lit(p, dst, dst_flags, args[0]); @@ -1232,7 +1297,8 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - assert(0); + _mesa_printf("unsupport opcode %d in fragment program\n", + inst->opcode); } for (i = 0; i < 4; i++) diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index dc57fd263c2..f895f968b52 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -144,7 +144,7 @@ static struct prog_dst_register dst_undef( void ) static struct prog_dst_register get_temp( struct brw_wm_compile *c ) { - int bit = ffs( ~c->fp_temp ); + int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { _mesa_printf("%s: out of temporaries\n", __FILE__); @@ -158,7 +158,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) { - c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP); + c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); } @@ -176,6 +176,7 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); *inst = *inst0; + inst->Data = (void *)inst0; return inst; } @@ -201,7 +202,6 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; - return inst; } @@ -361,6 +361,37 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<<idx; } +static void emit_ddx( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint idx = inst->SrcReg[0].Index; + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + + c->fp_deriv_emitted |= 1<<idx; + emit_op(c, + OPCODE_DDX, + inst->DstReg, + 0, 0, 0, + interp, + get_pixel_w(c), + src_undef()); +} + +static void emit_ddy( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint idx = inst->SrcReg[0].Index; + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + + c->fp_deriv_emitted |= 1<<idx; + emit_op(c, + OPCODE_DDY, + inst->DstReg, + 0, 0, 0, + interp, + get_pixel_w(c), + src_undef()); +} /*********************************************************************** * Hacks to extend the program parameter and constant lists. @@ -463,17 +494,20 @@ static void precalc_dst( struct brw_wm_compile *c, if (dst.WriteMask & WRITEMASK_XZ) { + struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); /* dst.xz = swz src0.1zzz */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XZ), + inst->SaturateMode, 0, 0, + src_swizzle(src0, SWIZZLE_ONE, z, z, z), + src_undef(), + src_undef()); + /* Avoid letting negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].NegateBase &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -496,15 +530,19 @@ static void precalc_lit( struct brw_wm_compile *c, struct prog_dst_register dst = inst->DstReg; if (dst.WriteMask & WRITEMASK_XW) { + struct prog_instruction *swz; + /* dst.xw = swz src0.1111 */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XW), + 0, 0, 0, + src_swizzle1(src0, SWIZZLE_ONE), + src_undef(), + src_undef()); + /* Avoid letting the negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].NegateBase = 0; } @@ -618,17 +656,21 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); } else { + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<inst->TexSrcUnit); + /* CONST C0 = { -.5, -.0625, -.5, 1.164 } CONST C1 = { 1.596, -0.813, 2.018, -.391 } UYV = TEX ... UYV.xyz = ADD UYV, C0 UYV.y = MUL UYV.y, C0.w - RGB.xyz = MAD UYV.xxz, C1, UYV.y + if (UV swaped) + RGB.xyz = MAD UYV.zzx, C1, UYV.y + else + RGB.xyz = MAD UYV.xxz, C1, UYV.y RGB.y = MAD UYV.z, C1.w, RGB.y */ struct prog_dst_register dst = inst->DstReg; - struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register tmp = get_temp(c); struct prog_src_register tmpsrc = src_reg_from_dst(tmp); struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); @@ -642,7 +684,7 @@ static void precalc_tex( struct brw_wm_compile *c, inst->SaturateMode, inst->TexSrcUnit, inst->TexSrcTarget, - src0, + coord, src_undef(), src_undef()); @@ -658,6 +700,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* YUV.y = MUL YUV.y, C0.w */ + emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), @@ -666,13 +709,18 @@ static void precalc_tex( struct brw_wm_compile *c, src_swizzle1(C0, W), src_undef()); - /* RGB.xyz = MAD YUV.xxz, C1, YUV.y + /* + * if (UV swaped) + * RGB.xyz = MAD YUV.zzx, C1, YUV.y + * else + * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ + emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, 0, 0, - src_swizzle(tmpsrc, X,X,Z,Z), + swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); @@ -689,7 +737,8 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp); } - if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || + (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); } @@ -710,7 +759,7 @@ static GLboolean projtex( struct brw_wm_compile *c, return 0; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<src.Index)) == 0) + (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) return 0; else return 1; @@ -939,7 +988,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_LIT: precalc_lit(c, inst); break; - + + case OPCODE_TEX: + precalc_tex(c, inst); + break; + case OPCODE_TXP: precalc_txp(c, inst); break; @@ -957,8 +1010,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - + case OPCODE_DDX: + emit_ddx(c, inst); + break; + case OPCODE_DDY: + emit_ddy(c, inst); + break; case OPCODE_END: + emit_fog(c); + emit_fb_write(c); + break; case OPCODE_PRINT: break; @@ -967,15 +1028,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) break; } } - - emit_fog(c); - emit_fb_write(c); - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("\n\n\npass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("\n\n\npass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c new file mode 100644 index 00000000000..5a1f80dc41f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -0,0 +1,1370 @@ +#include "macros.h" +#include "shader/prog_parameter.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" + +/* Only guess, need a flag in gl_fragment_program later */ +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) +{ + int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { + struct prog_instruction *inst = &fp->Base.Instructions[i]; + switch (inst->Opcode) { + case OPCODE_IF: + case OPCODE_INT: + case OPCODE_ENDIF: + case OPCODE_CAL: + case OPCODE_BRK: + case OPCODE_RET: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_BGNLOOP: + return GL_TRUE; + default: + break; + } + } + return GL_FALSE; +} + +static void set_reg(struct brw_wm_compile *c, int file, int index, + int component, struct brw_reg reg) +{ + c->wm_regs[file][index][component].reg = reg; + c->wm_regs[file][index][component].inited = GL_TRUE; +} + +static int get_scalar_dst_index(struct prog_instruction *inst) +{ + int i; + for (i = 0; i < 4; i++) + if (inst->DstReg.WriteMask & (1<<i)) + break; + return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + struct brw_reg reg; + reg = brw_vec8_grf(c->tmp_index--, 0); + return reg; +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 127; +} + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +{ + struct brw_reg reg; + switch (file) { + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + file = PROGRAM_STATE_VAR; + break; + case PROGRAM_UNDEFINED: + return brw_null_reg(); + default: + break; + } + + if(c->wm_regs[file][index][component].inited) + reg = c->wm_regs[file][index][component].reg; + else + reg = brw_vec8_grf(c->reg_index, 0); + + if(!c->wm_regs[file][index][component].inited) { + set_reg(c, file, index, component, reg); + c->reg_index++; + } + + if (neg & (1<< component)) { + reg = negate(reg); + } + if (abs) + reg = brw_abs(reg); + return reg; +} + +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + struct brw_reg reg; + int nr_interp_regs = 0; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + + for (i = 0; i < 4; i++) { + reg = (i < c->key.nr_depth_regs) + ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); + } + c->reg_index += 2*c->key.nr_depth_regs; + { + int nr_params = c->fp->program.Base.Parameters->NumParameters; + struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + c->prog_data.nr_params = 4*nr_params; + for (i = 0; i < nr_params; i++) { + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index/8, + index%8); + c->prog_data.param[index] = + &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + c->nr_creg = 2*((4*nr_params+15)/16); + c->reg_index += c->nr_creg; + } + for (i = 0; i < FRAG_ATTRIB_MAX; i++) { + if (inputs & (1<<i)) { + nr_interp_regs++; + reg = brw_vec8_grf(c->reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, i, j, reg); + c->reg_index += 2; + + } + } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.curb_read_length = c->nr_creg; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; +} + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + struct prog_instruction *inst, int component, int nr) +{ + return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, + 0, 0); +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct prog_src_register *src, int index, int nr) +{ + int component = GET_SWZ(src->Swizzle, index); + return get_reg(c, src->File, src->Index, component, nr, + src->NegateBase, src->Abs); +} + +static void emit_abs( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (inst->DstReg.WriteMask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1); + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_MOV(p, dst, brw_abs(src)); + } + } + brw_set_saturate(p, 0); +} + +static void emit_int( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1) ; + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_RNDD(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mov( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1); + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_MOV(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_pixel_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + struct brw_reg dst0, dst1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0, 1); + dst1 = get_dst_reg(c, inst, 1, 1); + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } + +} + +static void emit_delta_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg dst0, dst1, src0, src1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0, 1); + dst1 = get_dst_reg(c, inst, 1, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst0, + retype(src0, BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst1, + retype(src1, BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } + +} + + +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr ) +{ + struct brw_compile *p = &c->func; + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); +} + +static void emit_fb_write(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + struct brw_reg src0;//, src1, src2, dst; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + if (c->key.source_depth_to_render_target) + { + if (c->key.computes_depth) { + src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } else { + src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + + fire_fb_write(c, 0, nr); +} + +static void emit_pixel_w( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + if (mask & WRITEMASK_W) { + struct brw_reg dst, src0, delta0, delta1; + struct brw_reg interp3; + + dst = get_dst_reg(c, inst, 3, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + + interp3 = brw_vec1_grf(src0.nr+1, 4); + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, delta0); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); + + /* Calc w */ + brw_math_16( p, dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + +static void emit_linterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), delta1); + } + } +} + +static void emit_cinterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, src0; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_pinterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0, w; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + w = get_src_reg(c, &inst->SrcReg[2], 3, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), + delta1); + brw_MUL(p, dst, dst, w); + } + } +} + +static void emit_xpd(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + for (i = 0; i < 4; i++) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i, 1); + src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); + src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct prog_instruction *inst, GLuint func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_rcp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); +} + +static void emit_rsq(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); +} + +static void emit_sin(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); +} + +static void emit_cos(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); +} + +static void emit_ex2(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); +} + +static void emit_lg2(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); +} + +static void emit_add(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_ADD(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_sub(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_ADD(p, dst, src0, negate(src1)); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mul(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_MUL(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_frc(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_FRC(p, dst, src0); + } + } + if (inst->SaturateMode != SATURATE_OFF) + brw_set_saturate(p, 0); +} + +static void emit_flr(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_RNDD(p, dst, src0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_max(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct prog_instruction *inst, GLuint cond) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); +} + +static void emit_slt(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_L); +} + +static void emit_sle(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_LE); +} + +static void emit_sgt(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_G); +} + +static void emit_sge(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_GE); +} + +static void emit_seq(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_EQ); +} + +static void emit_sne(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); +} + +static void emit_ddx(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + GLuint nr, i; + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + GLuint nr, i; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + nr = src0.nr; + w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_wpos_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0[2], dst[2]; + + dst[0] = get_dst_reg(c, inst, 0, 1); + dst[1] = get_dst_reg(c, inst, 1, 1); + + src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. + */ + if (mask & WRITEMASK_X) { + /* X' = X - origin_x */ + brw_ADD(p, + dst[0], + retype(src0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); + } + + if (mask & WRITEMASK_Y) { + /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + brw_ADD(p, + dst[1], + negate(retype(src0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + } +} + +/* TODO + BIAS on SIMD8 not workind yet... + */ +static void emit_txb(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i, 1); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->DstReg.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +} + +static void emit_tex(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + GLuint msg_len; + GLuint i, nr; + GLuint emit; + GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; + + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i, 1); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->DstReg.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +} + +static void post_wm_emit( struct brw_wm_compile *c ) +{ + GLuint nr_insns = c->fp->program.Base.NumInstructions; + GLuint insn, target_insn; + struct prog_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } +} + +static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) + +{ +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; + struct brw_instruction *inst0, *inst1; + int i, if_insn = 0, loop_insn = 0; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + + brw_init_compile(brw, &c->func); + c->reg_index = 0; + prealloc_reg(c); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + for (i = 0; i < c->nr_fp_insns; i++) { + struct prog_instruction *inst = &c->prog_instructions[i]; + struct prog_instruction *orig_inst; + + if ((orig_inst = inst->Data) != 0) + orig_inst->Data = current_insn(p); + + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + + switch (inst->Opcode) { + case WM_PIXELXY: + emit_pixel_xy(c, inst); + break; + case WM_DELTAXY: + emit_delta_xy(c, inst); + break; + case WM_PIXELW: + emit_pixel_w(c, inst); + break; + case WM_LINTERP: + emit_linterp(c, inst); + break; + case WM_PINTERP: + emit_pinterp(c, inst); + break; + case WM_CINTERP: + emit_cinterp(c, inst); + break; + case WM_WPOSXY: + emit_wpos_xy(c, inst); + break; + case WM_FB_WRITE: + emit_fb_write(c, inst); + break; + case OPCODE_ABS: + emit_abs(c, inst); + break; + case OPCODE_ADD: + emit_add(c, inst); + break; + case OPCODE_SUB: + emit_sub(c, inst); + break; + case OPCODE_FRC: + emit_frc(c, inst); + break; + case OPCODE_FLR: + emit_flr(c, inst); + break; + case OPCODE_LRP: + emit_lrp(c, inst); + break; + case OPCODE_INT: + emit_int(c, inst); + break; + case OPCODE_MOV: + emit_mov(c, inst); + break; + case OPCODE_DP3: + emit_dp3(c, inst); + break; + case OPCODE_DP4: + emit_dp4(c, inst); + break; + case OPCODE_XPD: + emit_xpd(c, inst); + break; + case OPCODE_DPH: + emit_dph(c, inst); + break; + case OPCODE_RCP: + emit_rcp(c, inst); + break; + case OPCODE_RSQ: + emit_rsq(c, inst); + break; + case OPCODE_SIN: + emit_sin(c, inst); + break; + case OPCODE_COS: + emit_cos(c, inst); + break; + case OPCODE_EX2: + emit_ex2(c, inst); + break; + case OPCODE_LG2: + emit_lg2(c, inst); + break; + case OPCODE_MAX: + emit_max(c, inst); + break; + case OPCODE_MIN: + emit_min(c, inst); + break; + case OPCODE_DDX: + emit_ddx(c, inst); + break; + case OPCODE_DDY: + emit_ddy(c, inst); + break; + case OPCODE_SLT: + emit_slt(c, inst); + break; + case OPCODE_SLE: + emit_sle(c, inst); + break; + case OPCODE_SGT: + emit_sgt(c, inst); + break; + case OPCODE_SGE: + emit_sge(c, inst); + break; + case OPCODE_SEQ: + emit_seq(c, inst); + break; + case OPCODE_SNE: + emit_sne(c, inst); + break; + case OPCODE_MUL: + emit_mul(c, inst); + break; + case OPCODE_POW: + emit_pow(c, inst); + break; + case OPCODE_MAD: + emit_mad(c, inst); + break; + case OPCODE_TEX: + emit_tex(c, inst); + break; + case OPCODE_TXB: + emit_txb(c, inst); + break; + case OPCODE_KIL_NV: + emit_kil(c); + break; + case OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: + break; + case OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + orig_inst = inst->Data; + orig_inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case OPCODE_BGNLOOP: + loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + loop_insn--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (inst0 > loop_inst[loop_insn]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + break; + default: + _mesa_printf("unsupported IR in fragment shader %d\n", + inst->Opcode); + } + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + post_wm_emit(c); + for (i = 0; i < c->fp->program.Base.NumInstructions; i++) + c->fp->program.Base.Instructions[i].Data = NULL; +} + +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + brw_wm_pass_fp(c); + c->tmp_index = 127; + brw_wm_emit_glsl(brw, c); + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 00f6f6b9a4f..1bfae5a069b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -168,6 +168,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_PAYLOAD: case PROGRAM_TEMPORARY: case PROGRAM_OUTPUT: + case PROGRAM_VARYING: break; case PROGRAM_LOCAL_PARAM: @@ -179,6 +180,8 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: case PROGRAM_NAMED_PARAM: { struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; @@ -197,6 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: /* These may change from run to run: */ ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index d668def7007..f6f3a38e9e0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -150,12 +150,17 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FLR: case OPCODE_FRC: case OPCODE_MOV: + case OPCODE_SWZ: read0 = writemask; break; case OPCODE_SUB: case OPCODE_SLT: + case OPCODE_SLE: case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SEQ: + case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: @@ -253,11 +258,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read0 = WRITEMASK_XYW; break; - case OPCODE_SWZ: case OPCODE_DST: case OPCODE_TXP: default: - assert(0); break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index a1edbd6168d..c1ce6a9b6b5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -328,7 +328,7 @@ void brw_wm_pass2( struct brw_wm_compile *c ) c->state = PASS2_DONE; if (INTEL_DEBUG & DEBUG_WM) { - brw_wm_print_program(c, "pass2/done"); + brw_wm_print_program(c, "pass2/done"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 794c7d98169..3c0952acf0d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -54,7 +54,7 @@ static GLuint translate_wrap_mode( GLenum wrap ) case GL_REPEAT: return BRW_TEXCOORDMODE_WRAP; case GL_CLAMP: - return BRW_TEXCOORDMODE_CLAMP_BORDER; /* conform likes it this way */ + return BRW_TEXCOORDMODE_CLAMP; case GL_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ case GL_CLAMP_TO_BORDER: diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 5b4f2abd0e2..7856da0896f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_wm.h" #include "bufmgr.h" /*********************************************************************** @@ -62,7 +63,7 @@ static void upload_wm_unit(struct brw_context *brw ) memset(&wm, 0, sizeof(wm)); /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16; + wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; @@ -71,7 +72,7 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.max_threads = max_threads; if (brw->wm.prog_data->total_scratch) { - GLuint per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024; + GLuint per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); GLuint total = per_thread * (max_threads + 1); /* Scratch space -- just have to make sure there is sufficient @@ -134,9 +135,13 @@ static void upload_wm_unit(struct brw_context *brw ) if (fp->UsesKill || brw->attribs.Color->AlphaEnabled) wm.wm5.program_uses_killpixel = 1; + + if (brw_wm_is_glsl(fp)) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; } - wm.wm5.enable_16_pix = 1; wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ wm.wm5.legacy_line_rast = 0; wm.wm5.legacy_global_depth_bias = 0; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d24c618a668..12cd08901c6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -69,7 +69,7 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -114,11 +114,29 @@ static GLuint translate_tex_format( GLuint mesa_format ) return BRW_SURFACEFORMAT_FXT1; case MESA_FORMAT_Z16: - return BRW_SURFACEFORMAT_L16_UNORM; + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I16_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A16_UNORM; + else + return BRW_SURFACEFORMAT_L16_UNORM; - case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; + return BRW_SURFACEFORMAT_DXT1_RGB; + + case MESA_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case MESA_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case MESA_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case MESA_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; default: assert(0); @@ -141,7 +159,7 @@ void brw_update_texture_surface( GLcontext *ctx, surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf->ss0.surface_type = translate_tex_target(tObj->Target); - surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat); + surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat, tObj->DepthMode); /* This is ok for all textures with channel width 8bit or less: */ @@ -181,11 +199,8 @@ static void upload_wm_surfaces(struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - struct brw_surface_binding_table bind; GLuint i; - memcpy(&bind, &brw->wm.bind, sizeof(bind)); - { struct brw_surface_state surf; struct intel_region *region = brw->state.draw_region; diff --git a/src/mesa/drivers/dri/i965/bufmgr_fake.c b/src/mesa/drivers/dri/i965/bufmgr_fake.c index fb4903dbc1d..3b7229c3bb3 100644 --- a/src/mesa/drivers/dri/i965/bufmgr_fake.c +++ b/src/mesa/drivers/dri/i965/bufmgr_fake.c @@ -169,7 +169,7 @@ static GLboolean alloc_from_pool( struct intel_context *intel, if (!block) return GL_FALSE; - sz = (buf->size + align-1) & ~(align-1); + sz = ALIGN(buf->size, align); block->mem = mmAllocMem(pool->heap, sz, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 64885ed9b4b..ab61d07426a 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -36,7 +36,7 @@ static void intel_batchbuffer_reset( struct intel_batchbuffer *batch ) assert(batch->map == NULL); batch->offset = (unsigned long)batch->ptr; - batch->offset = (batch->offset + 63) & ~63; + batch->offset = ALIGN(batch->offset, 64); batch->ptr = (unsigned char *) batch->offset; if (BATCH_SZ - batch->offset < BATCH_REFILL) { @@ -216,7 +216,7 @@ void intel_batchbuffer_align( struct intel_batchbuffer *batch, GLuint sz ) { unsigned long ptr = (unsigned long) batch->ptr; - unsigned long aptr = (ptr + align) & ~((unsigned long)align-1); + unsigned long aptr = ALIGN(ptr, align); GLuint fixup = aptr - ptr; if (intel_batchbuffer_space(batch) < fixup + sz) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index f88cbb2328d..d2068aa4b47 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -373,7 +373,7 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield flags) clear_depth = 0; if (flags & BUFFER_BIT_DEPTH) { - clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth); + clear_depth = (GLuint)(ctx->Depth.Clear * ctx->DrawBuffer->_DepthMax); } if (flags & BUFFER_BIT_STENCIL) { @@ -537,7 +537,8 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, { struct xy_setup_blit setup; struct xy_text_immediate_blit text; - int dwords = ((src_size + 7) & ~7) / 4; + int dwords = ALIGN(src_size, 8) / 4; + uint32_t opcode, br13; assert( logic_op - GL_CLEAR >= 0 ); assert( logic_op - GL_CLEAR < 0x10 ); diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c index de6a867cca4..769f75c5324 100644 --- a/src/mesa/drivers/dri/i965/intel_buffers.c +++ b/src/mesa/drivers/dri/i965/intel_buffers.c @@ -545,15 +545,14 @@ static void intelDrawBuffer(GLcontext *ctx, GLenum mode ) if ( intel->sarea->pf_current_page == 1 ) front ^= 1; - intelSetFrontClipRects( intel ); - - if (front) { + intelSetFrontClipRects(intel); if (intel->draw_region != intel->front_region) { intel_region_release(intel, &intel->draw_region); intel_region_reference(&intel->draw_region, intel->front_region); } } else { + intelSetBackClipRects(intel); if (intel->draw_region != intel->back_region) { intel_region_release(intel, &intel->draw_region); intel_region_reference(&intel->draw_region, intel->back_region); diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c index 5ee52829623..8591e5c364b 100644 --- a/src/mesa/drivers/dri/i965/intel_context.c +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -66,6 +66,7 @@ int INTEL_DEBUG = (0); #endif +#define need_GL_NV_point_sprite #define need_GL_ARB_multisample #define need_GL_ARB_point_parameters #define need_GL_ARB_texture_compression @@ -81,6 +82,13 @@ int INTEL_DEBUG = (0); #define need_GL_EXT_fog_coord #define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color +#define need_GL_ATI_separate_stencil +#define need_GL_EXT_point_parameters +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#define need_GL_ARB_shader_objects +#define need_GL_ARB_vertex_shader + #include "extension_helper.h" #ifndef VERBOSE @@ -146,6 +154,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_NV_point_sprite", GL_NV_point_sprite_functions }, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_cube_map", NULL }, @@ -158,6 +167,8 @@ const struct dri_extension card_extensions[] = { "GL_NV_texture_rectangle", NULL }, { "GL_EXT_texture_rectangle", NULL }, { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_point_sprite", NULL}, + { "GL_ARB_point_parameters", NULL }, { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, @@ -171,18 +182,33 @@ const struct dri_extension card_extensions[] = { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, { "GL_EXT_stencil_wrap", NULL }, + /* Do not enable this extension. It conflicts with GL_ATI_separate_stencil + * and 2.0's separate stencil, because mesa's computed _TestTwoSide will + * only reflect whether it's enabled through this extension, even if the + * application is using the other interfaces. + */ +/*{ "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },*/ { "GL_EXT_texture_edge_clamp", NULL }, { "GL_EXT_texture_env_combine", NULL }, { "GL_EXT_texture_env_dot3", NULL }, { "GL_EXT_texture_filter_anisotropic", NULL }, { "GL_EXT_texture_lod_bias", NULL }, + { "GL_EXT_texture_sRGB", NULL }, { "GL_3DFX_texture_compression_FXT1", NULL }, { "GL_APPLE_client_storage", NULL }, { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, { "GL_SGIS_generate_mipmap", NULL }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions}, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions}, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions}, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions}, + { "GL_ARB_fragment_shader", NULL }, + /* XXX not implement yet, to compile builtin glsl lib */ + { "GL_ARB_draw_buffers", NULL }, { NULL, NULL } }; @@ -399,17 +425,10 @@ GLboolean intelInitContext( struct intel_context *intel, switch(mesaVis->depthBits) { case 0: /* what to do in this case? */ case 16: - intel->depth_scale = 1.0/0xffff; intel->polygon_offset_scale = 1.0/0xffff; - intel->depth_clear_mask = ~0; - intel->ClearDepth = 0xffff; break; case 24: - intel->depth_scale = 1.0/0xffffff; intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */ - intel->depth_clear_mask = 0x00ffffff; - intel->stencil_clear_mask = 0xff000000; - intel->ClearDepth = 0x00ffffff; break; default: assert(0); @@ -551,6 +570,8 @@ void intelDestroyContext(__DRIcontextPrivate *driContextPriv) #endif /* free the Mesa context */ + intel->ctx.VertexProgram.Current = NULL; + intel->ctx.FragmentProgram.Current = NULL; _mesa_destroy_context(&intel->ctx); } diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h index a2447574cad..a588514907e 100644 --- a/src/mesa/drivers/dri/i965/intel_context.h +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -183,12 +183,8 @@ struct intel_context GLubyte clear_chan[4]; GLuint ClearColor; - GLuint ClearDepth; - GLfloat depth_scale; GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ - GLuint depth_clear_mask; - GLuint stencil_clear_mask; GLboolean hw_stencil; GLboolean hw_stipple; @@ -269,6 +265,8 @@ void UNLOCK_HARDWARE( struct intel_context *intel ); #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) + /* ================================================================ * Color packing: */ diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 8548bc88bac..51778023caa 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -75,7 +75,7 @@ struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel, mt->width0 = width0; mt->height0 = height0; mt->depth0 = depth0; - mt->cpp = compressed ? 2 : cpp; + mt->cpp = cpp; mt->compressed = compressed; switch (intel->intelScreen->deviceID) { @@ -128,7 +128,7 @@ int intel_miptree_pitch_align (struct intel_context *intel, int pitch) { if (!mt->compressed) - pitch = ((pitch * mt->cpp + 3) & ~3) / mt->cpp; + pitch = ALIGN(pitch * mt->cpp, 4) / mt->cpp; return pitch; } @@ -234,7 +234,7 @@ GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt, - +extern GLuint intel_compressed_alignment(GLenum); /* Upload data for a particular image. */ GLboolean intel_miptree_image_data(struct intel_context *intel, @@ -249,6 +249,17 @@ GLboolean intel_miptree_image_data(struct intel_context *intel, GLuint dst_offset = intel_miptree_image_offset(dst, face, level); const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); GLuint i; + GLuint width, height, alignment; + + width = dst->level[level].width; + height = dst->level[level].height; + + if (dst->compressed) { + alignment = intel_compressed_alignment(dst->internal_format); + src_row_pitch = ALIGN(src_row_pitch, alignment); + width = ALIGN(width, alignment); + height = (height + 3) / 4; + } DBG("%s\n", __FUNCTION__); for (i = 0; i < depth; i++) { @@ -260,8 +271,8 @@ GLboolean intel_miptree_image_data(struct intel_context *intel, src, src_row_pitch, 0, 0, /* source x,y */ - dst->level[level].width, - dst->level[level].height)) + width, + height)) return GL_FALSE; src += src_image_pitch; } diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index df9d6885c08..54a88c54b5d 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -91,11 +91,6 @@ static void set_bit( GLubyte *dest, dest[bit/8] |= 1 << (bit % 8); } -static int align(int x, int align) -{ - return (x + align - 1) & ~(align - 1); -} - /* Extract a rectangle's worth of data from the bitmap. Called * per-cliprect. */ @@ -147,7 +142,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, } if (row_align) - bit = (bit + row_align - 1) & ~(row_align - 1); + bit = ALIGN(bit, row_align); } return count; @@ -169,11 +164,8 @@ do_blit_bitmap( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct intel_region *dst = intel_drawbuf_region(intel); GLfloat tmpColor[4]; - - union { - GLuint ui; - GLubyte ub[4]; - } color; + GLubyte ubcolor[4]; + GLuint color8888, color565; if (!dst) return GL_FALSE; @@ -190,10 +182,14 @@ do_blit_bitmap( GLcontext *ctx, ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); } - UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); + + color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]); + color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); + /* Does zoom apply to bitmaps? */ @@ -235,10 +231,10 @@ do_blit_bitmap( GLcontext *ctx, dsty = dPriv->y + (dPriv->h - dsty - height); dstx = dPriv->x + dstx; - dest_rect.x1 = dstx; - dest_rect.y1 = dsty; - dest_rect.x2 = dstx + width; - dest_rect.y2 = dsty + height; + dest_rect.x1 = dstx < 0 ? 0 : dstx; + dest_rect.y1 = dsty < 0 ? 0 : dsty; + dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width; + dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height; for (i = 0; i < nbox; i++) { drm_clip_rect_t rect; @@ -268,7 +264,7 @@ do_blit_bitmap( GLcontext *ctx, for (px = 0; px < box_w; px += DX) { int h = MIN2(DY, box_h - py); int w = MIN2(DX, box_w - px); - GLuint sz = align(align(w,8) * h, 64)/8; + GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY; @@ -292,7 +288,7 @@ do_blit_bitmap( GLcontext *ctx, dst->cpp, (GLubyte *)stipple, sz, - color.ui, + (dst->cpp == 2) ? color565 : color8888, dst->pitch, dst->buffer, 0, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 5dac50df32c..b08531ce7de 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -53,7 +53,7 @@ DRI_CONF_BEGIN DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_SECTION_END DRI_CONF_END; const GLuint __driNConfigOptions = 4; diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index cb23b9dd879..775b689da33 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -122,6 +122,29 @@ static void intel_texture_invalidate_cb( struct intel_context *intel, intel_texture_invalidate( (struct intel_texture_object *) ptr ); } +#include "texformat.h" +static GLuint intel_compressed_num_bytes(GLenum mesaFormat) +{ + GLuint bytes = 0; + + switch (mesaFormat) { + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + case MESA_FORMAT_RGB_DXT1: + case MESA_FORMAT_RGBA_DXT1: + bytes = 2; + break; + + case MESA_FORMAT_RGBA_DXT3: + case MESA_FORMAT_RGBA_DXT5: + bytes = 4; + + default: + break; + } + + return bytes; +} /* */ @@ -132,7 +155,8 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, GLuint face, i; GLuint nr_faces = 0; struct gl_texture_image *firstImage; - + GLuint cpp = 0; + if( tObj == intel->frame_buffer_texobj ) return GL_FALSE; @@ -165,6 +189,12 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, + if (firstImage->IsCompressed) { + cpp = intel_compressed_num_bytes(firstImage->TexFormat->MesaFormat); + } else { + cpp = firstImage->TexFormat->TexelBytes; + } + /* Check tree can hold all active levels. Check tree matches * target, imageFormat, etc. */ @@ -176,7 +206,7 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, intelObj->mt->width0 != firstImage->Width || intelObj->mt->height0 != firstImage->Height || intelObj->mt->depth0 != firstImage->Depth || - intelObj->mt->cpp != firstImage->TexFormat->TexelBytes || + intelObj->mt->cpp != cpp || intelObj->mt->compressed != firstImage->IsCompressed)) { intel_miptree_destroy(intel, intelObj->mt); @@ -199,7 +229,7 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, firstImage->Width, firstImage->Height, firstImage->Depth, - firstImage->TexFormat->TexelBytes, + cpp, firstImage->IsCompressed); /* Tell the buffer manager that we will manage the backing diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index 39a443c4f9f..cc736944b97 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -34,10 +34,21 @@ #include "intel_tex_layout.h" #include "macros.h" - -static int align(int value, int alignment) +GLuint intel_compressed_alignment(GLenum internalFormat) { - return (value + alignment - 1) & ~(alignment - 1); + GLuint alignment = 4; + + switch (internalFormat) { + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + alignment = 8; + break; + + default: + break; + } + + return alignment; } void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ) @@ -51,17 +62,30 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr mt->pitch = mt->width0; + if (mt->compressed) { + align_w = intel_compressed_alignment(mt->internal_format); + mt->pitch = ALIGN(mt->width0, align_w); + } + /* May need to adjust pitch to accomodate the placement of * the 2nd mipmap. This occurs when the alignment * constraints of mipmap placement push the right edge of the * 2nd mipmap out past the width of its parent. */ if (mt->first_level != mt->last_level) { - GLuint mip1_width = align(minify(mt->width0), align_w) - + minify(minify(mt->width0)); + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } - if (mip1_width > mt->width0) - mt->pitch = mip1_width; + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } } /* Pitch must be a whole number of dwords, even though we @@ -79,7 +103,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr if (mt->compressed) img_height = MAX2(1, height/4); else - img_height = align(height, align_h); + img_height = ALIGN(height, align_h); /* Because the images are packed better, the final offset @@ -90,7 +114,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr /* Layout_below: step right after second mipmap. */ if (level == mt->first_level + 1) { - x += align(width, align_w); + x += ALIGN(width, align_w); } else { y += img_height; diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h index 46151dbb7a6..193699d3f70 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.h +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h @@ -39,3 +39,4 @@ static GLuint minify( GLuint d ) } extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ); +extern GLuint intel_compressed_alignment(GLenum); diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 786a298cc3b..8eaa6fe9406 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -70,6 +70,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ATI_fragment_shader #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord +#define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate @@ -133,6 +134,7 @@ const struct dri_extension card_extensions[] = { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, { "GL_EXT_blend_subtract", NULL }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index d12c3bc664c..b5a63eb5f66 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -1818,6 +1818,12 @@ void r200UpdateTextureState( GLcontext *ctx ) GLboolean ok; GLuint dbg; + /* NOTE: must not manipulate rmesa->state.texture.unit[].unitneeded or + rmesa->state.envneeded before a R200_STATECHANGE (or R200_NEWPRIM) since + we use these to determine if we want to emit the corresponding state + atoms. */ + R200_NEWPRIM( rmesa ); + if (ctx->ATIFragmentShader._Enabled) { GLuint i; for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) { diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 6089d617c6b..70881fd7572 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -744,9 +744,16 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte goto next; case OPCODE_MAD: + /* only 2 read ports into temp memory thus may need the macro op MAD_2 + instead (requiring 2 clocks) if all inputs are in temp memory + (and, only if they actually reference 3 distinct temps) */ hw_op=(src[0].File == PROGRAM_TEMPORARY && src[1].File == PROGRAM_TEMPORARY && - src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; + src[2].File == PROGRAM_TEMPORARY && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) && + (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ? + R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); @@ -874,8 +881,11 @@ else { case OPCODE_XPD: /* mul r0, r1.yzxw, r2.zxyw mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 */ + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ? + R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, @@ -901,7 +911,7 @@ else { o_inst++; u_temp_i--; - o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst), + o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 9ea14ab4c78..4ad947b0884 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -83,6 +83,7 @@ int hw_tcl_on = 1; #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_minmax //#define need_GL_EXT_fog_coord +#define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate @@ -110,6 +111,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, // {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 907a987fbe2..0c86b223a42 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -90,7 +90,7 @@ DRI_CONF_BEGIN DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG DRI_CONF_NO_RAST(false) @@ -117,7 +117,7 @@ DRI_CONF_BEGIN DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_TEXTURE_BLEND_QUALITY(1.0,"0.0:1.0") DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 50cfd99e1a7..a6b5d08c100 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1182,11 +1182,12 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) * GLX_ARB_multisample */ case GLX_SAMPLE_BUFFERS_ARB: - /* ms not supported */ - return NULL; case GLX_SAMPLES_ARB: - /* ms not supported */ - return NULL; + parselist++; + if (*parselist++ != 0) + /* ms not supported */ + return NULL; + break; /* * FBConfig attribs. diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index e79a905346c..a240baf3011 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -610,6 +610,21 @@ delete_arrayobj_cb(GLuint id, void *data, void *userData) } /** + * Callback for freeing shader program data. Call it before delete_shader_cb + * to avoid memory access error. + */ +static void +free_shader_program_data_cb(GLuint id, void *data, void *userData) +{ + GLcontext *ctx = (GLcontext *) userData; + struct gl_shader_program *shProg = (struct gl_shader_program *) data; + + if (shProg->Type == GL_SHADER_PROGRAM_MESA) { + _mesa_free_shader_program_data(ctx, shProg); + } +} + +/** * Callback for deleting shader and shader programs objects. * Called by _mesa_HashDeleteAll(). */ @@ -708,6 +723,7 @@ free_shared_state( GLcontext *ctx, struct gl_shared_state *ss ) _mesa_DeleteHashTable(ss->ArrayObjects); #if FEATURE_ARB_shader_objects + _mesa_HashWalk(ss->ShaderObjects, free_shader_program_data_cb, ctx); _mesa_HashDeleteAll(ss->ShaderObjects, delete_shader_cb, ctx); _mesa_DeleteHashTable(ss->ShaderObjects); #endif diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index d4990bb795f..fb54d6184d9 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -213,7 +213,7 @@ put_values_z24(GLcontext *ctx, struct gl_renderbuffer *z24rb, GLuint count, const void *values, const GLubyte *mask) { struct gl_renderbuffer *dsrb = z24rb->Wrapped; - const GLubyte *src = (const GLubyte *) values; + const GLuint *src = (const GLuint *) values; ASSERT(z24rb->DataType == GL_UNSIGNED_INT); ASSERT(dsrb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 844db6b9d2d..1b2f90ef912 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -3246,6 +3246,36 @@ save_StencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask) static void GLAPIENTRY +save_StencilFuncSeparateATI(GLenum frontfunc, GLenum backfunc, GLint ref, + GLuint mask) +{ + GET_CURRENT_CONTEXT(ctx); + Node *n; + ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx); + /* GL_FRONT */ + n = ALLOC_INSTRUCTION(ctx, OPCODE_STENCIL_FUNC_SEPARATE, 4); + if (n) { + n[1].e = GL_FRONT; + n[2].e = frontfunc; + n[3].i = ref; + n[4].ui = mask; + } + /* GL_BACK */ + n = ALLOC_INSTRUCTION(ctx, OPCODE_STENCIL_FUNC_SEPARATE, 4); + if (n) { + n[1].e = GL_BACK; + n[2].e = backfunc; + n[3].i = ref; + n[4].ui = mask; + } + if (ctx->ExecuteFlag) { + CALL_StencilFuncSeparate(ctx->Exec, (GL_FRONT, frontfunc, ref, mask)); + CALL_StencilFuncSeparate(ctx->Exec, (GL_BACK, backfunc, ref, mask)); + } +} + + +static void GLAPIENTRY save_StencilMaskSeparate(GLenum face, GLuint mask) { GET_CURRENT_CONTEXT(ctx); @@ -7870,6 +7900,9 @@ _mesa_init_dlist_table(struct _glapi_table *table) SET_StencilMaskSeparate(table, save_StencilMaskSeparate); SET_StencilOpSeparate(table, save_StencilOpSeparate); + /* ATI_separate_stencil */ + SET_StencilFuncSeparateATI(table, save_StencilFuncSeparateATI); + /* GL_ARB_imaging */ /* Not all are supported */ SET_BlendColor(table, save_BlendColor); diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c index 3acccf04300..b0f54dcd48d 100644 --- a/src/mesa/main/drawpix.c +++ b/src/mesa/main/drawpix.c @@ -342,8 +342,9 @@ _mesa_Bitmap( GLsizei width, GLsizei height, if (ctx->RenderMode == GL_RENDER) { /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */ - GLint x = IFLOOR(ctx->Current.RasterPos[0] - xorig); - GLint y = IFLOOR(ctx->Current.RasterPos[1] - yorig); + const GLfloat epsilon = 0.0001; + GLint x = IFLOOR(ctx->Current.RasterPos[0] + epsilon - xorig); + GLint y = IFLOOR(ctx->Current.RasterPos[1] + epsilon - yorig); ctx->Driver.Bitmap( ctx, x, y, width, height, &ctx->Unpack, bitmap ); } #if _HAVE_FULL_GL diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 2ecfbdc0264..e324deaf29a 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -104,6 +104,8 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment) (void) posix_memalign(& mem, alignment, bytes); return mem; +#elif defined(_WIN32) && defined(_MSC_VER) + return _aligned_malloc(bytes, alignment); #else uintptr_t ptr, buf; @@ -144,6 +146,15 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment) } return mem; +#elif defined(_WIN32) && defined(_MSC_VER) + void *mem; + + mem = _aligned_malloc(bytes, alignment); + if (mem != NULL) { + (void) memset(mem, 0, bytes); + } + + return mem; #else uintptr_t ptr, buf; @@ -180,6 +191,8 @@ _mesa_align_free(void *ptr) { #if defined(HAVE_POSIX_MEMALIGN) free(ptr); +#elif defined(_WIN32) && defined(_MSC_VER) + _aligned_free(ptr); #else void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); void *realAddr = *cubbyHole; @@ -194,6 +207,10 @@ void * _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize, unsigned long alignment) { +#if defined(_WIN32) && defined(_MSC_VER) + (void) oldSize; + return _aligned_realloc(oldBuffer, newSize, alignment); +#else const size_t copySize = (oldSize < newSize) ? oldSize : newSize; void *newBuf = _mesa_align_malloc(newSize, alignment); if (newBuf && oldBuffer && copySize > 0) { @@ -202,6 +219,7 @@ _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize, if (oldBuffer) _mesa_align_free(oldBuffer); return newBuf; +#endif } @@ -258,7 +276,7 @@ _mesa_memset16( unsigned short *dst, unsigned short val, size_t n ) void _mesa_bzero( void *dst, size_t n ) { -#if defined(__FreeBSD__) +#if defined(__FreeBSD__) || defined(__DragonFly__) bzero( dst, n ); #else memset( dst, 0, n ); @@ -560,6 +578,7 @@ _mesa_ffs(int i) bit++; i >>= 1; } + bit++; } return bit; #else diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c index c48063d9192..4f329cdf59b 100644 --- a/src/mesa/main/texcompress_s3tc.c +++ b/src/mesa/main/texcompress_s3tc.c @@ -577,6 +577,32 @@ const struct gl_texture_format _mesa_texformat_rgb_dxt1 = { NULL /* StoreTexel */ }; +#if FEATURE_EXT_texture_sRGB +const struct gl_texture_format _mesa_texformat_srgb_dxt1 = { + MESA_FORMAT_SRGB_DXT1, /* MesaFormat */ + GL_RGB, /* BaseFormat */ + GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ + 4, /*approx*/ /* RedBits */ + 4, /*approx*/ /* GreenBits */ + 4, /*approx*/ /* BlueBits */ + 0, /* AlphaBits */ + 0, /* LuminanceBits */ + 0, /* IntensityBits */ + 0, /* IndexBits */ + 0, /* DepthBits */ + 0, /* StencilBits */ + 0, /* TexelBytes */ + texstore_rgb_dxt1, /* StoreTexImageFunc */ + NULL, /*impossible*/ /* FetchTexel1D */ + fetch_texel_2d_rgb_dxt1, /* FetchTexel2D */ + NULL, /*impossible*/ /* FetchTexel3D */ + NULL, /*impossible*/ /* FetchTexel1Df */ + fetch_texel_2d_f_rgb_dxt1, /* FetchTexel2Df */ + NULL, /*impossible*/ /* FetchTexel3Df */ + NULL /* StoreTexel */ +}; +#endif + const struct gl_texture_format _mesa_texformat_rgba_dxt1 = { MESA_FORMAT_RGBA_DXT1, /* MesaFormat */ GL_RGBA, /* BaseFormat */ diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index 387779323a3..5f8dc7dc188 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -1242,7 +1242,7 @@ _mesa_UpdateTexEnvProgram( GLcontext *ctx ) /* If a conventional fragment program/shader isn't in effect... */ if (!ctx->FragmentProgram._Enabled && - !ctx->Shader.CurrentProgram) { + (!ctx->Shader.CurrentProgram || !ctx->Shader.CurrentProgram->FragmentProgram)) { make_state_key(ctx, &key); hash = hash_key(&key); @@ -1271,7 +1271,8 @@ _mesa_UpdateTexEnvProgram( GLcontext *ctx ) } } else { - ctx->FragmentProgram._Current = ctx->FragmentProgram.Current; + /* _Current pointer has been updated in update_program */ + /* ctx->FragmentProgram._Current = ctx->FragmentProgram.Current; */ } /* Tell the driver about the change. Could define a new target for diff --git a/src/mesa/main/texformat.h b/src/mesa/main/texformat.h index 55851db7016..82023b946d5 100644 --- a/src/mesa/main/texformat.h +++ b/src/mesa/main/texformat.h @@ -97,6 +97,7 @@ enum _format { MESA_FORMAT_SRGBA8, MESA_FORMAT_SL8, MESA_FORMAT_SLA8, + MESA_FORMAT_SRGB_DXT1, /*@}*/ #endif @@ -168,6 +169,7 @@ extern const struct gl_texture_format _mesa_texformat_srgb8; extern const struct gl_texture_format _mesa_texformat_srgba8; extern const struct gl_texture_format _mesa_texformat_sl8; extern const struct gl_texture_format _mesa_texformat_sla8; +extern const struct gl_texture_format _mesa_texformat_srgb_dxt1; /*@}*/ #endif diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index a5966e7ecda..d99738b1ab7 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -209,6 +209,9 @@ calculate_derived_texenv( struct gl_tex_env_combine_state *state, return; } + if (mode == GL_REPLACE_EXT) + mode = GL_REPLACE; + switch (mode) { case GL_REPLACE: case GL_MODULATE: @@ -311,7 +314,9 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param ) switch (pname) { case GL_TEXTURE_ENV_MODE: { - const GLenum mode = (GLenum) (GLint) *param; + GLenum mode = (GLenum) (GLint) *param; + if (mode == GL_REPLACE_EXT) + mode = GL_REPLACE; if (texUnit->EnvMode == mode) return; if (mode == GL_MODULATE || diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index f4a12af3e4e..4a89d8951ef 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -1519,8 +1519,9 @@ _mesa_execute_program(GLcontext * ctx, case OPCODE_END: return GL_TRUE; default: - _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program", + _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program", inst->Opcode); + assert(0); return GL_TRUE; /* return value doesn't matter */ } diff --git a/src/mesa/shader/prog_parameter.c b/src/mesa/shader/prog_parameter.c index 46d30872e4f..3ad7215755d 100644 --- a/src/mesa/shader/prog_parameter.c +++ b/src/mesa/shader/prog_parameter.c @@ -40,8 +40,7 @@ struct gl_program_parameter_list * _mesa_new_parameter_list(void) { - return (struct gl_program_parameter_list *) - _mesa_calloc(sizeof(struct gl_program_parameter_list)); + return CALLOC_STRUCT(gl_program_parameter_list); } diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index 95cabe73234..12fcb675c3d 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -187,7 +187,6 @@ _mesa_init_program_struct( GLcontext *ctx, struct gl_program *prog, { (void) ctx; if (prog) { - _mesa_bzero(prog, sizeof(*prog)); prog->Id = id; prog->Target = target; prog->Resident = GL_TRUE; @@ -242,6 +241,8 @@ _mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog, struct gl_program * _mesa_new_program(GLcontext *ctx, GLenum target, GLuint id) { + if (ctx->Driver.NewProgram) + return ctx->Driver.NewProgram(ctx, target, id); switch (target) { case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */ return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 5f9e2b84a4e..fa14b7d3b32 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -128,10 +128,17 @@ _mesa_free_shader_program_data(GLcontext *ctx, for (i = 0; i < shProg->NumShaders; i++) { _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL); } + shProg->NumShaders = 0; + if (shProg->Shaders) { _mesa_free(shProg->Shaders); shProg->Shaders = NULL; } + + if (shProg->InfoLog) { + _mesa_free(shProg->InfoLog); + shProg->InfoLog = NULL; + } } @@ -142,10 +149,7 @@ void _mesa_free_shader_program(GLcontext *ctx, struct gl_shader_program *shProg) { _mesa_free_shader_program_data(ctx, shProg); - if (shProg->Shaders) { - _mesa_free(shProg->Shaders); - shProg->Shaders = NULL; - } + _mesa_free(shProg); } @@ -380,6 +384,17 @@ sizeof_glsl_type(GLenum type) case GL_BOOL: case GL_FLOAT: case GL_INT: + case GL_SAMPLER_1D: + case GL_SAMPLER_2D: + case GL_SAMPLER_3D: + case GL_SAMPLER_CUBE: + case GL_SAMPLER_1D_SHADOW: + case GL_SAMPLER_2D_SHADOW: + case GL_SAMPLER_2D_RECT_ARB: + case GL_SAMPLER_2D_RECT_SHADOW_ARB: + case GL_SAMPLER_1D_ARRAY_SHADOW_EXT: + case GL_SAMPLER_2D_ARRAY_SHADOW_EXT: + case GL_SAMPLER_CUBE_SHADOW_EXT: return 1; case GL_BOOL_VEC2: case GL_FLOAT_VEC2: diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index ef9c0ab3f9a..acaf658f381 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1840,6 +1840,7 @@ _slang_gen_var_decl(slang_assemble_ctx *A, slang_variable *var) n->Store->File = PROGRAM_TEMPORARY; n->Store->Size = _slang_sizeof_type_specifier(&n->Var->type.specifier); + A->program->NumTemporaries++; assert(n->Store->Size > 0); } return n; diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 7b0868e80e5..5116cd14e55 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -339,6 +339,9 @@ _slang_resolve_attributes(struct gl_shader_program *shProg, return GL_FALSE; } _mesa_add_attribute(shProg->Attributes, name, size, attr); + + /* set the attribute as used */ + usedAttributes |= 1<<attr; } inst->SrcReg[j].Index = VERT_ATTRIB_GENERIC0 + attr; diff --git a/src/mesa/shader/slang/slang_preprocess.c b/src/mesa/shader/slang/slang_preprocess.c index 72281eda57d..a986db153ee 100644 --- a/src/mesa/shader/slang/slang_preprocess.c +++ b/src/mesa/shader/slang/slang_preprocess.c @@ -483,6 +483,7 @@ pp_cond_stack_reevaluate (pp_cond_stack *self) typedef struct { GLboolean MESA_shader_debug; /* GL_MESA_shader_debug enable */ + GLboolean ARB_texture_rectangle; /* GL_ARB_texture_rectangle enable */ } pp_ext; /* @@ -498,6 +499,7 @@ static GLvoid pp_ext_init (pp_ext *self) { pp_ext_disable_all (self); + self->ARB_texture_rectangle = GL_TRUE; /* Other initialization code goes here. */ } @@ -506,6 +508,8 @@ pp_ext_set (pp_ext *self, const char *name, GLboolean enable) { if (_mesa_strcmp (name, "MESA_shader_debug") == 0) self->MESA_shader_debug = enable; + else if (_mesa_strcmp (name, "GL_ARB_texture_rectangle") == 0) + self->ARB_texture_rectangle = enable; /* Next extension name tests go here. */ else return GL_FALSE; diff --git a/src/mesa/swrast/s_aatriangle.c b/src/mesa/swrast/s_aatriangle.c index 0d95f06a9de..587c8688417 100644 --- a/src/mesa/swrast/s_aatriangle.c +++ b/src/mesa/swrast/s_aatriangle.c @@ -368,11 +368,14 @@ index_aa_tri(GLcontext *ctx, * Compute mipmap level of detail. * XXX we should really include the R coordinate in this computation * in order to do 3-D texture mipmapping. + * \param cx, cy fragment center position + * \param unit texture unit */ static INLINE GLfloat -compute_lambda(const GLfloat sPlane[4], const GLfloat tPlane[4], +compute_lambda(GLcontext *ctx, + const GLfloat sPlane[4], const GLfloat tPlane[4], const GLfloat qPlane[4], GLfloat cx, GLfloat cy, - GLfloat invQ, GLfloat texWidth, GLfloat texHeight) + GLfloat invQ, GLuint unit) { const GLfloat s = solve_plane(cx, cy, sPlane); const GLfloat t = solve_plane(cx, cy, tPlane); @@ -387,6 +390,15 @@ compute_lambda(const GLfloat sPlane[4], const GLfloat tPlane[4], GLfloat dtdx = t_x1 * invQ_x1 - t * invQ; GLfloat dtdy = t_y1 * invQ_y1 - t * invQ; GLfloat maxU, maxV, rho, lambda; + GLfloat texWidth = 1.0, texHeight = 1.0; + const struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; + if (obj) { + const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel]; + if (texImage){ + texWidth = (GLfloat) texImage->Width; + texHeight = (GLfloat) texImage->Height; + } + } dsdx = FABSF(dsdx); dsdy = FABSF(dsdy); dtdx = FABSF(dtdx); diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h index b26f21f5db0..f323701cb7b 100644 --- a/src/mesa/swrast/s_aatritemp.h +++ b/src/mesa/swrast/s_aatritemp.h @@ -75,8 +75,6 @@ GLfloat tPlane[FRAG_ATTRIB_MAX][4]; /* texture T */ GLfloat uPlane[FRAG_ATTRIB_MAX][4]; /* texture R */ GLfloat vPlane[FRAG_ATTRIB_MAX][4]; /* texture Q */ - GLfloat texWidth[FRAG_ATTRIB_MAX]; - GLfloat texHeight[FRAG_ATTRIB_MAX]; #endif GLfloat bf = SWRAST_CONTEXT(ctx)->_BackfaceCullSign; @@ -206,16 +204,6 @@ compute_plane(p0, p1, p2, t0, t1, t2, tPlane[attr]); compute_plane(p0, p1, p2, r0, r1, r2, uPlane[attr]); compute_plane(p0, p1, p2, q0, q1, q2, vPlane[attr]); - if (attr < FRAG_ATTRIB_VAR0 && attr >= FRAG_ATTRIB_TEX0) { - const GLuint u = attr - FRAG_ATTRIB_TEX0; - const struct gl_texture_object *obj = ctx->Texture.Unit[u]._Current; - const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel]; - texWidth[attr] = (GLfloat) texImage->Width; - texHeight[attr] = (GLfloat) texImage->Height; - } - else { - texWidth[attr] = texHeight[attr] = 1.0; - } ATTRIB_LOOP_END } span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA | SPAN_VARYING); @@ -292,11 +280,14 @@ array->attribs[attr][count][0] = solve_plane(cx, cy, sPlane[attr]) * invQ; array->attribs[attr][count][1] = solve_plane(cx, cy, tPlane[attr]) * invQ; array->attribs[attr][count][2] = solve_plane(cx, cy, uPlane[attr]) * invQ; - if (attr < FRAG_ATTRIB_VAR0 && attr >= FRAG_ATTRIB_TEX0) { + if (attr >= FRAG_ATTRIB_TEX0 && attr < FRAG_ATTRIB_VAR0) { const GLuint unit = attr - FRAG_ATTRIB_TEX0; - array->lambda[unit][count] = compute_lambda(sPlane[attr], tPlane[attr], - vPlane[attr], cx, cy, invQ, - texWidth[attr], texHeight[attr]); + array->lambda[unit][count] = compute_lambda(ctx, + sPlane[attr], + tPlane[attr], + vPlane[attr], + cx, cy, invQ, + unit); } ATTRIB_LOOP_END #endif @@ -385,14 +376,14 @@ array->attribs[attr][ix][0] = solve_plane(cx, cy, sPlane[attr]) * invQ; array->attribs[attr][ix][1] = solve_plane(cx, cy, tPlane[attr]) * invQ; array->attribs[attr][ix][2] = solve_plane(cx, cy, uPlane[attr]) * invQ; - if (attr < FRAG_ATTRIB_VAR0 && attr >= FRAG_ATTRIB_TEX0) { + if (attr >= FRAG_ATTRIB_TEX0 && attr < FRAG_ATTRIB_VAR0) { const GLuint unit = attr - FRAG_ATTRIB_TEX0; - array->lambda[unit][ix] = compute_lambda(sPlane[attr], + array->lambda[unit][ix] = compute_lambda(ctx, + sPlane[attr], tPlane[attr], vPlane[attr], cx, cy, invQ, - texWidth[attr], - texHeight[attr]); + unit); } ATTRIB_LOOP_END #endif diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index d8f167b3577..893a5e87317 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -187,7 +187,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) arrays[attr].BufferObj = exec->vtx.bufferobj; /* NullBufferObj */ arrays[attr]._MaxElement = count; /* ??? */ - data += exec->vtx.attrsz[attr] * sizeof(GLfloat); + data += exec->vtx.attrsz[src] * sizeof(GLfloat); } } } diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index e7794c2a6cc..b68215c666a 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -864,6 +864,9 @@ static void GLAPIENTRY _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum _ae_map_vbos( ctx ); + if (ctx->Array.ElementArrayBufferObj->Name) + indices = ADD_POINTERS(ctx->Array.ElementArrayBufferObj->Pointer, indices); + vbo_save_NotifyBegin( ctx, mode | VBO_SAVE_PRIM_WEAK ); switch (type) { diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index 697d00e9d11..95af8549f22 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -131,7 +131,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx, break; } - for (attr = 0; attr < VBO_ATTRIB_MAX; attr++) { + for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { GLuint src = map[attr]; if (node->attrsz[src]) { @@ -146,7 +146,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx, assert(arrays[attr].BufferObj->Name); - data += node->attrsz[attr] * sizeof(GLfloat); + data += node->attrsz[src] * sizeof(GLfloat); } } } diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c index 0b2af0a3706..bbdebe046f0 100644 --- a/src/mesa/x86/common_x86.c +++ b/src/mesa/x86/common_x86.c @@ -38,7 +38,7 @@ #if defined(USE_SSE_ASM) && defined(__linux__) #include <linux/version.h> #endif -#if defined(USE_SSE_ASM) && defined(__FreeBSD__) +#if defined(USE_SSE_ASM) && (defined(__FreeBSD__) || defined(__DragonFly__)) #include <sys/types.h> #include <sys/sysctl.h> #endif @@ -104,7 +104,7 @@ static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp) static void check_os_sse_support( void ) { -#if defined(__FreeBSD__) +#if defined(__FreeBSD__) || defined(__DragonFly__) { int ret, enabled; unsigned int len; diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S index 2e5c3be83fe..80144b889c7 100644 --- a/src/mesa/x86/read_rgba_span_x86.S +++ b/src/mesa/x86/read_rgba_span_x86.S @@ -434,7 +434,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: je .L47 movq (%ebx), %xmm0 - + addl $8, %ebx + movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm4 andps %xmm1, %xmm0 @@ -448,6 +449,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: orps %xmm3, %xmm0 movq %xmm0, (%ecx) + addl $8, %ecx .L47: testl $1, %edx diff --git a/windows/VC8/mesa/mesa.sln b/windows/VC8/mesa/mesa.sln index 46d361ae28a..5eff4884300 100644 --- a/windows/VC8/mesa/mesa.sln +++ b/windows/VC8/mesa/mesa.sln @@ -6,6 +6,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gdi", "gdi\gdi.vcproj", "{A EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "glu", "glu\glu.vcproj", "{2E50FDAF-430B-475B-AE6B-60B68F2875BA}"
+ ProjectSection(ProjectDependencies) = postProject
+ {A1B24907-E196-4826-B6AF-26723629B633} = {A1B24907-E196-4826-B6AF-26723629B633}
+ EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mesa", "mesa\mesa.vcproj", "{2120C974-2717-4709-B44F-D6E6D0A56448}"
EndProject
|